1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
12 //===----------------------------------------------------------------------===//
15 // Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical
19 def A9_Issue0 : FuncUnit; // Issue 0; most itineraries take Issue0 or Issue1 for 1 cycle
20 def A9_Issue1 : FuncUnit; // Issue 1; IIC_Br takes both Issue0 and Issue1
21 def A9_Branch : FuncUnit; // Branch; last stage of IIC_Br and the load-multiple/pop + branch itineraries
22 def A9_ALU0 : FuncUnit; // ALU / MUL pipeline 0 (the multiply itineraries use only this ALU)
23 def A9_ALU1 : FuncUnit; // ALU pipeline 1
24 def A9_AGU : FuncUnit; // Address generation unit for ld / st
25 def A9_NPipe : FuncUnit; // NEON pipeline; also the execute stage of the VFP itineraries
26 def A9_MUX0 : FuncUnit; // AGU + NEON/FPU multiplexer
27 def A9_LS0 : FuncUnit; // L/S Units, 32-bit per unit. Fake FU to limit l/s.
28 def A9_LS1 : FuncUnit; // L/S Units, 32-bit per unit.
29 def A9_DRegsVFP: FuncUnit; // FP register set, VFP side (artificial FU used to model VFP/NEON cross-domain stalls)
30 def A9_DRegsN : FuncUnit; // FP register set, NEON side (artificial FU used to model VFP/NEON cross-domain stalls)
33 def A9_LdBypass : Bypass; // Bypass tag named in the bypass lists of the load, ALU and compare itineraries
35 def CortexA9Itineraries : ProcessorItineraries<
36 [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
37 A9_LS0, A9_LS1, A9_DRegsVFP, A9_DRegsN],
39 // Two fully-pipelined integer ALU pipelines
42 // Move instructions, unconditional
43 InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
44 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
45 InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
46 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
47 InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
48 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
49 InstrItinData<IIC_iMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
50 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
51 InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
52 InstrStage<1, [A9_ALU0, A9_ALU1]>,
53 InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
56 InstrItinData<IIC_iMVNi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
57 InstrStage<1, [A9_ALU0, A9_ALU1]>],
59 InstrItinData<IIC_iMVNr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
60 InstrStage<1, [A9_ALU0, A9_ALU1]>],
61 [1, 1], [NoBypass, A9_LdBypass]>,
62 InstrItinData<IIC_iMVNsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
63 InstrStage<2, [A9_ALU0, A9_ALU1]>],
65 InstrItinData<IIC_iMVNsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
66 InstrStage<3, [A9_ALU0, A9_ALU1]>],
70 InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
71 InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
73 // Binary Instructions that produce a result
74 InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
75 InstrStage<1, [A9_ALU0, A9_ALU1]>],
76 [1, 1], [NoBypass, A9_LdBypass]>,
77 InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
78 InstrStage<1, [A9_ALU0, A9_ALU1]>],
79 [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
80 InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
81 InstrStage<2, [A9_ALU0, A9_ALU1]>],
82 [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
83 InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
84 InstrStage<2, [A9_ALU0, A9_ALU1]>],
85 [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
86 InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
87 InstrStage<3, [A9_ALU0, A9_ALU1]>],
89 [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
91 // Bitwise Instructions that produce a result
92 InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
93 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
94 InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
95 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
96 InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
97 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
98 InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
99 InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
101 // Unary Instructions that produce a result
104 InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
105 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
107 // BFC, BFI, UBFX, SBFX
108 InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
109 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
112 // Zero and sign extension instructions
113 InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
114 InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
115 InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
116 InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
117 InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
118 InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
120 // Compare instructions
121 InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
122 InstrStage<1, [A9_ALU0, A9_ALU1]>],
124 InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
125 InstrStage<1, [A9_ALU0, A9_ALU1]>],
126 [1, 1], [A9_LdBypass, A9_LdBypass]>,
127 InstrItinData<IIC_iCMPsi , [InstrStage<2, [A9_ALU0, A9_ALU1]>],
128 [1, 1], [A9_LdBypass, NoBypass]>,
129 InstrItinData<IIC_iCMPsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
130 InstrStage<3, [A9_ALU0, A9_ALU1]>],
131 [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
134 InstrItinData<IIC_iTSTi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
135 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
136 InstrItinData<IIC_iTSTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
137 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
138 InstrItinData<IIC_iTSTsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
139 InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
140 InstrItinData<IIC_iTSTsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
141 InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
143 // Move instructions, conditional
144 // FIXME: Correctly model the extra input dep on the destination.
145 InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
146 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
147 InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
148 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
149 InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
150 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
151 InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
152 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
154 // Integer multiply pipeline
156 InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
157 InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
158 InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
159 InstrStage<2, [A9_ALU0]>],
161 InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
162 InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
163 InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
164 InstrStage<2, [A9_ALU0]>],
166 InstrItinData<IIC_iMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
167 InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
168 InstrItinData<IIC_iMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
169 InstrStage<3, [A9_ALU0]>],
171 // Integer load pipeline
172 // FIXME: The timings are some rough approximations
175 InstrItinData<IIC_iLoad_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
176 InstrStage<1, [A9_MUX0], 0>,
177 InstrStage<1, [A9_AGU]>,
178 InstrStage<1, [A9_LS0, A9_LS1]>],
179 [3, 1], [A9_LdBypass]>,
180 InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
181 InstrStage<1, [A9_MUX0], 0>,
182 InstrStage<2, [A9_AGU]>,
183 InstrStage<1, [A9_LS0, A9_LS1]>],
184 [4, 1], [A9_LdBypass]>,
185 // FIXME: If address is 64-bit aligned, AGU cycles is 1.
186 InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
187 InstrStage<1, [A9_MUX0], 0>,
188 InstrStage<2, [A9_AGU]>,
189 InstrStage<1, [A9_LS0, A9_LS1]>],
190 [3, 3, 1], [A9_LdBypass]>,
193 InstrItinData<IIC_iLoad_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
194 InstrStage<1, [A9_MUX0], 0>,
195 InstrStage<1, [A9_AGU]>,
196 InstrStage<1, [A9_LS0, A9_LS1]>],
197 [3, 1, 1], [A9_LdBypass]>,
198 InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
199 InstrStage<1, [A9_MUX0], 0>,
200 InstrStage<2, [A9_AGU]>,
201 InstrStage<1, [A9_LS0, A9_LS1]>],
202 [4, 1, 1], [A9_LdBypass]>,
203 InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
204 InstrStage<1, [A9_MUX0], 0>,
205 InstrStage<2, [A9_AGU]>,
206 InstrStage<1, [A9_LS0, A9_LS1]>],
207 [3, 3, 1, 1], [A9_LdBypass]>,
209 // Scaled register offset
210 InstrItinData<IIC_iLoad_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
211 InstrStage<1, [A9_MUX0], 0>,
212 InstrStage<1, [A9_AGU]>,
213 InstrStage<1, [A9_LS0, A9_LS1]>],
214 [4, 1, 1], [A9_LdBypass]>,
215 InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
216 InstrStage<1, [A9_MUX0], 0>,
217 InstrStage<2, [A9_AGU]>,
218 InstrStage<1, [A9_LS0, A9_LS1]>],
219 [5, 1, 1], [A9_LdBypass]>,
221 // Immediate offset with update
222 InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
223 InstrStage<1, [A9_MUX0], 0>,
224 InstrStage<1, [A9_AGU]>,
225 InstrStage<1, [A9_LS0, A9_LS1]>],
226 [3, 2, 1], [A9_LdBypass]>,
227 InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
228 InstrStage<1, [A9_MUX0], 0>,
229 InstrStage<2, [A9_AGU]>,
230 InstrStage<1, [A9_LS0, A9_LS1]>],
231 [4, 3, 1], [A9_LdBypass]>,
233 // Register offset with update
234 InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
235 InstrStage<1, [A9_MUX0], 0>,
236 InstrStage<1, [A9_AGU]>,
237 InstrStage<1, [A9_LS0, A9_LS1]>],
238 [3, 2, 1, 1], [A9_LdBypass]>,
239 InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
240 InstrStage<1, [A9_MUX0], 0>,
241 InstrStage<2, [A9_AGU]>,
242 InstrStage<1, [A9_LS0, A9_LS1]>],
243 [4, 3, 1, 1], [A9_LdBypass]>,
244 InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
245 InstrStage<1, [A9_MUX0], 0>,
246 InstrStage<2, [A9_AGU]>,
247 InstrStage<1, [A9_LS0, A9_LS1]>],
248 [3, 3, 1, 1], [A9_LdBypass]>,
250 // Scaled register offset with update
251 InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
252 InstrStage<1, [A9_MUX0], 0>,
253 InstrStage<1, [A9_AGU]>,
254 InstrStage<1, [A9_LS0, A9_LS1]>],
255 [4, 3, 1, 1], [A9_LdBypass]>,
256 InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
257 InstrStage<1, [A9_MUX0], 0>,
258 InstrStage<2, [A9_AGU]>,
259 InstrStage<1, [A9_LS0, A9_LS1]>],
260 [5, 4, 1, 1], [A9_LdBypass]>,
262 // Load multiple, def is the 5th operand.
263 // FIXME: This assumes 3 to 4 registers.
264 InstrItinData<IIC_iLoad_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
265 InstrStage<1, [A9_MUX0], 0>,
266 InstrStage<2, [A9_AGU]>,
267 InstrStage<2, [A9_LS0, A9_LS1]>],
269 [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
271 // Load multiple + update, defs are the 1st and 5th operands.
272 InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
273 InstrStage<1, [A9_MUX0], 0>,
274 InstrStage<2, [A9_AGU]>,
275 InstrStage<2, [A9_LS0, A9_LS1]>],
277 [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
279 // Load multiple plus branch
280 InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
281 InstrStage<1, [A9_MUX0], 0>,
282 InstrStage<1, [A9_AGU]>,
283 InstrStage<2, [A9_LS0, A9_LS1]>,
284 InstrStage<1, [A9_Branch]>],
286 [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
288 // Pop, def is the 3rd operand.
289 InstrItinData<IIC_iPop , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
290 InstrStage<1, [A9_MUX0], 0>,
291 InstrStage<2, [A9_AGU]>,
292 InstrStage<2, [A9_LS0, A9_LS1]>],
294 [NoBypass, NoBypass, A9_LdBypass]>,
296 // Pop + branch, def is the 3rd operand.
297 InstrItinData<IIC_iPop_Br, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
298 InstrStage<1, [A9_MUX0], 0>,
299 InstrStage<2, [A9_AGU]>,
300 InstrStage<2, [A9_LS0, A9_LS1]>,
301 InstrStage<1, [A9_Branch]>],
303 [NoBypass, NoBypass, A9_LdBypass]>,
306 // iLoadi + iALUr for t2LDRpci_pic.
307 InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
308 InstrStage<1, [A9_MUX0], 0>,
309 InstrStage<1, [A9_AGU]>,
310 InstrStage<1, [A9_LS0, A9_LS1]>,
311 InstrStage<1, [A9_ALU0, A9_ALU1]>],
314 // Integer store pipeline
317 InstrItinData<IIC_iStore_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
318 InstrStage<1, [A9_MUX0], 0>,
319 InstrStage<1, [A9_AGU]>,
320 InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1]>,
321 InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
322 InstrStage<1, [A9_MUX0], 0>,
323 InstrStage<2, [A9_AGU]>,
324 InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1]>,
325 // FIXME: If address is 64-bit aligned, AGU cycles is 1.
326 InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
327 InstrStage<1, [A9_MUX0], 0>,
328 InstrStage<2, [A9_AGU]>,
329 InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1]>,
332 InstrItinData<IIC_iStore_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
333 InstrStage<1, [A9_MUX0], 0>,
334 InstrStage<1, [A9_AGU]>,
335 InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>,
336 InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
337 InstrStage<1, [A9_MUX0], 0>,
338 InstrStage<2, [A9_AGU]>,
339 InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>,
340 InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
341 InstrStage<1, [A9_MUX0], 0>,
342 InstrStage<2, [A9_AGU]>,
343 InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>,
345 // Scaled register offset
346 InstrItinData<IIC_iStore_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
347 InstrStage<1, [A9_MUX0], 0>,
348 InstrStage<1, [A9_AGU]>,
349 InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>,
350 InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
351 InstrStage<1, [A9_MUX0], 0>,
352 InstrStage<2, [A9_AGU]>,
353 InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>,
355 // Immediate offset with update
356 InstrItinData<IIC_iStore_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
357 InstrStage<1, [A9_MUX0], 0>,
358 InstrStage<1, [A9_AGU]>,
359 InstrStage<1, [A9_LS0, A9_LS1]>], [2, 1, 1]>,
360 InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
361 InstrStage<1, [A9_MUX0], 0>,
362 InstrStage<2, [A9_AGU]>,
363 InstrStage<1, [A9_LS0, A9_LS1]>], [3, 1, 1]>,
365 // Register offset with update
366 InstrItinData<IIC_iStore_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
367 InstrStage<1, [A9_MUX0], 0>,
368 InstrStage<1, [A9_AGU]>,
369 InstrStage<1, [A9_LS0, A9_LS1]>],
371 InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
372 InstrStage<1, [A9_MUX0], 0>,
373 InstrStage<2, [A9_AGU]>,
374 InstrStage<1, [A9_LS0, A9_LS1]>],
376 InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
377 InstrStage<1, [A9_MUX0], 0>,
378 InstrStage<2, [A9_AGU]>,
379 InstrStage<1, [A9_LS0, A9_LS1]>],
382 // Scaled register offset with update
383 InstrItinData<IIC_iStore_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
384 InstrStage<1, [A9_MUX0], 0>,
385 InstrStage<1, [A9_AGU]>,
386 InstrStage<1, [A9_LS0, A9_LS1]>],
388 InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
389 InstrStage<1, [A9_MUX0], 0>,
390 InstrStage<2, [A9_AGU]>,
391 InstrStage<1, [A9_LS0, A9_LS1]>],
395 InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
396 InstrStage<1, [A9_MUX0], 0>,
397 InstrStage<1, [A9_AGU]>,
398 InstrStage<2, [A9_LS0, A9_LS1]>]>,
400 // Store multiple + update
401 InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
402 InstrStage<1, [A9_MUX0], 0>,
403 InstrStage<1, [A9_AGU]>,
404 InstrStage<2, [A9_LS0, A9_LS1]>], [2]>,
408 // no delay slots, so the latency of a branch is unimportant
409 InstrItinData<IIC_Br , [InstrStage<1, [A9_Issue0], 0>,
410 InstrStage<1, [A9_Issue1], 0>,
411 InstrStage<1, [A9_Branch]>]>,
413 // VFP and NEON share the same register file. This means that every VFP
414 // instruction should wait for full completion of the consecutive NEON
415 // instruction and vice-versa. We model this behavior with two artificial FUs:
416 // DRegsVFP and DRegsN.
418 // Every VFP instruction:
419 // - Acquires DRegsVFP resource for 1 cycle
420 // - Reserves DRegsN resource for the whole duration (including time to
421 // register file writeback!).
422 // Every NEON instruction does the same but with FUs swapped.
424 // Since the reserved FU cannot be acquired, this models precisely
425 // "cross-domain" stalls.
428 // Issue through integer pipeline, and execute in NEON unit.
430 // FP Special Register to Integer Register File Move
431 InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
432 InstrStage<2, [A9_DRegsN], 0, Reserved>,
433 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
434 InstrStage<1, [A9_MUX0], 0>,
435 InstrStage<1, [A9_NPipe]>]>,
437 // Single-precision FP Unary
438 InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
439 // Extra latency cycles since wbck is 2 cycles
440 InstrStage<3, [A9_DRegsN], 0, Reserved>,
441 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
442 InstrStage<1, [A9_MUX0], 0>,
443 InstrStage<1, [A9_NPipe]>],
446 // Double-precision FP Unary
447 InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
448 // Extra latency cycles since wbck is 2 cycles
449 InstrStage<3, [A9_DRegsN], 0, Reserved>,
450 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
451 InstrStage<1, [A9_MUX0], 0>,
452 InstrStage<1, [A9_NPipe]>],
456 // Single-precision FP Compare
457 InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
458 // Extra latency cycles since wbck is 4 cycles
459 InstrStage<5, [A9_DRegsN], 0, Reserved>,
460 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
461 InstrStage<1, [A9_MUX0], 0>,
462 InstrStage<1, [A9_NPipe]>],
465 // Double-precision FP Compare
466 InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
467 // Extra latency cycles since wbck is 4 cycles
468 InstrStage<5, [A9_DRegsN], 0, Reserved>,
469 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
470 InstrStage<1, [A9_MUX0], 0>,
471 InstrStage<1, [A9_NPipe]>],
474 // Single to Double FP Convert
475 InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
476 InstrStage<5, [A9_DRegsN], 0, Reserved>,
477 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
478 InstrStage<1, [A9_MUX0], 0>,
479 InstrStage<1, [A9_NPipe]>],
482 // Double to Single FP Convert
483 InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
484 InstrStage<5, [A9_DRegsN], 0, Reserved>,
485 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
486 InstrStage<1, [A9_MUX0], 0>,
487 InstrStage<1, [A9_NPipe]>],
491 // Single to Half FP Convert
492 InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
493 InstrStage<5, [A9_DRegsN], 0, Reserved>,
494 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
495 InstrStage<1, [A9_MUX0], 0>,
496 InstrStage<1, [A9_NPipe]>],
499 // Half to Single FP Convert
500 InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
501 InstrStage<3, [A9_DRegsN], 0, Reserved>,
502 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
503 InstrStage<1, [A9_MUX0], 0>,
504 InstrStage<1, [A9_NPipe]>],
508 // Single-Precision FP to Integer Convert
509 InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
510 InstrStage<5, [A9_DRegsN], 0, Reserved>,
511 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
512 InstrStage<1, [A9_MUX0], 0>,
513 InstrStage<1, [A9_NPipe]>],
516 // Double-Precision FP to Integer Convert
517 InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
518 InstrStage<5, [A9_DRegsN], 0, Reserved>,
519 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
520 InstrStage<1, [A9_MUX0], 0>,
521 InstrStage<1, [A9_NPipe]>],
524 // Integer to Single-Precision FP Convert
525 InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
526 InstrStage<5, [A9_DRegsN], 0, Reserved>,
527 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
528 InstrStage<1, [A9_MUX0], 0>,
529 InstrStage<1, [A9_NPipe]>],
532 // Integer to Double-Precision FP Convert
533 InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
534 InstrStage<5, [A9_DRegsN], 0, Reserved>,
535 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
536 InstrStage<1, [A9_MUX0], 0>,
537 InstrStage<1, [A9_NPipe]>],
540 // Single-precision FP ALU
541 InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
542 InstrStage<5, [A9_DRegsN], 0, Reserved>,
543 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
544 InstrStage<1, [A9_MUX0], 0>,
545 InstrStage<1, [A9_NPipe]>],
548 // Double-precision FP ALU
549 InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
550 InstrStage<5, [A9_DRegsN], 0, Reserved>,
551 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
552 InstrStage<1, [A9_MUX0], 0>,
553 InstrStage<1, [A9_NPipe]>],
556 // Single-precision FP Multiply
557 InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
558 InstrStage<6, [A9_DRegsN], 0, Reserved>,
559 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
560 InstrStage<1, [A9_MUX0], 0>,
561 InstrStage<1, [A9_NPipe]>],
564 // Double-precision FP Multiply
565 InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
566 InstrStage<7, [A9_DRegsN], 0, Reserved>,
567 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
568 InstrStage<1, [A9_MUX0], 0>,
569 InstrStage<2, [A9_NPipe]>],
572 // Single-precision FP MAC
573 InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
574 InstrStage<9, [A9_DRegsN], 0, Reserved>,
575 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
576 InstrStage<1, [A9_MUX0], 0>,
577 InstrStage<1, [A9_NPipe]>],
580 // Double-precision FP MAC
581 InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
582 InstrStage<10, [A9_DRegsN], 0, Reserved>,
583 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
584 InstrStage<1, [A9_MUX0], 0>,
585 InstrStage<2, [A9_NPipe]>],
588 // Single-precision FP DIV
589 InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
590 InstrStage<16, [A9_DRegsN], 0, Reserved>,
591 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
592 InstrStage<1, [A9_MUX0], 0>,
593 InstrStage<10, [A9_NPipe]>],
596 // Double-precision FP DIV
597 InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
598 InstrStage<26, [A9_DRegsN], 0, Reserved>,
599 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
600 InstrStage<1, [A9_MUX0], 0>,
601 InstrStage<20, [A9_NPipe]>],
604 // Single-precision FP SQRT
605 InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
606 InstrStage<18, [A9_DRegsN], 0, Reserved>,
607 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
608 InstrStage<1, [A9_MUX0], 0>,
609 InstrStage<13, [A9_NPipe]>],
612 // Double-precision FP SQRT
613 InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
614 InstrStage<33, [A9_DRegsN], 0, Reserved>,
615 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
616 InstrStage<1, [A9_MUX0], 0>,
617 InstrStage<28, [A9_NPipe]>],
621 // Integer to Single-precision Move
622 InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
623 // Extra 1 latency cycle since wbck is 2 cycles
624 InstrStage<3, [A9_DRegsN], 0, Reserved>,
625 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
626 InstrStage<1, [A9_MUX0], 0>,
627 InstrStage<1, [A9_NPipe]>],
630 // Integer to Double-precision Move
631 InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
632 // Extra 1 latency cycle since wbck is 2 cycles
633 InstrStage<3, [A9_DRegsN], 0, Reserved>,
634 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
635 InstrStage<1, [A9_MUX0], 0>,
636 InstrStage<1, [A9_NPipe]>],
639 // Single-precision to Integer Move
640 InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
641 InstrStage<2, [A9_DRegsN], 0, Reserved>,
642 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
643 InstrStage<1, [A9_MUX0], 0>,
644 InstrStage<1, [A9_NPipe]>],
647 // Double-precision to Integer Move
648 InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
649 InstrStage<2, [A9_DRegsN], 0, Reserved>,
650 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
651 InstrStage<1, [A9_MUX0], 0>,
652 InstrStage<1, [A9_NPipe]>],
655 // Single-precision FP Load
656 InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
657 InstrStage<2, [A9_DRegsN], 0, Reserved>,
658 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
659 InstrStage<1, [A9_MUX0], 0>,
660 InstrStage<1, [A9_NPipe]>],
663 // Double-precision FP Load
664 // FIXME: Result latency is 1 if address is 64-bit aligned.
665 InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
666 InstrStage<2, [A9_DRegsN], 0, Reserved>,
667 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
668 InstrStage<1, [A9_MUX0], 0>,
669 InstrStage<1, [A9_NPipe]>],
673 InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
674 InstrStage<2, [A9_DRegsN], 0, Reserved>,
675 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
676 InstrStage<1, [A9_MUX0], 0>,
677 InstrStage<1, [A9_NPipe]>], [1, 1, 1, 1]>,
679 // FP Load Multiple + update
680 InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
681 InstrStage<2, [A9_DRegsN], 0, Reserved>,
682 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
683 InstrStage<1, [A9_MUX0], 0>,
684 InstrStage<1, [A9_NPipe]>], [2, 1, 1, 1]>,
686 // Single-precision FP Store
687 InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
688 InstrStage<2, [A9_DRegsN], 0, Reserved>,
689 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
690 InstrStage<1, [A9_MUX0], 0>,
691 InstrStage<1, [A9_NPipe]>],
694 // Double-precision FP Store
695 InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
696 InstrStage<2, [A9_DRegsN], 0, Reserved>,
697 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
698 InstrStage<1, [A9_MUX0], 0>,
699 InstrStage<1, [A9_NPipe]>],
703 InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
704 InstrStage<2, [A9_DRegsN], 0, Reserved>,
705 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
706 InstrStage<1, [A9_MUX0], 0>,
707 InstrStage<1, [A9_NPipe]>], [1, 1, 1, 1]>,
709 // FP Store Multiple + update
710 InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
711 InstrStage<2, [A9_DRegsN], 0, Reserved>,
712 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
713 InstrStage<1, [A9_MUX0], 0>,
714 InstrStage<1, [A9_NPipe]>], [2, 1, 1, 1]>,
717 // FIXME: Conservatively assume insufficient alignment.
718 InstrItinData<IIC_VLD1, [InstrStage<1, [A9_DRegsN], 0, Required>,
719 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
720 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
721 InstrStage<1, [A9_MUX0], 0>,
722 InstrStage<2, [A9_NPipe]>],
725 InstrItinData<IIC_VLD1x2, [InstrStage<1, [A9_DRegsN], 0, Required>,
726 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
727 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
728 InstrStage<1, [A9_MUX0], 0>,
729 InstrStage<2, [A9_NPipe]>],
732 InstrItinData<IIC_VLD1x3, [InstrStage<1, [A9_DRegsN], 0, Required>,
733 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
734 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
735 InstrStage<1, [A9_MUX0], 0>,
736 InstrStage<3, [A9_NPipe]>],
739 InstrItinData<IIC_VLD1x4, [InstrStage<1, [A9_DRegsN], 0, Required>,
740 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
741 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
742 InstrStage<1, [A9_MUX0], 0>,
743 InstrStage<3, [A9_NPipe]>],
746 InstrItinData<IIC_VLD1u, [InstrStage<1, [A9_DRegsN], 0, Required>,
747 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
748 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
749 InstrStage<1, [A9_MUX0], 0>,
750 InstrStage<2, [A9_NPipe]>],
753 InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A9_DRegsN], 0, Required>,
754 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
755 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
756 InstrStage<1, [A9_MUX0], 0>,
757 InstrStage<2, [A9_NPipe]>],
760 InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A9_DRegsN], 0, Required>,
761 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
762 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
763 InstrStage<1, [A9_MUX0], 0>,
764 InstrStage<3, [A9_NPipe]>],
767 InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A9_DRegsN], 0, Required>,
768 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
769 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
770 InstrStage<1, [A9_MUX0], 0>,
771 InstrStage<3, [A9_NPipe]>],
775 InstrItinData<IIC_VLD2, [InstrStage<1, [A9_DRegsN], 0, Required>,
776 // Extra latency cycles since wbck is 7 cycles
777 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
778 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
779 InstrStage<1, [A9_MUX0], 0>,
780 InstrStage<2, [A9_NPipe]>],
784 InstrItinData<IIC_VLD2x2, [InstrStage<1, [A9_DRegsN], 0, Required>,
785 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
786 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
787 InstrStage<1, [A9_MUX0], 0>,
788 InstrStage<3, [A9_NPipe]>],
792 InstrItinData<IIC_VLD2ln, [InstrStage<1, [A9_DRegsN], 0, Required>,
793 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
794 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
795 InstrStage<1, [A9_MUX0], 0>,
796 InstrStage<3, [A9_NPipe]>],
800 InstrItinData<IIC_VLD2u, [InstrStage<1, [A9_DRegsN], 0, Required>,
801 // Extra latency cycles since wbck is 7 cycles
802 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
803 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
804 InstrStage<1, [A9_MUX0], 0>,
805 InstrStage<2, [A9_NPipe]>],
809 InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A9_DRegsN], 0, Required>,
810 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
811 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
812 InstrStage<1, [A9_MUX0], 0>,
813 InstrStage<3, [A9_NPipe]>],
817 InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A9_DRegsN], 0, Required>,
818 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
819 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
820 InstrStage<1, [A9_MUX0], 0>,
821 InstrStage<3, [A9_NPipe]>],
822 [4, 4, 2, 1, 1, 1, 1, 1]>,
825 // FIXME: We don't model this instruction properly
826 InstrItinData<IIC_VLD3, [InstrStage<1, [A9_DRegsN], 0, Required>,
827 // Extra latency cycles since wbck is 6 cycles
828 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
829 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
830 InstrStage<1, [A9_MUX0], 0>,
831 InstrStage<1, [A9_NPipe]>],
835 // FIXME: We don't model this instruction properly
836 InstrItinData<IIC_VLD4, [InstrStage<1, [A9_DRegsN], 0, Required>,
837 // Extra latency cycles since wbck is 6 cycles
838 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
839 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
840 InstrStage<1, [A9_MUX0], 0>,
841 InstrStage<1, [A9_NPipe]>],
845 // FIXME: We don't model this instruction properly
846 InstrItinData<IIC_VST, [InstrStage<1, [A9_DRegsN], 0, Required>,
847 // Extra latency cycles since wbck is 6 cycles
848 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
849 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
850 InstrStage<1, [A9_MUX0], 0>,
851 InstrStage<1, [A9_NPipe]>]>,
853 // Double-register Integer Unary
854 InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
855 // Extra latency cycles since wbck is 6 cycles
856 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
857 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
858 InstrStage<1, [A9_MUX0], 0>,
859 InstrStage<1, [A9_NPipe]>],
862 // Quad-register Integer Unary
863 InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
864 // Extra latency cycles since wbck is 6 cycles
865 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
866 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
867 InstrStage<1, [A9_MUX0], 0>,
868 InstrStage<1, [A9_NPipe]>],
871 // Double-register Integer Q-Unary
872 InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
873 // Extra latency cycles since wbck is 6 cycles
874 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
875 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
876 InstrStage<1, [A9_MUX0], 0>,
877 InstrStage<1, [A9_NPipe]>],
880 // Quad-register Integer Q-Unary
881 InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
882 // Extra latency cycles since wbck is 6 cycles
883 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
884 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
885 InstrStage<1, [A9_MUX0], 0>,
886 InstrStage<1, [A9_NPipe]>],
889 // Double-register Integer Binary
890 InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
891 // Extra latency cycles since wbck is 6 cycles
892 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
893 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
894 InstrStage<1, [A9_MUX0], 0>,
895 InstrStage<1, [A9_NPipe]>],
898 // Quad-register Integer Binary
899 InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
900 // Extra latency cycles since wbck is 6 cycles
901 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
902 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
903 InstrStage<1, [A9_MUX0], 0>,
904 InstrStage<1, [A9_NPipe]>],
907 // Double-register Integer Subtract
908 InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
909 // Extra latency cycles since wbck is 6 cycles
910 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
911 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
912 InstrStage<1, [A9_MUX0], 0>,
913 InstrStage<1, [A9_NPipe]>],
916 // Quad-register Integer Subtract
917 InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
918 // Extra latency cycles since wbck is 6 cycles
919 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
920 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
921 InstrStage<1, [A9_MUX0], 0>,
922 InstrStage<1, [A9_NPipe]>],
925 // Double-register Integer Shift
926 InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
927 // Extra latency cycles since wbck is 6 cycles
928 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
929 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
930 InstrStage<1, [A9_MUX0], 0>,
931 InstrStage<1, [A9_NPipe]>],
934 // Quad-register Integer Shift
935 InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
936 // Extra latency cycles since wbck is 6 cycles
937 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
938 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
939 InstrStage<1, [A9_MUX0], 0>,
940 InstrStage<1, [A9_NPipe]>],
943 // Double-register Integer Shift (4 cycle)
944 InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
945 // Extra latency cycles since wbck is 6 cycles
946 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
947 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
948 InstrStage<1, [A9_MUX0], 0>,
949 InstrStage<1, [A9_NPipe]>],
952 // Quad-register Integer Shift (4 cycle)
953 InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
954 // Extra latency cycles since wbck is 6 cycles
955 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
956 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
957 InstrStage<1, [A9_MUX0], 0>,
958 InstrStage<1, [A9_NPipe]>],
961 // Double-register Integer Binary (4 cycle)
962 InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
963 // Extra latency cycles since wbck is 6 cycles
964 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
965 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
966 InstrStage<1, [A9_MUX0], 0>,
967 InstrStage<1, [A9_NPipe]>],
970 // Quad-register Integer Binary (4 cycle)
971 InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
972 // Extra latency cycles since wbck is 6 cycles
973 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
974 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
975 InstrStage<1, [A9_MUX0], 0>,
976 InstrStage<1, [A9_NPipe]>],
979 // Double-register Integer Subtract (4 cycle) -- keyed on IIC_VSUBi4D, not the plain IIC_VSUBiD class
980 InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
981 // Extra latency cycles since wbck is 6 cycles
982 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
983 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
984 InstrStage<1, [A9_MUX0], 0>,
985 InstrStage<1, [A9_NPipe]>],
988 // Quad-register Integer Subtract (4 cycle) -- keyed on IIC_VSUBi4Q, not the plain IIC_VSUBiQ class
989 InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
990 // Extra latency cycles since wbck is 6 cycles
991 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
992 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
993 InstrStage<1, [A9_MUX0], 0>,
994 InstrStage<1, [A9_NPipe]>],
998 // Double-register Integer Count
999 InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1000 // Extra latency cycles since wbck is 6 cycles
1001 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1002 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1003 InstrStage<1, [A9_MUX0], 0>,
1004 InstrStage<1, [A9_NPipe]>],
1007 // Quad-register Integer Count
1008 // Result written in N3, but that is relative to the last cycle of multicycle,
1009 // so we use 4 for those cases
1010 InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1011 // Extra latency cycles since wbck is 7 cycles
1012 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1013 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1014 InstrStage<1, [A9_MUX0], 0>,
1015 InstrStage<2, [A9_NPipe]>],
1018 // Double-register Absolute Difference and Accumulate
1019 InstrItinData<IIC_VABAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1020 // Extra latency cycles since wbck is 6 cycles
1021 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1022 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1023 InstrStage<1, [A9_MUX0], 0>,
1024 InstrStage<1, [A9_NPipe]>],
1027 // Quad-register Absolute Difference and Accumulate
1028 InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1029 // Extra latency cycles since wbck is 6 cycles
1030 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1031 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1032 InstrStage<1, [A9_MUX0], 0>,
1033 InstrStage<2, [A9_NPipe]>],
1036 // Double-register Integer Pair Add Long
1037 InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1038 // Extra latency cycles since wbck is 6 cycles
1039 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1040 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1041 InstrStage<1, [A9_MUX0], 0>,
1042 InstrStage<1, [A9_NPipe]>],
1045 // Quad-register Integer Pair Add Long
1046 InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1047 // Extra latency cycles since wbck is 6 cycles
1048 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1049 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1050 InstrStage<1, [A9_MUX0], 0>,
1051 InstrStage<2, [A9_NPipe]>],
1055 // Double-register Integer Multiply (.8, .16)
1056 InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
1057 // Extra latency cycles since wbck is 6 cycles
1058 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1059 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1060 InstrStage<1, [A9_MUX0], 0>,
1061 InstrStage<1, [A9_NPipe]>],
1064 // Quad-register Integer Multiply (.8, .16)
1065 InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
1066 // Extra latency cycles since wbck is 7 cycles
1067 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1068 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1069 InstrStage<1, [A9_MUX0], 0>,
1070 InstrStage<2, [A9_NPipe]>],
1074 // Double-register Integer Multiply (.32)
1075 InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
1076 // Extra latency cycles since wbck is 7 cycles
1077 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1078 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1079 InstrStage<1, [A9_MUX0], 0>,
1080 InstrStage<2, [A9_NPipe]>],
1083 // Quad-register Integer Multiply (.32)
1084 InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
1085 // Extra latency cycles since wbck is 9 cycles
1086 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1087 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1088 InstrStage<1, [A9_MUX0], 0>,
1089 InstrStage<4, [A9_NPipe]>],
1092 // Double-register Integer Multiply-Accumulate (.8, .16)
1093 InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
1094 // Extra latency cycles since wbck is 6 cycles
1095 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1096 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1097 InstrStage<1, [A9_MUX0], 0>,
1098 InstrStage<1, [A9_NPipe]>],
1101 // Double-register Integer Multiply-Accumulate (.32)
1102 InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
1103 // Extra latency cycles since wbck is 7 cycles
1104 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1105 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1106 InstrStage<1, [A9_MUX0], 0>,
1107 InstrStage<2, [A9_NPipe]>],
1110 // Quad-register Integer Multiply-Accumulate (.8, .16)
1111 InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
1112 // Extra latency cycles since wbck is 7 cycles
1113 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1114 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1115 InstrStage<1, [A9_MUX0], 0>,
1116 InstrStage<2, [A9_NPipe]>],
1119 // Quad-register Integer Multiply-Accumulate (.32)
1120 InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
1121 // Extra latency cycles since wbck is 9 cycles
1122 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1123 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1124 InstrStage<1, [A9_MUX0], 0>,
1125 InstrStage<4, [A9_NPipe]>],
1130 InstrItinData<IIC_VMOV, [InstrStage<1, [A9_DRegsN], 0, Required>,
1131 InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1132 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1133 InstrStage<1, [A9_MUX0], 0>,
1134 InstrStage<1, [A9_NPipe]>],
1138 InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_DRegsN], 0, Required>,
1139 // Extra latency cycles since wbck is 6 cycles
1140 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1141 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1142 InstrStage<1, [A9_MUX0], 0>,
1143 InstrStage<1, [A9_NPipe]>],
1146 // Double-register Permute Move
1147 InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1148 // FIXME: all latencies are arbitrary, no information is available
1149 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1150 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1151 InstrStage<1, [A9_MUX0], 0>,
1152 InstrStage<1, [A9_NPipe]>],
1155 // Quad-register Permute Move
1156 InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1157 // FIXME: all latencies are arbitrary, no information is available
1158 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1159 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1160 InstrStage<1, [A9_MUX0], 0>,
1161 InstrStage<1, [A9_NPipe]>],
1164 // Integer to Single-precision Move
1165 InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_DRegsN], 0, Required>,
1166 // FIXME: all latencies are arbitrary, no information is available
1167 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1168 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1169 InstrStage<1, [A9_MUX0], 0>,
1170 InstrStage<1, [A9_NPipe]>],
1173 // Integer to Double-precision Move
1174 InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_DRegsN], 0, Required>,
1175 // FIXME: all latencies are arbitrary, no information is available
1176 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1177 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1178 InstrStage<1, [A9_MUX0], 0>,
1179 InstrStage<1, [A9_NPipe]>],
1182 // Single-precision to Integer Move
1183 InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_DRegsN], 0, Required>,
1184 // FIXME: all latencies are arbitrary, no information is available
1185 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1186 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1187 InstrStage<1, [A9_MUX0], 0>,
1188 InstrStage<1, [A9_NPipe]>],
1191 // Double-precision to Integer Move
1192 InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_DRegsN], 0, Required>,
1193 // FIXME: all latencies are arbitrary, no information is available
1194 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1195 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1196 InstrStage<1, [A9_MUX0], 0>,
1197 InstrStage<1, [A9_NPipe]>],
1200 // Integer to Lane Move
1201 InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN], 0, Required>,
1202 // FIXME: all latencies are arbitrary, no information is available
1203 InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
1204 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1205 InstrStage<1, [A9_MUX0], 0>,
1206 InstrStage<2, [A9_NPipe]>],
1210 // Vector narrow move
1211 InstrItinData<IIC_VMOVN, [InstrStage<1, [A9_DRegsN], 0, Required>,
1212 // Extra latency cycles since wbck is 6 cycles
1213 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1214 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1215 InstrStage<1, [A9_MUX0], 0>,
1216 InstrStage<1, [A9_NPipe]>],
1219 // Double-register FP Unary
1220 InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1221 // Extra latency cycles since wbck is 6 cycles
1222 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1223 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1224 InstrStage<1, [A9_MUX0], 0>,
1225 InstrStage<1, [A9_NPipe]>],
1228 // Quad-register FP Unary
1229 // Result written in N5, but that is relative to the last cycle of multicycle,
1230 // so we use 6 for those cases
1231 InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1232 // Extra latency cycles since wbck is 7 cycles
1233 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1234 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1235 InstrStage<1, [A9_MUX0], 0>,
1236 InstrStage<2, [A9_NPipe]>],
1239 // Double-register FP Binary
1240 // FIXME: We're using this itin for many instructions and [2, 2] here is too
1242 InstrItinData<IIC_VBIND, [InstrStage<1, [A9_DRegsN], 0, Required>,
1243 // Extra latency cycles since wbck is 6 cycles (NOTE(review): comment said 7; 6 matches the 7-cycle reservation below, as in sibling entries -- confirm)
1244 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1245 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1246 InstrStage<1, [A9_MUX0], 0>,
1247 InstrStage<1, [A9_NPipe]>],
1250 // Quad-register FP Binary
1251 // Result written in N5, but that is relative to the last cycle of multicycle,
1252 // so we use 6 for those cases
1253 // FIXME: We're using this itin for many instructions and [2, 2] here is too
1255 InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1256 // Extra latency cycles since wbck is 7 cycles (NOTE(review): comment said 8; 7 matches the 8-cycle reservation below, as in sibling entries -- confirm)
1257 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1258 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1259 InstrStage<1, [A9_MUX0], 0>,
1260 InstrStage<2, [A9_NPipe]>],
1263 // Double-register FP Multiply-Accumulate
1264 InstrItinData<IIC_VMACD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1265 // Extra latency cycles since wbck is 7 cycles
1266 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1267 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1268 InstrStage<1, [A9_MUX0], 0>,
1269 InstrStage<2, [A9_NPipe]>],
1272 // Quad-register FP Multiply-Accumulate
1273 // Result written in N9, but that is relative to the last cycle of multicycle,
1274 // so we use 10 for those cases
1275 InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1276 // Extra latency cycles since wbck is 9 cycles
1277 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1278 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1279 InstrStage<1, [A9_MUX0], 0>,
1280 InstrStage<4, [A9_NPipe]>],
1283 // Double-register Reciprocal Step
1284 InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1285 // Extra latency cycles since wbck is 7 cycles
1286 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1287 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1288 InstrStage<1, [A9_MUX0], 0>,
1289 InstrStage<2, [A9_NPipe]>],
1292 // Quad-register Reciprocal Step
1293 InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1294 // Extra latency cycles since wbck is 9 cycles
1295 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1296 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1297 InstrStage<1, [A9_MUX0], 0>,
1298 InstrStage<4, [A9_NPipe]>],
1301 // Double-register Permute
1302 InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1303 // Extra latency cycles since wbck is 6 cycles
1304 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1305 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1306 InstrStage<1, [A9_MUX0], 0>,
1307 InstrStage<1, [A9_NPipe]>],
1310 // Quad-register Permute
1311 // Result written in N2, but that is relative to the last cycle of multicycle,
1312 // so we use 3 for those cases
1313 InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1314 // Extra latency cycles since wbck is 7 cycles
1315 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1316 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1317 InstrStage<1, [A9_MUX0], 0>,
1318 InstrStage<2, [A9_NPipe]>],
1321 // Quad-register Permute (3 cycle issue)
1322 // Result written in N2, but that is relative to the last cycle of multicycle,
1323 // so we use 4 for those cases
1324 InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_DRegsN], 0, Required>,
1325 // Extra latency cycles since wbck is 8 cycles
1326 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1327 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1328 InstrStage<1, [A9_MUX0], 0>,
1329 InstrStage<3, [A9_NPipe]>],
1333 // Double-register VEXT
1334 InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1335 // Extra latency cycles since wbck is 6 cycles (NOTE(review): comment said 7; 6 matches the 7-cycle reservation below, as in sibling entries -- confirm)
1336 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1337 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1338 InstrStage<1, [A9_MUX0], 0>,
1339 InstrStage<1, [A9_NPipe]>],
1342 // Quad-register VEXT
1343 InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1344 // Extra latency cycles since wbck is 7 cycles (NOTE(review): comment said 9; 7 matches the 8-cycle reservation below, as in sibling entries -- confirm)
1345 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1346 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1347 InstrStage<1, [A9_MUX0], 0>,
1348 InstrStage<2, [A9_NPipe]>],
1352 InstrItinData<IIC_VTB1, [InstrStage<1, [A9_DRegsN], 0, Required>,
1353 // Extra latency cycles since wbck is 7 cycles
1354 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1355 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1356 InstrStage<1, [A9_MUX0], 0>,
1357 InstrStage<2, [A9_NPipe]>],
1359 InstrItinData<IIC_VTB2, [InstrStage<2, [A9_DRegsN], 0, Required>,
1360 // Extra latency cycles since wbck is 7 cycles
1361 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1362 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1363 InstrStage<1, [A9_MUX0], 0>,
1364 InstrStage<2, [A9_NPipe]>],
1366 InstrItinData<IIC_VTB3, [InstrStage<2, [A9_DRegsN], 0, Required>,
1367 // Extra latency cycles since wbck is 8 cycles
1368 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1369 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1370 InstrStage<1, [A9_MUX0], 0>,
1371 InstrStage<3, [A9_NPipe]>],
1373 InstrItinData<IIC_VTB4, [InstrStage<1, [A9_DRegsN], 0, Required>,
1374 // Extra latency cycles since wbck is 8 cycles
1375 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1376 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1377 InstrStage<1, [A9_MUX0], 0>,
1378 InstrStage<3, [A9_NPipe]>],
1379 [4, 2, 2, 3, 3, 1]>,
1382 InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_DRegsN], 0, Required>,
1383 // Extra latency cycles since wbck is 7 cycles
1384 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1385 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1386 InstrStage<1, [A9_MUX0], 0>,
1387 InstrStage<2, [A9_NPipe]>],
1389 InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_DRegsN], 0, Required>,
1390 // Extra latency cycles since wbck is 7 cycles
1391 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1392 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1393 InstrStage<1, [A9_MUX0], 0>,
1394 InstrStage<2, [A9_NPipe]>],
1396 InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_DRegsN], 0, Required>,
1397 // Extra latency cycles since wbck is 8 cycles
1398 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1399 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1400 InstrStage<1, [A9_MUX0], 0>,
1401 InstrStage<3, [A9_NPipe]>],
1402 [4, 1, 2, 2, 3, 1]>,
1403 InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_DRegsN], 0, Required>,
1404 // Extra latency cycles since wbck is 8 cycles
1405 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1406 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1407 InstrStage<1, [A9_MUX0], 0>,
1408 InstrStage<3, [A9_NPipe]>], // 3 NEON pipeline cycles, in line with the 9-cycle reservation above (same shape as IIC_VTB4 / IIC_VTBX3)
1409 [4, 1, 2, 2, 3, 3, 1]>