1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
12 //===----------------------------------------------------------------------===//
15 // Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical
// Functional units referenced by the Cortex-A9 itineraries in this file.
19 def A9_Issue : FuncUnit; // issue; the FP/NEON load/store itineraries use it to enforce 1 load/store per cycle
20 def A9_Pipe0 : FuncUnit; // integer pipeline 0
21 def A9_Pipe1 : FuncUnit; // integer pipeline 1
22 def A9_LSPipe : FuncUnit; // load/store (LS) pipe
23 def A9_NPipe : FuncUnit; // NEON ALU/MUL pipe
24 def A9_DRegsVFP: FuncUnit; // FP register set, VFP side (artificial unit for the shared VFP/NEON register file)
25 def A9_DRegsN : FuncUnit; // FP register set, NEON side (artificial unit for the shared VFP/NEON register file)
27 // Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1
29 def CortexA9Itineraries : ProcessorItineraries<
30 [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1, A9_Issue], [
31 // Two fully-pipelined integer ALU pipelines
32 // FIXME: There are no operand latencies for these instructions at all!
34 // Move instructions, unconditional
35 InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>,
36 InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
37 InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
38 InstrItinData<IIC_iMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
41 InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
43 // Binary Instructions that produce a result
44 InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
45 InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2, 2]>,
46 InstrItinData<IIC_iALUsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
47 InstrItinData<IIC_iALUsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1, 1]>,
49 // Unary Instructions that produce a result
50 InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
51 InstrItinData<IIC_iUNAsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
52 InstrItinData<IIC_iUNAsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
54 // Compare instructions
55 InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
56 InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
57 InstrItinData<IIC_iCMPsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
58 InstrItinData<IIC_iCMPsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
60 // Move instructions, conditional
61 InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
62 InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
63 InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
64 InstrItinData<IIC_iCMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
66 // Integer multiply pipeline
68 InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Pipe1], 0>,
69 InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
70 InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Pipe1], 0>,
71 InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>,
72 InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Pipe1], 0>,
73 InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
74 InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Pipe1], 0>,
75 InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>,
76 InstrItinData<IIC_iMUL64 , [InstrStage<2, [A9_Pipe1], 0>,
77 InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
78 InstrItinData<IIC_iMAC64 , [InstrStage<2, [A9_Pipe1], 0>,
79 InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
80 // Integer load pipeline
81 // FIXME: The timings are some rough approximations
84 InstrItinData<IIC_iLoadi , [InstrStage<1, [A9_Pipe1]>,
85 InstrStage<1, [A9_LSPipe]>], [3, 1]>,
88 InstrItinData<IIC_iLoadr , [InstrStage<1, [A9_Pipe1]>,
89 InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
91 // Scaled register offset
92 InstrItinData<IIC_iLoadsi , [InstrStage<1, [A9_Pipe1]>,
93 InstrStage<2, [A9_LSPipe]>], [4, 1, 1]>,
95 // Immediate offset with update
96 InstrItinData<IIC_iLoadiu , [InstrStage<1, [A9_Pipe1]>,
97 InstrStage<2, [A9_LSPipe]>], [3, 2, 1]>,
99 // Register offset with update
100 InstrItinData<IIC_iLoadru , [InstrStage<1, [A9_Pipe1]>,
101 InstrStage<2, [A9_LSPipe]>], [3, 2, 1, 1]>,
103 // Scaled register offset with update
104 InstrItinData<IIC_iLoadsiu , [InstrStage<1, [A9_Pipe1]>,
105 InstrStage<2, [A9_LSPipe]>], [4, 3, 1, 1]>,
108 InstrItinData<IIC_iLoadm , [InstrStage<1, [A9_Pipe1]>,
109 InstrStage<1, [A9_LSPipe]>]>,
111 // Integer store pipeline
114 InstrItinData<IIC_iStorei , [InstrStage<1, [A9_Pipe1]>,
115 InstrStage<1, [A9_LSPipe]>], [3, 1]>,
118 InstrItinData<IIC_iStorer , [InstrStage<1, [ A9_Pipe1]>,
119 InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
121 // Scaled register offset
122 InstrItinData<IIC_iStoresi , [InstrStage<1, [A9_Pipe1]>,
123 InstrStage<2, [A9_LSPipe]>], [3, 1, 1]>,
125 // Immediate offset with update
126 InstrItinData<IIC_iStoreiu , [InstrStage<1, [A9_Pipe1]>,
127 InstrStage<1, [A9_LSPipe]>], [2, 3, 1]>,
129 // Register offset with update
130 InstrItinData<IIC_iStoreru , [InstrStage<1, [A9_Pipe1]>,
131 InstrStage<1, [A9_LSPipe]>], [2, 3, 1, 1]>,
133 // Scaled register offset with update
134 InstrItinData<IIC_iStoresiu, [InstrStage<1, [A9_Pipe1]>,
135 InstrStage<2, [A9_LSPipe]>], [3, 3, 1, 1]>,
138 InstrItinData<IIC_iStorem , [InstrStage<1, [A9_Pipe1]>,
139 InstrStage<1, [A9_LSPipe]>]>,
142 // no delay slots, so the latency of a branch is unimportant
143 InstrItinData<IIC_Br , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
145 // VFP and NEON share the same register file. This means that every VFP
146 // instruction should wait for full completion of the consecutive NEON
147 // instruction and vice-versa. We model this behavior with two artificial FUs:
148 // DRegsVFP and DRegsN.
150 // Every VFP instruction:
151 // - Acquires DRegsVFP resource for 1 cycle
152 // - Reserves DRegsN resource for the whole duration (including time to
153 // register file writeback!).
154 // Every NEON instruction does the same but with FUs swapped.
156 // Since the reserved FU cannot be acquired, this models precisely "cross-domain"
160 // Issue through integer pipeline, and execute in NEON unit.
162 // FP Special Register to Integer Register File Move
163 InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
164 InstrStage<2, [A9_DRegsN], 0, Reserved>,
165 InstrStage<1, [A9_Pipe1]>,
166 InstrStage<1, [A9_NPipe]>]>,
168 // Single-precision FP Unary
169 InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
170 // Extra latency cycles since wbck is 2 cycles
171 InstrStage<3, [A9_DRegsN], 0, Reserved>,
172 InstrStage<1, [A9_Pipe1]>,
173 InstrStage<1, [A9_NPipe]>], [1, 1]>,
175 // Double-precision FP Unary
176 InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
177 // Extra latency cycles since wbck is 2 cycles
178 InstrStage<3, [A9_DRegsN], 0, Reserved>,
179 InstrStage<1, [A9_Pipe1]>,
180 InstrStage<1, [A9_NPipe]>], [1, 1]>,
183 // Single-precision FP Compare
184 InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
185 // Extra latency cycles since wbck is 4 cycles
186 InstrStage<5, [A9_DRegsN], 0, Reserved>,
187 InstrStage<1, [A9_Pipe1]>,
188 InstrStage<1, [A9_NPipe]>], [1, 1]>,
190 // Double-precision FP Compare
191 InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
192 // Extra latency cycles since wbck is 4 cycles
193 InstrStage<5, [A9_DRegsN], 0, Reserved>,
194 InstrStage<1, [A9_Pipe1]>,
195 InstrStage<1, [A9_NPipe]>], [1, 1]>,
197 // Single to Double FP Convert
198 InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
199 InstrStage<5, [A9_DRegsN], 0, Reserved>,
200 InstrStage<1, [A9_Pipe1]>,
201 InstrStage<1, [A9_NPipe]>], [4, 1]>,
203 // Double to Single FP Convert
204 InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
205 InstrStage<5, [A9_DRegsN], 0, Reserved>,
206 InstrStage<1, [A9_Pipe1]>,
207 InstrStage<1, [A9_NPipe]>], [4, 1]>,
210 // Single to Half FP Convert
211 InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
212 InstrStage<5, [A9_DRegsN], 0, Reserved>,
213 InstrStage<1, [A9_Pipe1]>,
214 InstrStage<1, [A9_NPipe]>], [4, 1]>,
216 // Half to Single FP Convert
217 InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
218 InstrStage<3, [A9_DRegsN], 0, Reserved>,
219 InstrStage<1, [A9_Pipe1]>,
220 InstrStage<1, [A9_NPipe]>], [2, 1]>,
223 // Single-Precision FP to Integer Convert
224 InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
225 InstrStage<5, [A9_DRegsN], 0, Reserved>,
226 InstrStage<1, [A9_Pipe1]>,
227 InstrStage<1, [A9_NPipe]>], [4, 1]>,
229 // Double-Precision FP to Integer Convert
230 InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
231 InstrStage<5, [A9_DRegsN], 0, Reserved>,
232 InstrStage<1, [A9_Pipe1]>,
233 InstrStage<1, [A9_NPipe]>], [4, 1]>,
235 // Integer to Single-Precision FP Convert
236 InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
237 InstrStage<5, [A9_DRegsN], 0, Reserved>,
238 InstrStage<1, [A9_Pipe1]>,
239 InstrStage<1, [A9_NPipe]>], [4, 1]>,
241 // Integer to Double-Precision FP Convert
242 InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
243 InstrStage<5, [A9_DRegsN], 0, Reserved>,
244 InstrStage<1, [A9_Pipe1]>,
245 InstrStage<1, [A9_NPipe]>], [4, 1]>,
247 // Single-precision FP ALU
248 InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
249 InstrStage<5, [A9_DRegsN], 0, Reserved>,
250 InstrStage<1, [A9_Pipe1]>,
251 InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
253 // Double-precision FP ALU
254 InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
255 InstrStage<5, [A9_DRegsN], 0, Reserved>,
256 InstrStage<1, [A9_Pipe1]>,
257 InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
259 // Single-precision FP Multiply
260 InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
261 InstrStage<6, [A9_DRegsN], 0, Reserved>,
262 InstrStage<1, [A9_Pipe1]>,
263 InstrStage<1, [A9_NPipe]>], [5, 1, 1]>,
265 // Double-precision FP Multiply
266 InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
267 InstrStage<7, [A9_DRegsN], 0, Reserved>,
268 InstrStage<1, [A9_Pipe1]>,
269 InstrStage<2, [A9_NPipe]>], [6, 1, 1]>,
271 // Single-precision FP MAC
272 InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
273 InstrStage<9, [A9_DRegsN], 0, Reserved>,
274 InstrStage<1, [A9_Pipe1]>,
275 InstrStage<1, [A9_NPipe]>], [8, 0, 1, 1]>,
277 // Double-precision FP MAC
278 InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
279 InstrStage<10, [A9_DRegsN], 0, Reserved>,
280 InstrStage<1, [A9_Pipe1]>,
281 InstrStage<2, [A9_NPipe]>], [9, 0, 1, 1]>,
283 // Single-precision FP DIV
284 InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
285 InstrStage<16, [A9_DRegsN], 0, Reserved>,
286 InstrStage<1, [A9_Pipe1]>,
287 InstrStage<10, [A9_NPipe]>], [15, 1, 1]>,
289 // Double-precision FP DIV
290 InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
291 InstrStage<26, [A9_DRegsN], 0, Reserved>,
292 InstrStage<1, [A9_Pipe1]>,
293 InstrStage<20, [A9_NPipe]>], [25, 1, 1]>,
295 // Single-precision FP SQRT
296 InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
297 InstrStage<18, [A9_DRegsN], 0, Reserved>,
298 InstrStage<1, [A9_Pipe1]>,
299 InstrStage<13, [A9_NPipe]>], [17, 1]>,
301 // Double-precision FP SQRT
302 InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
303 InstrStage<33, [A9_DRegsN], 0, Reserved>,
304 InstrStage<1, [A9_Pipe1]>,
305 InstrStage<28, [A9_NPipe]>], [32, 1]>,
308 // Integer to Single-precision Move
309 InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
310 // Extra 1 latency cycle since wbck is 2 cycles
311 InstrStage<3, [A9_DRegsN], 0, Reserved>,
312 InstrStage<1, [A9_Pipe1]>,
313 InstrStage<1, [A9_NPipe]>], [1, 1]>,
315 // Integer to Double-precision Move
316 InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
317 // Extra 1 latency cycle since wbck is 2 cycles
318 InstrStage<3, [A9_DRegsN], 0, Reserved>,
319 InstrStage<1, [A9_Pipe1]>,
320 InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
322 // Single-precision to Integer Move
323 InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
324 InstrStage<2, [A9_DRegsN], 0, Reserved>,
325 InstrStage<1, [A9_Pipe1]>,
326 InstrStage<1, [A9_NPipe]>], [1, 1]>,
328 // Double-precision to Integer Move
329 InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
330 InstrStage<2, [A9_DRegsN], 0, Reserved>,
331 InstrStage<1, [A9_Pipe1]>,
332 InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
334 // Single-precision FP Load
335 // use A9_Issue to enforce the 1 load/store per cycle limit
336 InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
337 InstrStage<2, [A9_DRegsN], 0, Reserved>,
338 InstrStage<1, [A9_Issue], 0>,
339 InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
340 InstrStage<1, [A9_LSPipe], 0>,
341 InstrStage<1, [A9_NPipe]>]>,
343 // Double-precision FP Load
344 // use A9_Issue to enforce the 1 load/store per cycle limit
345 InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
346 InstrStage<2, [A9_DRegsN], 0, Reserved>,
347 InstrStage<1, [A9_Issue], 0>,
348 InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
349 InstrStage<1, [A9_LSPipe], 0>,
350 InstrStage<1, [A9_NPipe]>]>,
353 // use A9_Issue to enforce the 1 load/store per cycle limit
354 InstrItinData<IIC_fpLoadm, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
355 InstrStage<2, [A9_DRegsN], 0, Reserved>,
356 InstrStage<1, [A9_Issue], 0>,
357 InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
358 InstrStage<1, [A9_LSPipe], 0>,
359 InstrStage<1, [A9_NPipe]>]>,
361 // Single-precision FP Store
362 // use A9_Issue to enforce the 1 load/store per cycle limit
363 InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
364 InstrStage<2, [A9_DRegsN], 0, Reserved>,
365 InstrStage<1, [A9_Issue], 0>,
366 InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
367 InstrStage<1, [A9_LSPipe], 0>,
368 InstrStage<1, [A9_NPipe]>]>,
370 // Double-precision FP Store
371 // use A9_Issue to enforce the 1 load/store per cycle limit
372 InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
373 InstrStage<2, [A9_DRegsN], 0, Reserved>,
374 InstrStage<1, [A9_Issue], 0>,
375 InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
376 InstrStage<1, [A9_LSPipe], 0>,
377 InstrStage<1, [A9_NPipe]>]>,
380 // use A9_Issue to enforce the 1 load/store per cycle limit
381 InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
382 InstrStage<2, [A9_DRegsN], 0, Reserved>,
383 InstrStage<1, [A9_Issue], 0>,
384 InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
385 InstrStage<1, [A9_LSPipe], 0>,
386 InstrStage<1, [A9_NPipe]>]>,
388 // Issue through integer pipeline, and execute in NEON unit.
389 // FIXME: Neon pipeline and LdSt unit are multiplexed.
390 // Add some syntactic sugar to model this!
392 // FIXME: We don't model this instruction properly
393 InstrItinData<IIC_VLD1, [InstrStage<1, [A9_DRegsN], 0, Required>,
394 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
395 InstrStage<1, [A9_Issue], 0>,
396 InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
397 InstrStage<1, [A9_LSPipe], 0>,
398 InstrStage<1, [A9_NPipe]>]>,
401 // FIXME: We don't model this instruction properly
402 InstrItinData<IIC_VLD2, [InstrStage<1, [A9_DRegsN], 0, Required>,
403 // Extra latency cycles since wbck is 6 cycles
404 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
405 InstrStage<1, [A9_Issue], 0>,
406 InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
407 InstrStage<1, [A9_LSPipe], 0>,
408 InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
411 // FIXME: We don't model this instruction properly
412 InstrItinData<IIC_VLD3, [InstrStage<1, [A9_DRegsN], 0, Required>,
413 // Extra latency cycles since wbck is 6 cycles
414 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
415 InstrStage<1, [A9_Issue], 0>,
416 InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
417 InstrStage<1, [A9_LSPipe], 0>,
418 InstrStage<1, [A9_NPipe]>], [2, 2, 2, 1]>,
421 // FIXME: We don't model this instruction properly
422 InstrItinData<IIC_VLD4, [InstrStage<1, [A9_DRegsN], 0, Required>,
423 // Extra latency cycles since wbck is 6 cycles
424 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
425 InstrStage<1, [A9_Issue], 0>,
426 InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
427 InstrStage<1, [A9_LSPipe], 0>,
428 InstrStage<1, [A9_NPipe]>], [2, 2, 2, 2, 1]>,
431 // FIXME: We don't model this instruction properly
432 InstrItinData<IIC_VST, [InstrStage<1, [A9_DRegsN], 0, Required>,
433 // Extra latency cycles since wbck is 6 cycles
434 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
435 InstrStage<1, [A9_Issue], 0>,
436 InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
437 InstrStage<1, [A9_LSPipe], 0>,
438 InstrStage<1, [A9_NPipe]>]>,
440 // Double-register Integer Unary
441 InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
442 // Extra latency cycles since wbck is 6 cycles
443 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
444 InstrStage<1, [A9_Pipe1]>,
445 InstrStage<1, [A9_NPipe]>], [4, 2]>,
447 // Quad-register Integer Unary
448 InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
449 // Extra latency cycles since wbck is 6 cycles
450 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
451 InstrStage<1, [A9_Pipe1]>,
452 InstrStage<1, [A9_NPipe]>], [4, 2]>,
454 // Double-register Integer Q-Unary
455 InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
456 // Extra latency cycles since wbck is 6 cycles
457 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
458 InstrStage<1, [A9_Pipe1]>,
459 InstrStage<1, [A9_NPipe]>], [4, 1]>,
461 // Quad-register Integer Q-Unary
462 InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
463 // Extra latency cycles since wbck is 6 cycles
464 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
465 InstrStage<1, [A9_Pipe1]>,
466 InstrStage<1, [A9_NPipe]>], [4, 1]>,
468 // Double-register Integer Binary
469 InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
470 // Extra latency cycles since wbck is 6 cycles
471 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
472 InstrStage<1, [A9_Pipe1]>,
473 InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
475 // Quad-register Integer Binary
476 InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
477 // Extra latency cycles since wbck is 6 cycles
478 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
479 InstrStage<1, [A9_Pipe1]>,
480 InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
482 // Double-register Integer Subtract
483 InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
484 // Extra latency cycles since wbck is 6 cycles
485 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
486 InstrStage<1, [A9_Pipe1]>,
487 InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
489 // Quad-register Integer Subtract
490 InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
491 // Extra latency cycles since wbck is 6 cycles
492 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
493 InstrStage<1, [A9_Pipe1]>,
494 InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
496 // Double-register Integer Shift
497 InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
498 // Extra latency cycles since wbck is 6 cycles
499 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
500 InstrStage<1, [A9_Pipe1]>,
501 InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
503 // Quad-register Integer Shift
504 InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
505 // Extra latency cycles since wbck is 6 cycles
506 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
507 InstrStage<1, [A9_Pipe1]>,
508 InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
510 // Double-register Integer Shift (4 cycle)
511 InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
512 // Extra latency cycles since wbck is 6 cycles
513 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
514 InstrStage<1, [A9_Pipe1]>,
515 InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
517 // Quad-register Integer Shift (4 cycle)
518 InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
519 // Extra latency cycles since wbck is 6 cycles
520 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
521 InstrStage<1, [A9_Pipe1]>,
522 InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
524 // Double-register Integer Binary (4 cycle)
525 InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
526 // Extra latency cycles since wbck is 6 cycles
527 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
528 InstrStage<1, [A9_Pipe1]>,
529 InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
531 // Quad-register Integer Binary (4 cycle)
532 InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
533 // Extra latency cycles since wbck is 6 cycles
534 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
535 InstrStage<1, [A9_Pipe1]>,
536 InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
538 // Double-register Integer Subtract (4 cycle)
// Keyed on the 4-cycle class IIC_VSUBi4D. IIC_VSUBiD already has its own
// (3-cycle) entry earlier in this list, so reusing that key here would
// define the same itinerary class twice.
539 InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
540 // Extra latency cycles since wbck is 6 cycles
541 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
542 InstrStage<1, [A9_Pipe1]>,
543 InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
545 // Quad-register Integer Subtract (4 cycle)
// Keyed on the 4-cycle class IIC_VSUBi4Q. IIC_VSUBiQ already has its own
// (3-cycle) entry earlier in this list, so reusing that key here would
// define the same itinerary class twice.
546 InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
547 // Extra latency cycles since wbck is 6 cycles
548 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
549 InstrStage<1, [A9_Pipe1]>,
550 InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
553 // Double-register Integer Count
554 InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
555 // Extra latency cycles since wbck is 6 cycles
556 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
557 InstrStage<1, [A9_Pipe1]>,
558 InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
560 // Quad-register Integer Count
561 // Result written in N3, but that is relative to the last cycle of multicycle,
562 // so we use 4 for those cases
563 InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
564 // Extra latency cycles since wbck is 7 cycles
565 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
566 InstrStage<1, [A9_Pipe1]>,
567 InstrStage<2, [A9_NPipe]>], [4, 2, 2]>,
569 // Double-register Absolute Difference and Accumulate
570 InstrItinData<IIC_VABAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
571 // Extra latency cycles since wbck is 6 cycles
572 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
573 InstrStage<1, [A9_Pipe1]>,
574 InstrStage<1, [A9_NPipe]>], [6, 3, 2, 1]>,
576 // Quad-register Absolute Difference and Accumulate
577 InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
578 // Extra latency cycles since wbck is 6 cycles
579 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
580 InstrStage<1, [A9_Pipe1]>,
581 InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
583 // Double-register Integer Pair Add Long
584 InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
585 // Extra latency cycles since wbck is 6 cycles
586 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
587 InstrStage<1, [A9_Pipe1]>,
588 InstrStage<1, [A9_NPipe]>], [6, 3, 1]>,
590 // Quad-register Integer Pair Add Long
591 InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
592 // Extra latency cycles since wbck is 6 cycles
593 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
594 InstrStage<1, [A9_Pipe1]>,
595 InstrStage<2, [A9_NPipe]>], [6, 3, 1]>,
598 // Double-register Integer Multiply (.8, .16)
599 InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
600 // Extra latency cycles since wbck is 6 cycles
601 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
602 InstrStage<1, [A9_Pipe1]>,
603 InstrStage<1, [A9_NPipe]>], [6, 2, 2]>,
605 // Quad-register Integer Multiply (.8, .16)
606 InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
607 // Extra latency cycles since wbck is 7 cycles
608 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
609 InstrStage<1, [A9_Pipe1]>,
610 InstrStage<2, [A9_NPipe]>], [7, 2, 2]>,
613 // Double-register Integer Multiply (.32)
614 InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
615 // Extra latency cycles since wbck is 7 cycles
616 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
617 InstrStage<1, [A9_Pipe1]>,
618 InstrStage<2, [A9_NPipe]>], [7, 2, 1]>,
620 // Quad-register Integer Multiply (.32)
621 InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
622 // Extra latency cycles since wbck is 9 cycles
623 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
624 InstrStage<1, [A9_Pipe1]>,
625 InstrStage<4, [A9_NPipe]>], [9, 2, 1]>,
627 // Double-register Integer Multiply-Accumulate (.8, .16)
628 InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
629 // Extra latency cycles since wbck is 6 cycles
630 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
631 InstrStage<1, [A9_Pipe1]>,
632 InstrStage<1, [A9_NPipe]>], [6, 3, 2, 2]>,
634 // Double-register Integer Multiply-Accumulate (.32)
635 InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
636 // Extra latency cycles since wbck is 7 cycles
637 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
638 InstrStage<1, [A9_Pipe1]>,
639 InstrStage<2, [A9_NPipe]>], [7, 3, 2, 1]>,
641 // Quad-register Integer Multiply-Accumulate (.8, .16)
642 InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
643 // Extra latency cycles since wbck is 7 cycles
644 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
645 InstrStage<1, [A9_Pipe1]>,
646 InstrStage<2, [A9_NPipe]>], [7, 3, 2, 2]>,
648 // Quad-register Integer Multiply-Accumulate (.32)
649 InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
650 // Extra latency cycles since wbck is 9 cycles
651 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
652 InstrStage<1, [A9_Pipe1]>,
653 InstrStage<4, [A9_NPipe]>], [9, 3, 2, 1]>,
656 InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_DRegsN], 0, Required>,
657 // Extra latency cycles since wbck is 6 cycles
658 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
659 InstrStage<1, [A9_Pipe1]>,
660 InstrStage<1, [A9_NPipe]>], [3]>,
662 // Double-register Permute Move
663 InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_DRegsN], 0, Required>,
664 // FIXME: all latencies are arbitrary, no information is available
665 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
666 InstrStage<1, [A9_Pipe1]>,
667 InstrStage<1, [A9_LSPipe]>], [2, 1]>,
669 // Quad-register Permute Move
670 // Result written in N2, but that is relative to the last cycle of multicycle,
671 // so we use 3 for those cases
672 InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
673 // FIXME: all latencies are arbitrary, no information is available
674 InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
675 InstrStage<1, [A9_Pipe1]>,
676 InstrStage<2, [A9_NPipe]>], [3, 1]>,
678 // Integer to Single-precision Move
679 InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_DRegsN], 0, Required>,
680 // FIXME: all latencies are arbitrary, no information is available
681 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
682 InstrStage<1, [A9_Pipe1]>,
683 InstrStage<1, [A9_NPipe]>], [2, 1]>,
685 // Integer to Double-precision Move
686 InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_DRegsN], 0, Required>,
687 // FIXME: all latencies are arbitrary, no information is available
688 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
689 InstrStage<1, [A9_Pipe1]>,
690 InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
692 // Single-precision to Integer Move
693 InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_DRegsN], 0, Required>,
694 // FIXME: all latencies are arbitrary, no information is available
695 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
696 InstrStage<1, [A9_Pipe1]>,
697 InstrStage<1, [A9_NPipe]>], [2, 1]>,
699 // Double-precision to Integer Move
700 InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_DRegsN], 0, Required>,
701 // FIXME: all latencies are arbitrary, no information is available
702 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
703 InstrStage<1, [A9_Pipe1]>,
704 InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
706 // Integer to Lane Move
707 InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN], 0, Required>,
708 // FIXME: all latencies are arbitrary, no information is available
709 InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
710 InstrStage<1, [A9_Pipe1]>,
711 InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
714 // Double-register FP Unary
715 InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
716 // Extra latency cycles since wbck is 6 cycles
717 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
718 InstrStage<1, [A9_Pipe1]>,
719 InstrStage<1, [A9_NPipe]>], [5, 2]>,
721 // Quad-register FP Unary
722 // Result written in N5, but that is relative to the last cycle of multicycle,
723 // so we use 6 for those cases
724 InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
725 // Extra latency cycles since wbck is 7 cycles
726 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
727 InstrStage<1, [A9_Pipe1]>,
728 InstrStage<2, [A9_NPipe]>], [6, 2]>,
730 // Double-register FP Binary
731 // FIXME: We're using this itin for many instructions and [2, 2] here is too
733 InstrItinData<IIC_VBIND, [InstrStage<1, [A9_DRegsN], 0, Required>,
734 // Extra latency cycles since wbck is 6 cycles
// NOTE(review): assumes the reservation = wbck + 1 convention used by the
// other entries in this list; confirm the 6-cycle figure.
735 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
736 InstrStage<1, [A9_Pipe1]>,
737 InstrStage<1, [A9_NPipe]>], [5, 2, 2]>,
739 // Quad-register FP Binary
740 // Result written in N5, but that is relative to the last cycle of multicycle,
741 // so we use 6 for those cases
742 // FIXME: We're using this itin for many instructions and [2, 2] here is too
744 InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
745 // Extra latency cycles since wbck is 7 cycles
// NOTE(review): assumes the reservation = wbck + 1 convention used by the
// other entries in this list; confirm the 7-cycle figure.
746 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
747 InstrStage<1, [A9_Pipe1]>,
748 InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
750 // Double-register FP Multiply-Accumulate
751 InstrItinData<IIC_VMACD, [InstrStage<1, [A9_DRegsN], 0, Required>,
752 // Extra latency cycles since wbck is 7 cycles
753 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
754 InstrStage<1, [A9_Pipe1]>,
755 InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
757 // Quad-register FP Multiply-Accumulate
758 // Result written in N9, but that is relative to the last cycle of multicycle,
759 // so we use 10 for those cases
760 InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
761 // Extra latency cycles since wbck is 9 cycles
762 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
763 InstrStage<1, [A9_Pipe1]>,
764 InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>,
766 // Double-register Reciprocal Step
767 InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_DRegsN], 0, Required>,
768 // Extra latency cycles since wbck is 7 cycles
769 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
770 InstrStage<1, [A9_Pipe1]>,
771 InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
773 // Quad-register Reciprocal Step
774 InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
775 // Extra latency cycles since wbck is 9 cycles
776 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
777 InstrStage<1, [A9_Pipe1]>,
778 InstrStage<4, [A9_NPipe]>], [8, 2, 2]>,
780 // Double-register Permute
781 InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_DRegsN], 0, Required>,
782 // Extra latency cycles since wbck is 6 cycles
783 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
784 InstrStage<1, [A9_Pipe1]>,
785 InstrStage<1, [A9_NPipe]>], [2, 2, 1, 1]>,
787 // Quad-register Permute
788 // Result written in N2, but that is relative to the last cycle of multicycle,
789 // so we use 3 for those cases
790 InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
791 // Extra latency cycles since wbck is 7 cycles
792 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
793 InstrStage<1, [A9_Pipe1]>,
794 InstrStage<2, [A9_NPipe]>], [3, 3, 1, 1]>,
796 // Quad-register Permute (3 cycle issue)
797 // Result written in N2, but that is relative to the last cycle of multicycle,
798 // so we use 4 for those cases
799 InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_DRegsN], 0, Required>,
800 // Extra latency cycles since wbck is 8 cycles
801 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
802 InstrStage<1, [A9_Pipe1]>,
803 InstrStage<3, [A9_LSPipe]>], [4, 4, 1, 1]>,
806 // Double-register VEXT
807 InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_DRegsN], 0, Required>,
808 // Extra latency cycles since wbck is 6 cycles
// NOTE(review): assumes the reservation = wbck + 1 convention used by the
// other entries in this list; confirm the 6-cycle figure.
809 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
810 InstrStage<1, [A9_Pipe1]>,
811 InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
813 // Quad-register VEXT
814 InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
815 // Extra latency cycles since wbck is 7 cycles
// NOTE(review): assumes the reservation = wbck + 1 convention used by the
// other entries in this list; confirm the 7-cycle figure.
816 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
817 InstrStage<1, [A9_Pipe1]>,
818 InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
821 InstrItinData<IIC_VTB1, [InstrStage<1, [A9_DRegsN], 0, Required>,
822 // Extra latency cycles since wbck is 7 cycles
823 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
824 InstrStage<1, [A9_Pipe1]>,
825 InstrStage<2, [A9_NPipe]>], [3, 2, 1]>,
826 InstrItinData<IIC_VTB2, [InstrStage<2, [A9_DRegsN], 0, Required>,
827 // Extra latency cycles since wbck is 7 cycles
828 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
829 InstrStage<1, [A9_Pipe1]>,
830 InstrStage<2, [A9_NPipe]>], [3, 2, 2, 1]>,
831 InstrItinData<IIC_VTB3, [InstrStage<2, [A9_DRegsN], 0, Required>,
832 // Extra latency cycles since wbck is 8 cycles
833 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
834 InstrStage<1, [A9_Pipe1]>,
835 InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 1]>,
836 InstrItinData<IIC_VTB4, [InstrStage<1, [A9_DRegsN], 0, Required>,
837 // Extra latency cycles since wbck is 8 cycles
838 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
839 InstrStage<1, [A9_Pipe1]>,
840 InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 3, 1]>,
843 InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_DRegsN], 0, Required>,
844 // Extra latency cycles since wbck is 7 cycles
845 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
846 InstrStage<1, [A9_Pipe1]>,
847 InstrStage<2, [A9_NPipe]>], [3, 1, 2, 1]>,
848 InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_DRegsN], 0, Required>,
849 // Extra latency cycles since wbck is 7 cycles
850 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
851 InstrStage<1, [A9_Pipe1]>,
852 InstrStage<2, [A9_NPipe]>], [3, 1, 2, 2, 1]>,
853 InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_DRegsN], 0, Required>,
854 // Extra latency cycles since wbck is 8 cycles
855 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
856 InstrStage<1, [A9_Pipe1]>,
857 InstrStage<3, [A9_NPipe]>], [4, 1, 2, 2, 3, 1]>,
858 InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_DRegsN], 0, Required>,
859 // Extra latency cycles since wbck is 8 cycles
860 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
861 InstrStage<1, [A9_Pipe1]>,
862 InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]>