1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
12 //===----------------------------------------------------------------------===//
15 // Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical
19 def A9_Pipe0 : FuncUnit; // integer issue pipeline 0
20 def A9_Pipe1 : FuncUnit; // integer issue pipeline 1 (dual-issue partner of A9_Pipe0)
21 def A9_LSPipe : FuncUnit; // load/store pipe
22 def A9_NPipe : FuncUnit; // NEON ALU/MUL pipe; VFP itineraries below also execute here
23 def A9_DRegsVFP: FuncUnit; // artificial FU: FP register set, VFP side
24 def A9_DRegsN : FuncUnit; // artificial FU: FP register set, NEON side
26 // Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1
28 def CortexA9Itineraries : ProcessorItineraries<
29 [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1], [
30 // Two fully-pipelined integer ALU pipelines
31 // FIXME: There are no operand latencies for these instructions at all!
33 // Move instructions, unconditional
34 InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>,
35 InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
36 InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
37 InstrItinData<IIC_iMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
40 InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
42 // Binary Instructions that produce a result
43 InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
44 InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2, 2]>,
45 InstrItinData<IIC_iALUsi, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
46 InstrItinData<IIC_iALUsr,[InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1, 1]>,
48 // Unary Instructions that produce a result
49 InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
50 InstrItinData<IIC_iUNAsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
51 InstrItinData<IIC_iUNAsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
53 // Compare instructions
54 InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
55 InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
56 InstrItinData<IIC_iCMPsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
57 InstrItinData<IIC_iCMPsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
59 // Move instructions, conditional
60 InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
61 InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
62 InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
63 InstrItinData<IIC_iCMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
65 // Integer multiply pipeline
67 InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Pipe1], 0>,
68 InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
69 InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Pipe1], 0>,
70 InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>,
71 InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Pipe1], 0>,
72 InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
73 InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Pipe1], 0>,
74 InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>,
75 InstrItinData<IIC_iMUL64 , [InstrStage<2, [A9_Pipe1], 0>,
76 InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
77 InstrItinData<IIC_iMAC64 , [InstrStage<2, [A9_Pipe1], 0>,
78 InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
79 // Integer load pipeline
80 // FIXME: The timings are some rough approximations
83 InstrItinData<IIC_iLoadi , [InstrStage<1, [A9_Pipe1]>,
84 InstrStage<1, [A9_LSPipe]>], [3, 1]>,
87 InstrItinData<IIC_iLoadr , [InstrStage<1, [A9_Pipe1]>,
88 InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
90 // Scaled register offset
91 InstrItinData<IIC_iLoadsi , [InstrStage<1, [A9_Pipe1]>,
92 InstrStage<2, [A9_LSPipe]>], [4, 1, 1]>,
94 // Immediate offset with update
95 InstrItinData<IIC_iLoadiu , [InstrStage<1, [A9_Pipe1]>,
96 InstrStage<2, [A9_LSPipe]>], [3, 2, 1]>,
98 // Register offset with update
99 InstrItinData<IIC_iLoadru , [InstrStage<1, [A9_Pipe1]>,
100 InstrStage<2, [A9_LSPipe]>], [3, 2, 1, 1]>,
102 // Scaled register offset with update
103 InstrItinData<IIC_iLoadsiu , [InstrStage<1, [A9_Pipe1]>,
104 InstrStage<2, [A9_LSPipe]>], [4, 3, 1, 1]>,
107 InstrItinData<IIC_iLoadm , [InstrStage<1, [A9_Pipe1]>,
108 InstrStage<1, [A9_LSPipe]>]>,
110 // Integer store pipeline
113 InstrItinData<IIC_iStorei , [InstrStage<1, [A9_Pipe1]>,
114 InstrStage<1, [A9_LSPipe]>], [3, 1]>,
117 InstrItinData<IIC_iStorer , [InstrStage<1, [ A9_Pipe1]>,
118 InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
120 // Scaled register offset
121 InstrItinData<IIC_iStoresi , [InstrStage<1, [A9_Pipe1]>,
122 InstrStage<2, [A9_LSPipe]>], [3, 1, 1]>,
124 // Immediate offset with update
125 InstrItinData<IIC_iStoreiu , [InstrStage<1, [A9_Pipe1]>,
126 InstrStage<1, [A9_LSPipe]>], [2, 3, 1]>,
128 // Register offset with update
129 InstrItinData<IIC_iStoreru , [InstrStage<1, [A9_Pipe1]>,
130 InstrStage<1, [A9_LSPipe]>], [2, 3, 1, 1]>,
132 // Scaled register offset with update
133 InstrItinData<IIC_iStoresiu, [InstrStage<1, [A9_Pipe1]>,
134 InstrStage<2, [A9_LSPipe]>], [3, 3, 1, 1]>,
137 InstrItinData<IIC_iStorem , [InstrStage<1, [A9_Pipe1]>,
138 InstrStage<1, [A9_LSPipe]>]>,
141 // no delay slots, so the latency of a branch is unimportant
142 InstrItinData<IIC_Br , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
144 // VFP and NEON share the same register file. This means that every VFP
145 // instruction should wait for full completion of the preceding NEON
146 // instruction and vice-versa. We model this behavior with two artificial FUs:
147 // DRegsVFP and DRegsN.
149 // Every VFP instruction:
150 // - Acquires DRegsVFP resource for 1 cycle
151 // - Reserves DRegsN resource for the whole duration (including time to
152 // register file writeback!).
153 // Every NEON instruction does the same but with FUs swapped.
155 // Since the reserved FU cannot be acquired, this models precisely
156 // "cross-domain" stalls.
159 // Issue through integer pipeline, and execute in NEON unit.
161 // FP Special Register to Integer Register File Move
162 InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
163 InstrStage<2, [A9_DRegsN], 0, Reserved>,
164 InstrStage<1, [A9_Pipe1]>,
165 InstrStage<1, [A9_NPipe]>]>,
167 // Single-precision FP Unary
168 InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
169 // Extra latency cycles since wbck is 2 cycles
170 InstrStage<3, [A9_DRegsN], 0, Reserved>,
171 InstrStage<1, [A9_Pipe1]>,
172 InstrStage<1, [A9_NPipe]>], [1, 1]>,
174 // Double-precision FP Unary
175 InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
176 // Extra latency cycles since wbck is 2 cycles
177 InstrStage<3, [A9_DRegsN], 0, Reserved>,
178 InstrStage<1, [A9_Pipe1]>,
179 InstrStage<1, [A9_NPipe]>], [1, 1]>,
182 // Single-precision FP Compare
183 InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
184 // Extra latency cycles since wbck is 4 cycles
185 InstrStage<5, [A9_DRegsN], 0, Reserved>,
186 InstrStage<1, [A9_Pipe1]>,
187 InstrStage<1, [A9_NPipe]>], [1, 1]>,
189 // Double-precision FP Compare
190 InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
191 // Extra latency cycles since wbck is 4 cycles
192 InstrStage<5, [A9_DRegsN], 0, Reserved>,
193 InstrStage<1, [A9_Pipe1]>,
194 InstrStage<1, [A9_NPipe]>], [1, 1]>,
196 // Single to Double FP Convert
197 InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
198 InstrStage<5, [A9_DRegsN], 0, Reserved>,
199 InstrStage<1, [A9_Pipe1]>,
200 InstrStage<1, [A9_NPipe]>], [4, 1]>,
202 // Double to Single FP Convert
203 InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
204 InstrStage<5, [A9_DRegsN], 0, Reserved>,
205 InstrStage<1, [A9_Pipe1]>,
206 InstrStage<1, [A9_NPipe]>], [4, 1]>,
209 // Single to Half FP Convert
210 InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
211 InstrStage<5, [A9_DRegsN], 0, Reserved>,
212 InstrStage<1, [A9_Pipe1]>,
213 InstrStage<1, [A9_NPipe]>], [4, 1]>,
215 // Half to Single FP Convert
216 InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
217 InstrStage<3, [A9_DRegsN], 0, Reserved>,
218 InstrStage<1, [A9_Pipe1]>,
219 InstrStage<1, [A9_NPipe]>], [2, 1]>,
222 // Single-Precision FP to Integer Convert
223 InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
224 InstrStage<5, [A9_DRegsN], 0, Reserved>,
225 InstrStage<1, [A9_Pipe1]>,
226 InstrStage<1, [A9_NPipe]>], [4, 1]>,
228 // Double-Precision FP to Integer Convert
229 InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
230 InstrStage<5, [A9_DRegsN], 0, Reserved>,
231 InstrStage<1, [A9_Pipe1]>,
232 InstrStage<1, [A9_NPipe]>], [4, 1]>,
234 // Integer to Single-Precision FP Convert
235 InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
236 InstrStage<5, [A9_DRegsN], 0, Reserved>,
237 InstrStage<1, [A9_Pipe1]>,
238 InstrStage<1, [A9_NPipe]>], [4, 1]>,
240 // Integer to Double-Precision FP Convert
241 InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
242 InstrStage<5, [A9_DRegsN], 0, Reserved>,
243 InstrStage<1, [A9_Pipe1]>,
244 InstrStage<1, [A9_NPipe]>], [4, 1]>,
246 // Single-precision FP ALU
247 InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
248 InstrStage<5, [A9_DRegsN], 0, Reserved>,
249 InstrStage<1, [A9_Pipe1]>,
250 InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
252 // Double-precision FP ALU
253 InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
254 InstrStage<5, [A9_DRegsN], 0, Reserved>,
255 InstrStage<1, [A9_Pipe1]>,
256 InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
258 // Single-precision FP Multiply
259 InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
260 InstrStage<6, [A9_DRegsN], 0, Reserved>,
261 InstrStage<1, [A9_Pipe1]>,
262 InstrStage<1, [A9_NPipe]>], [5, 1, 1]>,
264 // Double-precision FP Multiply
265 InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
266 InstrStage<7, [A9_DRegsN], 0, Reserved>,
267 InstrStage<1, [A9_Pipe1]>,
268 InstrStage<2, [A9_NPipe]>], [6, 1, 1]>,
270 // Single-precision FP MAC
271 InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
272 InstrStage<9, [A9_DRegsN], 0, Reserved>,
273 InstrStage<1, [A9_Pipe1]>,
274 InstrStage<1, [A9_NPipe]>], [8, 0, 1, 1]>,
276 // Double-precision FP MAC
277 InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
278 InstrStage<10, [A9_DRegsN], 0, Reserved>,
279 InstrStage<1, [A9_Pipe1]>,
280 InstrStage<2, [A9_NPipe]>], [9, 0, 1, 1]>,
282 // Single-precision FP DIV
283 InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
284 InstrStage<16, [A9_DRegsN], 0, Reserved>,
285 InstrStage<1, [A9_Pipe1]>,
286 InstrStage<10, [A9_NPipe]>], [15, 1, 1]>,
288 // Double-precision FP DIV
289 InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
290 InstrStage<26, [A9_DRegsN], 0, Reserved>,
291 InstrStage<1, [A9_Pipe1]>,
292 InstrStage<20, [A9_NPipe]>], [25, 1, 1]>,
294 // Single-precision FP SQRT
295 InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
296 InstrStage<18, [A9_DRegsN], 0, Reserved>,
297 InstrStage<1, [A9_Pipe1]>,
298 InstrStage<13, [A9_NPipe]>], [17, 1]>,
300 // Double-precision FP SQRT
301 InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
302 InstrStage<33, [A9_DRegsN], 0, Reserved>,
303 InstrStage<1, [A9_Pipe1]>,
304 InstrStage<28, [A9_NPipe]>], [32, 1]>,
307 // Integer to Single-precision Move
308 InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
309 // Extra 1 latency cycle since wbck is 2 cycles
310 InstrStage<3, [A9_DRegsN], 0, Reserved>,
311 InstrStage<1, [A9_Pipe1]>,
312 InstrStage<1, [A9_NPipe]>], [1, 1]>,
314 // Integer to Double-precision Move
315 InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
316 // Extra 1 latency cycle since wbck is 2 cycles
317 InstrStage<3, [A9_DRegsN], 0, Reserved>,
318 InstrStage<1, [A9_Pipe1]>,
319 InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
321 // Single-precision to Integer Move
322 InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
323 InstrStage<2, [A9_DRegsN], 0, Reserved>,
324 InstrStage<1, [A9_Pipe1]>,
325 InstrStage<1, [A9_NPipe]>], [1, 1]>,
327 // Double-precision to Integer Move
328 InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
329 InstrStage<2, [A9_DRegsN], 0, Reserved>,
330 InstrStage<1, [A9_Pipe1]>,
331 InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
333 // Single-precision FP Load
334 InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
335 InstrStage<2, [A9_DRegsN], 0, Reserved>,
336 InstrStage<1, [A9_Pipe1], 0>,
337 InstrStage<1, [A9_LSPipe]>,
338 InstrStage<1, [A9_NPipe]>]>,
340 // Double-precision FP Load
341 InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
342 InstrStage<2, [A9_DRegsN], 0, Reserved>,
343 InstrStage<1, [A9_Pipe1], 0>,
344 InstrStage<1, [A9_LSPipe]>,
345 InstrStage<1, [A9_NPipe]>]>,
348 InstrItinData<IIC_fpLoadm, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
349 InstrStage<2, [A9_DRegsN], 0, Reserved>,
350 InstrStage<1, [A9_Pipe1], 0>,
351 InstrStage<1, [A9_LSPipe]>,
352 InstrStage<1, [A9_NPipe]>]>,
354 // Single-precision FP Store
355 InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
356 InstrStage<2, [A9_DRegsN], 0, Reserved>,
357 InstrStage<1, [A9_Pipe1], 0>,
358 InstrStage<1, [A9_LSPipe]>,
359 InstrStage<1, [A9_NPipe]>]>,
361 // Double-precision FP Store
362 InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
363 InstrStage<2, [A9_DRegsN], 0, Reserved>,
364 InstrStage<1, [A9_Pipe1], 0>,
365 InstrStage<1, [A9_LSPipe]>,
366 InstrStage<1, [A9_NPipe]>]>,
369 InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
370 InstrStage<2, [A9_DRegsN], 0, Reserved>,
371 InstrStage<1, [A9_Pipe1], 0>,
372 InstrStage<1, [A9_LSPipe]>,
373 InstrStage<1, [A9_NPipe]>]>,
375 // Issue through integer pipeline, and execute in NEON unit.
376 // FIXME: Neon pipeline and LdSt unit are multiplexed.
377 // Add some syntactic sugar to model this!
379 // FIXME: We don't model this instruction properly
380 InstrItinData<IIC_VLD1, [InstrStage<1, [A9_DRegsN], 0, Required>,
381 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
382 InstrStage<1, [A9_Pipe1], 0>,
383 InstrStage<1, [A9_LSPipe]>,
384 InstrStage<1, [A9_NPipe]>]>,
387 // FIXME: We don't model this instruction properly
388 InstrItinData<IIC_VLD2, [InstrStage<1, [A9_DRegsN], 0, Required>,
389 // Extra latency cycles since wbck is 6 cycles
390 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
391 InstrStage<1, [A9_Pipe1], 0>,
392 InstrStage<1, [A9_LSPipe]>,
393 InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
396 // FIXME: We don't model this instruction properly
397 InstrItinData<IIC_VLD3, [InstrStage<1, [A9_DRegsN], 0, Required>,
398 // Extra latency cycles since wbck is 6 cycles
399 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
400 InstrStage<1, [A9_Pipe1], 0>,
401 InstrStage<1, [A9_LSPipe]>,
402 InstrStage<1, [A9_NPipe]>], [2, 2, 2, 1]>,
405 // FIXME: We don't model this instruction properly
406 InstrItinData<IIC_VLD4, [InstrStage<1, [A9_DRegsN], 0, Required>,
407 // Extra latency cycles since wbck is 6 cycles
408 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
409 InstrStage<1, [A9_Pipe1], 0>,
410 InstrStage<1, [A9_LSPipe]>,
411 InstrStage<1, [A9_NPipe]>], [2, 2, 2, 2, 1]>,
414 // FIXME: We don't model this instruction properly
415 InstrItinData<IIC_VST, [InstrStage<1, [A9_DRegsN], 0, Required>,
416 // Extra latency cycles since wbck is 6 cycles
417 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
418 InstrStage<1, [A9_Pipe1], 0>,
419 InstrStage<1, [A9_LSPipe]>,
420 InstrStage<1, [A9_NPipe]>]>,
422 // Double-register Integer Unary
423 InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
424 // Extra latency cycles since wbck is 6 cycles
425 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
426 InstrStage<1, [A9_Pipe1]>,
427 InstrStage<1, [A9_NPipe]>], [4, 2]>,
429 // Quad-register Integer Unary
430 InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
431 // Extra latency cycles since wbck is 6 cycles
432 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
433 InstrStage<1, [A9_Pipe1]>,
434 InstrStage<1, [A9_NPipe]>], [4, 2]>,
436 // Double-register Integer Q-Unary
437 InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
438 // Extra latency cycles since wbck is 6 cycles
439 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
440 InstrStage<1, [A9_Pipe1]>,
441 InstrStage<1, [A9_NPipe]>], [4, 1]>,
443 // Quad-register Integer Q-Unary
444 InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
445 // Extra latency cycles since wbck is 6 cycles
446 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
447 InstrStage<1, [A9_Pipe1]>,
448 InstrStage<1, [A9_NPipe]>], [4, 1]>,
450 // Double-register Integer Binary
451 InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
452 // Extra latency cycles since wbck is 6 cycles
453 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
454 InstrStage<1, [A9_Pipe1]>,
455 InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
457 // Quad-register Integer Binary
458 InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
459 // Extra latency cycles since wbck is 6 cycles
460 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
461 InstrStage<1, [A9_Pipe1]>,
462 InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
464 // Double-register Integer Subtract
465 InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
466 // Extra latency cycles since wbck is 6 cycles
467 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
468 InstrStage<1, [A9_Pipe1]>,
469 InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
471 // Quad-register Integer Subtract
472 InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
473 // Extra latency cycles since wbck is 6 cycles
474 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
475 InstrStage<1, [A9_Pipe1]>,
476 InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
478 // Double-register Integer Shift
479 InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
480 // Extra latency cycles since wbck is 6 cycles
481 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
482 InstrStage<1, [A9_Pipe1]>,
483 InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
485 // Quad-register Integer Shift
486 InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
487 // Extra latency cycles since wbck is 6 cycles
488 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
489 InstrStage<1, [A9_Pipe1]>,
490 InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
492 // Double-register Integer Shift (4 cycle)
493 InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
494 // Extra latency cycles since wbck is 6 cycles
495 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
496 InstrStage<1, [A9_Pipe1]>,
497 InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
499 // Quad-register Integer Shift (4 cycle)
500 InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
501 // Extra latency cycles since wbck is 6 cycles
502 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
503 InstrStage<1, [A9_Pipe1]>,
504 InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
506 // Double-register Integer Binary (4 cycle)
507 InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
508 // Extra latency cycles since wbck is 6 cycles
509 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
510 InstrStage<1, [A9_Pipe1]>,
511 InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
513 // Quad-register Integer Binary (4 cycle)
514 InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
515 // Extra latency cycles since wbck is 6 cycles
516 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
517 InstrStage<1, [A9_Pipe1]>,
518 InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
520 // Double-register Integer Subtract (4 cycle); result latency 4 vs. 3 for IIC_VSUBiD
521 InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
522 // Extra latency cycles since wbck is 6 cycles
523 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
524 InstrStage<1, [A9_Pipe1]>,
525 InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
527 // Quad-register Integer Subtract (4 cycle); result latency 4 vs. 3 for IIC_VSUBiQ
528 InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
529 // Extra latency cycles since wbck is 6 cycles
530 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
531 InstrStage<1, [A9_Pipe1]>,
532 InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
535 // Double-register Integer Count
536 InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
537 // Extra latency cycles since wbck is 6 cycles
538 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
539 InstrStage<1, [A9_Pipe1]>,
540 InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
542 // Quad-register Integer Count
543 // Result written in N3, but that is relative to the last cycle of multicycle,
544 // so we use 4 for those cases
545 InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
546 // Extra latency cycles since wbck is 7 cycles
547 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
548 InstrStage<1, [A9_Pipe1]>,
549 InstrStage<2, [A9_NPipe]>], [4, 2, 2]>,
551 // Double-register Absolute Difference and Accumulate
552 InstrItinData<IIC_VABAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
553 // Extra latency cycles since wbck is 6 cycles
554 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
555 InstrStage<1, [A9_Pipe1]>,
556 InstrStage<1, [A9_NPipe]>], [6, 3, 2, 1]>,
558 // Quad-register Absolute Difference and Accumulate
559 InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
560 // Extra latency cycles since wbck is 6 cycles
561 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
562 InstrStage<1, [A9_Pipe1]>,
563 InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
565 // Double-register Integer Pair Add Long
566 InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
567 // Extra latency cycles since wbck is 6 cycles
568 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
569 InstrStage<1, [A9_Pipe1]>,
570 InstrStage<1, [A9_NPipe]>], [6, 3, 1]>,
572 // Quad-register Integer Pair Add Long
573 InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
574 // Extra latency cycles since wbck is 6 cycles
575 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
576 InstrStage<1, [A9_Pipe1]>,
577 InstrStage<2, [A9_NPipe]>], [6, 3, 1]>,
580 // Double-register Integer Multiply (.8, .16)
581 InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
582 // Extra latency cycles since wbck is 6 cycles
583 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
584 InstrStage<1, [A9_Pipe1]>,
585 InstrStage<1, [A9_NPipe]>], [6, 2, 2]>,
587 // Quad-register Integer Multiply (.8, .16)
588 InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
589 // Extra latency cycles since wbck is 7 cycles
590 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
591 InstrStage<1, [A9_Pipe1]>,
592 InstrStage<2, [A9_NPipe]>], [7, 2, 2]>,
595 // Double-register Integer Multiply (.32)
596 InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
597 // Extra latency cycles since wbck is 7 cycles
598 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
599 InstrStage<1, [A9_Pipe1]>,
600 InstrStage<2, [A9_NPipe]>], [7, 2, 1]>,
602 // Quad-register Integer Multiply (.32)
603 InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
604 // Extra latency cycles since wbck is 9 cycles
605 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
606 InstrStage<1, [A9_Pipe1]>,
607 InstrStage<4, [A9_NPipe]>], [9, 2, 1]>,
609 // Double-register Integer Multiply-Accumulate (.8, .16)
610 InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
611 // Extra latency cycles since wbck is 6 cycles
612 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
613 InstrStage<1, [A9_Pipe1]>,
614 InstrStage<1, [A9_NPipe]>], [6, 3, 2, 2]>,
616 // Double-register Integer Multiply-Accumulate (.32)
617 InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
618 // Extra latency cycles since wbck is 7 cycles
619 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
620 InstrStage<1, [A9_Pipe1]>,
621 InstrStage<2, [A9_NPipe]>], [7, 3, 2, 1]>,
623 // Quad-register Integer Multiply-Accumulate (.8, .16)
624 InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
625 // Extra latency cycles since wbck is 7 cycles
626 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
627 InstrStage<1, [A9_Pipe1]>,
628 InstrStage<2, [A9_NPipe]>], [7, 3, 2, 2]>,
630 // Quad-register Integer Multiply-Accumulate (.32)
631 InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
632 // Extra latency cycles since wbck is 9 cycles
633 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
634 InstrStage<1, [A9_Pipe1]>,
635 InstrStage<4, [A9_NPipe]>], [9, 3, 2, 1]>,
638 InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_DRegsN], 0, Required>,
639 // Extra latency cycles since wbck is 6 cycles
640 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
641 InstrStage<1, [A9_Pipe1]>,
642 InstrStage<1, [A9_NPipe]>], [3]>,
644 // Double-register Permute Move
645 InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_DRegsN], 0, Required>,
646 // FIXME: all latencies are arbitrary, no information is available
647 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
648 InstrStage<1, [A9_Pipe1]>,
649 InstrStage<1, [A9_LSPipe]>], [2, 1]>,
651 // Quad-register Permute Move
652 // Result written in N2, but that is relative to the last cycle of multicycle,
653 // so we use 3 for those cases
654 InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
655 // FIXME: all latencies are arbitrary, no information is available
656 InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
657 InstrStage<1, [A9_Pipe1]>,
658 InstrStage<2, [A9_NPipe]>], [3, 1]>,
660 // Integer to Single-precision Move
661 InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_DRegsN], 0, Required>,
662 // FIXME: all latencies are arbitrary, no information is available
663 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
664 InstrStage<1, [A9_Pipe1]>,
665 InstrStage<1, [A9_NPipe]>], [2, 1]>,
667 // Integer to Double-precision Move
668 InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_DRegsN], 0, Required>,
669 // FIXME: all latencies are arbitrary, no information is available
670 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
671 InstrStage<1, [A9_Pipe1]>,
672 InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
674 // Single-precision to Integer Move
675 InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_DRegsN], 0, Required>,
676 // FIXME: all latencies are arbitrary, no information is available
677 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
678 InstrStage<1, [A9_Pipe1]>,
679 InstrStage<1, [A9_NPipe]>], [2, 1]>,
681 // Double-precision to Integer Move
682 InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_DRegsN], 0, Required>,
683 // FIXME: all latencies are arbitrary, no information is available
684 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
685 InstrStage<1, [A9_Pipe1]>,
686 InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
688 // Integer to Lane Move
689 InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN], 0, Required>,
690 // FIXME: all latencies are arbitrary, no information is available
691 InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
692 InstrStage<1, [A9_Pipe1]>,
693 InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
696 // Double-register FP Unary
697 InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
698 // Extra latency cycles since wbck is 6 cycles
699 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
700 InstrStage<1, [A9_Pipe1]>,
701 InstrStage<1, [A9_NPipe]>], [5, 2]>,
703 // Quad-register FP Unary
704 // Result written in N5, but that is relative to the last cycle of multicycle,
705 // so we use 6 for those cases
706 InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
707 // Extra latency cycles since wbck is 7 cycles
708 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
709 InstrStage<1, [A9_Pipe1]>,
710 InstrStage<2, [A9_NPipe]>], [6, 2]>,
712 // Double-register FP Binary
713 // FIXME: We're using this itin for many instructions and [2, 2] here is too
715 InstrItinData<IIC_VBIND, [InstrStage<1, [A9_DRegsN], 0, Required>,
716 // Extra latency cycles since wbck is 6 cycles
717 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
718 InstrStage<1, [A9_Pipe1]>,
719 InstrStage<1, [A9_NPipe]>], [5, 2, 2]>,
721 // Quad-register FP Binary
722 // Result written in N5, but that is relative to the last cycle of multicycle,
723 // so we use 6 for those cases
724 // FIXME: We're using this itin for many instructions and [2, 2] here is too
726 InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
727 // Extra latency cycles since wbck is 7 cycles
728 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
729 InstrStage<1, [A9_Pipe1]>,
730 InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
732 // Double-register FP Multiply-Accumulate
733 InstrItinData<IIC_VMACD, [InstrStage<1, [A9_DRegsN], 0, Required>,
734 // Extra latency cycles since wbck is 7 cycles
735 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
736 InstrStage<1, [A9_Pipe1]>,
737 InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
739 // Quad-register FP Multiply-Accumulate
740 // Result written in N9, but that is relative to the last cycle of multicycle,
741 // so we use 10 for those cases
742 InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
743 // Extra latency cycles since wbck is 9 cycles
744 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
745 InstrStage<1, [A9_Pipe1]>,
746 InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>,
748 // Double-register Reciprocal Step
749 InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_DRegsN], 0, Required>,
750 // Extra latency cycles since wbck is 7 cycles
751 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
752 InstrStage<1, [A9_Pipe1]>,
753 InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
755 // Quad-register Reciprocal Step
756 InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
757 // Extra latency cycles since wbck is 9 cycles
758 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
759 InstrStage<1, [A9_Pipe1]>,
760 InstrStage<4, [A9_NPipe]>], [8, 2, 2]>,
762 // Double-register Permute
763 InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_DRegsN], 0, Required>,
764 // Extra latency cycles since wbck is 6 cycles
765 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
766 InstrStage<1, [A9_Pipe1]>,
767 InstrStage<1, [A9_NPipe]>], [2, 2, 1, 1]>,
769 // Quad-register Permute
770 // Result written in N2, but that is relative to the last cycle of multicycle,
771 // so we use 3 for those cases
772 InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
773 // Extra latency cycles since wbck is 7 cycles
774 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
775 InstrStage<1, [A9_Pipe1]>,
776 InstrStage<2, [A9_NPipe]>], [3, 3, 1, 1]>,
778 // Quad-register Permute (3 cycle issue)
779 // Result written in N2, but that is relative to the last cycle of multicycle,
780 // so we use 4 for those cases
781 InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_DRegsN], 0, Required>,
782 // Extra latency cycles since wbck is 8 cycles
783 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
784 InstrStage<1, [A9_Pipe1]>,
785 InstrStage<3, [A9_LSPipe]>], [4, 4, 1, 1]>,
788 // Double-register VEXT
789 InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_DRegsN], 0, Required>,
790 // Extra latency cycles since wbck is 6 cycles
791 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
792 InstrStage<1, [A9_Pipe1]>,
793 InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
795 // Quad-register VEXT
796 InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
797 // Extra latency cycles since wbck is 7 cycles
798 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
799 InstrStage<1, [A9_Pipe1]>,
800 InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
803 InstrItinData<IIC_VTB1, [InstrStage<1, [A9_DRegsN], 0, Required>,
804 // Extra latency cycles since wbck is 7 cycles
805 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
806 InstrStage<1, [A9_Pipe1]>,
807 InstrStage<2, [A9_NPipe]>], [3, 2, 1]>,
808 InstrItinData<IIC_VTB2, [InstrStage<2, [A9_DRegsN], 0, Required>,
809 // Extra latency cycles since wbck is 7 cycles
810 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
811 InstrStage<1, [A9_Pipe1]>,
812 InstrStage<2, [A9_NPipe]>], [3, 2, 2, 1]>,
813 InstrItinData<IIC_VTB3, [InstrStage<2, [A9_DRegsN], 0, Required>,
814 // Extra latency cycles since wbck is 8 cycles
815 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
816 InstrStage<1, [A9_Pipe1]>,
817 InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 1]>,
818 InstrItinData<IIC_VTB4, [InstrStage<1, [A9_DRegsN], 0, Required>,
819 // Extra latency cycles since wbck is 8 cycles
820 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
821 InstrStage<1, [A9_Pipe1]>,
822 InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 3, 1]>,
825 InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_DRegsN], 0, Required>,
826 // Extra latency cycles since wbck is 7 cycles
827 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
828 InstrStage<1, [A9_Pipe1]>,
829 InstrStage<2, [A9_NPipe]>], [3, 1, 2, 1]>,
830 InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_DRegsN], 0, Required>,
831 // Extra latency cycles since wbck is 7 cycles
832 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
833 InstrStage<1, [A9_Pipe1]>,
834 InstrStage<2, [A9_NPipe]>], [3, 1, 2, 2, 1]>,
835 InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_DRegsN], 0, Required>,
836 // Extra latency cycles since wbck is 8 cycles
837 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
838 InstrStage<1, [A9_Pipe1]>,
839 InstrStage<3, [A9_NPipe]>], [4, 1, 2, 2, 3, 1]>,
840 InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_DRegsN], 0, Required>,
841 // Extra latency cycles since wbck is 8 cycles
842 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
843 InstrStage<1, [A9_Pipe1]>,
844 InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]>