1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
12 //===----------------------------------------------------------------------===//
15 // Ad-hoc scheduling information derived from the rather vague "Cortex-A9 Technical Reference Manual".
19 def A9_Issue : FuncUnit; // issue slot; FP/NEON loads and stores hold it to enforce the 1 load/store per cycle limit
20 def A9_Pipe0 : FuncUnit; // integer pipeline 0 (A9_Pipe0 | A9_Pipe1 form the dual-issue pair)
21 def A9_Pipe1 : FuncUnit; // integer pipeline 1; also the issue path used by the VFP/NEON itineraries below
22 def A9_LSPipe : FuncUnit; // load/store (LS) pipe
23 def A9_NPipe : FuncUnit; // NEON ALU/MUL pipe; also the execute stage of the VFP itineraries below
24 def A9_DRegsVFP: FuncUnit; // artificial unit: shared FP register set, VFP side (required by VFP, reserved by NEON)
25 def A9_DRegsN : FuncUnit; // artificial unit: shared FP register set, NEON side (required by NEON, reserved by VFP)
27 // Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1
29 def CortexA9Itineraries : ProcessorItineraries<
30 [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1, A9_Issue], [
31 // Two fully-pipelined integer ALU pipelines
32 // FIXME: There are no operand latencies for these instructions at all!
34 // Move instructions, unconditional
35 InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>,
36 InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
37 InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
38 InstrItinData<IIC_iMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
41 InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
43 // Binary Instructions that produce a result
44 InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
45 InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2, 2]>,
46 InstrItinData<IIC_iALUsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
47 InstrItinData<IIC_iALUsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1, 1]>,
49 // Unary Instructions that produce a result
50 InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
51 InstrItinData<IIC_iUNAsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
52 InstrItinData<IIC_iUNAsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
54 // Compare instructions
55 InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
56 InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
57 InstrItinData<IIC_iCMPsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
58 InstrItinData<IIC_iCMPsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
60 // Move instructions, conditional
61 InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
62 InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
63 InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
64 InstrItinData<IIC_iCMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
66 // Integer multiply pipeline
68 InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Pipe1], 0>,
69 InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
70 InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Pipe1], 0>,
71 InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>,
72 InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Pipe1], 0>,
73 InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
74 InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Pipe1], 0>,
75 InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>,
76 InstrItinData<IIC_iMUL64 , [InstrStage<2, [A9_Pipe1], 0>,
77 InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
78 InstrItinData<IIC_iMAC64 , [InstrStage<2, [A9_Pipe1], 0>,
79 InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
83 // no delay slots, so the latency of a branch is unimportant
84 InstrItinData<IIC_Br , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
86 // VFP and NEON share the same register file. This means that every VFP
87 // instruction must wait for full completion of the preceding NEON
88 // instruction and vice-versa. We model this behavior with two artificial FUs:
89 // DRegsVFP and DRegsN.
91 // Every VFP instruction:
92 // - Acquires DRegsVFP resource for 1 cycle
93 // - Reserves DRegsN resource for the whole duration (including time to
94 // register file writeback!).
95 // Every NEON instruction does the same but with FUs swapped.
97 // Since a reserved FU cannot be acquired, this precisely models "cross-domain" stalls.
101 // Issue through integer pipeline, and execute in NEON unit.
103 // FP Special Register to Integer Register File Move
104 InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
105 InstrStage<2, [A9_DRegsN], 0, Reserved>,
106 InstrStage<1, [A9_Pipe1]>,
107 InstrStage<1, [A9_NPipe]>]>,
109 // Single-precision FP Unary
110 InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
111 // Extra latency cycles since wbck is 2 cycles
112 InstrStage<3, [A9_DRegsN], 0, Reserved>,
113 InstrStage<1, [A9_Pipe1]>,
114 InstrStage<1, [A9_NPipe]>], [1, 1]>,
116 // Double-precision FP Unary
117 InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
118 // Extra latency cycles since wbck is 2 cycles
119 InstrStage<3, [A9_DRegsN], 0, Reserved>,
120 InstrStage<1, [A9_Pipe1]>,
121 InstrStage<1, [A9_NPipe]>], [1, 1]>,
124 // Single-precision FP Compare
125 InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
126 // Extra latency cycles since wbck is 4 cycles
127 InstrStage<5, [A9_DRegsN], 0, Reserved>,
128 InstrStage<1, [A9_Pipe1]>,
129 InstrStage<1, [A9_NPipe]>], [1, 1]>,
131 // Double-precision FP Compare
132 InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
133 // Extra latency cycles since wbck is 4 cycles
134 InstrStage<5, [A9_DRegsN], 0, Reserved>,
135 InstrStage<1, [A9_Pipe1]>,
136 InstrStage<1, [A9_NPipe]>], [1, 1]>,
138 // Single to Double FP Convert
139 InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
140 InstrStage<5, [A9_DRegsN], 0, Reserved>,
141 InstrStage<1, [A9_Pipe1]>,
142 InstrStage<1, [A9_NPipe]>], [4, 1]>,
144 // Double to Single FP Convert
145 InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
146 InstrStage<5, [A9_DRegsN], 0, Reserved>,
147 InstrStage<1, [A9_Pipe1]>,
148 InstrStage<1, [A9_NPipe]>], [4, 1]>,
151 // Single to Half FP Convert
152 InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
153 InstrStage<5, [A9_DRegsN], 0, Reserved>,
154 InstrStage<1, [A9_Pipe1]>,
155 InstrStage<1, [A9_NPipe]>], [4, 1]>,
157 // Half to Single FP Convert
158 InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
159 InstrStage<3, [A9_DRegsN], 0, Reserved>,
160 InstrStage<1, [A9_Pipe1]>,
161 InstrStage<1, [A9_NPipe]>], [2, 1]>,
164 // Single-Precision FP to Integer Convert
165 InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
166 InstrStage<5, [A9_DRegsN], 0, Reserved>,
167 InstrStage<1, [A9_Pipe1]>,
168 InstrStage<1, [A9_NPipe]>], [4, 1]>,
170 // Double-Precision FP to Integer Convert
171 InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
172 InstrStage<5, [A9_DRegsN], 0, Reserved>,
173 InstrStage<1, [A9_Pipe1]>,
174 InstrStage<1, [A9_NPipe]>], [4, 1]>,
176 // Integer to Single-Precision FP Convert
177 InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
178 InstrStage<5, [A9_DRegsN], 0, Reserved>,
179 InstrStage<1, [A9_Pipe1]>,
180 InstrStage<1, [A9_NPipe]>], [4, 1]>,
182 // Integer to Double-Precision FP Convert
183 InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
184 InstrStage<5, [A9_DRegsN], 0, Reserved>,
185 InstrStage<1, [A9_Pipe1]>,
186 InstrStage<1, [A9_NPipe]>], [4, 1]>,
188 // Single-precision FP ALU
189 InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
190 InstrStage<5, [A9_DRegsN], 0, Reserved>,
191 InstrStage<1, [A9_Pipe1]>,
192 InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
194 // Double-precision FP ALU
195 InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
196 InstrStage<5, [A9_DRegsN], 0, Reserved>,
197 InstrStage<1, [A9_Pipe1]>,
198 InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
200 // Single-precision FP Multiply
201 InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
202 InstrStage<6, [A9_DRegsN], 0, Reserved>,
203 InstrStage<1, [A9_Pipe1]>,
204 InstrStage<1, [A9_NPipe]>], [5, 1, 1]>,
206 // Double-precision FP Multiply
207 InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
208 InstrStage<7, [A9_DRegsN], 0, Reserved>,
209 InstrStage<1, [A9_Pipe1]>,
210 InstrStage<2, [A9_NPipe]>], [6, 1, 1]>,
212 // Single-precision FP MAC
213 InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
214 InstrStage<9, [A9_DRegsN], 0, Reserved>,
215 InstrStage<1, [A9_Pipe1]>,
216 InstrStage<1, [A9_NPipe]>], [8, 0, 1, 1]>,
218 // Double-precision FP MAC
219 InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
220 InstrStage<10, [A9_DRegsN], 0, Reserved>,
221 InstrStage<1, [A9_Pipe1]>,
222 InstrStage<2, [A9_NPipe]>], [9, 0, 1, 1]>,
224 // Single-precision FP DIV
225 InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
226 InstrStage<16, [A9_DRegsN], 0, Reserved>,
227 InstrStage<1, [A9_Pipe1]>,
228 InstrStage<10, [A9_NPipe]>], [15, 1, 1]>,
230 // Double-precision FP DIV
231 InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
232 InstrStage<26, [A9_DRegsN], 0, Reserved>,
233 InstrStage<1, [A9_Pipe1]>,
234 InstrStage<20, [A9_NPipe]>], [25, 1, 1]>,
236 // Single-precision FP SQRT
237 InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
238 InstrStage<18, [A9_DRegsN], 0, Reserved>,
239 InstrStage<1, [A9_Pipe1]>,
240 InstrStage<13, [A9_NPipe]>], [17, 1]>,
242 // Double-precision FP SQRT
243 InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
244 InstrStage<33, [A9_DRegsN], 0, Reserved>,
245 InstrStage<1, [A9_Pipe1]>,
246 InstrStage<28, [A9_NPipe]>], [32, 1]>,
249 // Integer to Single-precision Move
250 InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
251 // Extra 1 latency cycle since wbck is 2 cycles
252 InstrStage<3, [A9_DRegsN], 0, Reserved>,
253 InstrStage<1, [A9_Pipe1]>,
254 InstrStage<1, [A9_NPipe]>], [1, 1]>,
256 // Integer to Double-precision Move
257 InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
258 // Extra 1 latency cycle since wbck is 2 cycles
259 InstrStage<3, [A9_DRegsN], 0, Reserved>,
260 InstrStage<1, [A9_Pipe1]>,
261 InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
263 // Single-precision to Integer Move
264 InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
265 InstrStage<2, [A9_DRegsN], 0, Reserved>,
266 InstrStage<1, [A9_Pipe1]>,
267 InstrStage<1, [A9_NPipe]>], [1, 1]>,
269 // Double-precision to Integer Move
270 InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
271 InstrStage<2, [A9_DRegsN], 0, Reserved>,
272 InstrStage<1, [A9_Pipe1]>,
273 InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
275 // Single-precision FP Load
276 // use A9_Issue to enforce the 1 load/store per cycle limit
277 InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
278 InstrStage<2, [A9_DRegsN], 0, Reserved>,
279 InstrStage<1, [A9_Issue], 0>,
280 InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
281 InstrStage<1, [A9_LSPipe], 0>,
282 InstrStage<1, [A9_NPipe]>]>,
284 // Double-precision FP Load
285 // use A9_Issue to enforce the 1 load/store per cycle limit
286 InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
287 InstrStage<2, [A9_DRegsN], 0, Reserved>,
288 InstrStage<1, [A9_Issue], 0>,
289 InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
290 InstrStage<1, [A9_LSPipe], 0>,
291 InstrStage<1, [A9_NPipe]>]>,
294 // use A9_Issue to enforce the 1 load/store per cycle limit
295 InstrItinData<IIC_fpLoadm, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
296 InstrStage<2, [A9_DRegsN], 0, Reserved>,
297 InstrStage<1, [A9_Issue], 0>,
298 InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
299 InstrStage<1, [A9_LSPipe], 0>,
300 InstrStage<1, [A9_NPipe]>]>,
302 // Single-precision FP Store
303 // use A9_Issue to enforce the 1 load/store per cycle limit
304 InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
305 InstrStage<2, [A9_DRegsN], 0, Reserved>,
306 InstrStage<1, [A9_Issue], 0>,
307 InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
308 InstrStage<1, [A9_LSPipe], 0>,
309 InstrStage<1, [A9_NPipe]>]>,
311 // Double-precision FP Store
312 // use A9_Issue to enforce the 1 load/store per cycle limit
313 InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
314 InstrStage<2, [A9_DRegsN], 0, Reserved>,
315 InstrStage<1, [A9_Issue], 0>,
316 InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
317 InstrStage<1, [A9_LSPipe], 0>,
318 InstrStage<1, [A9_NPipe]>]>,
321 // use A9_Issue to enforce the 1 load/store per cycle limit
322 InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
323 InstrStage<2, [A9_DRegsN], 0, Reserved>,
324 InstrStage<1, [A9_Issue], 0>,
325 InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
326 InstrStage<1, [A9_LSPipe], 0>,
327 InstrStage<1, [A9_NPipe]>]>,
329 // Issue through integer pipeline, and execute in NEON unit.
330 // FIXME: Neon pipeline and LdSt unit are multiplexed.
331 // Add some syntactic sugar to model this!
333 // FIXME: We don't model this instruction properly
334 InstrItinData<IIC_VLD1, [InstrStage<1, [A9_DRegsN], 0, Required>,
335 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
336 InstrStage<1, [A9_Issue], 0>,
337 InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
338 InstrStage<1, [A9_LSPipe], 0>,
339 InstrStage<1, [A9_NPipe]>]>,
342 // FIXME: We don't model this instruction properly
343 InstrItinData<IIC_VLD2, [InstrStage<1, [A9_DRegsN], 0, Required>,
344 // Extra latency cycles since wbck is 6 cycles
345 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
346 InstrStage<1, [A9_Issue], 0>,
347 InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
348 InstrStage<1, [A9_LSPipe], 0>,
349 InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
352 // FIXME: We don't model this instruction properly
353 InstrItinData<IIC_VLD3, [InstrStage<1, [A9_DRegsN], 0, Required>,
354 // Extra latency cycles since wbck is 6 cycles
355 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
356 InstrStage<1, [A9_Issue], 0>,
357 InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
358 InstrStage<1, [A9_LSPipe], 0>,
359 InstrStage<1, [A9_NPipe]>], [2, 2, 2, 1]>,
362 // FIXME: We don't model this instruction properly
363 InstrItinData<IIC_VLD4, [InstrStage<1, [A9_DRegsN], 0, Required>,
364 // Extra latency cycles since wbck is 6 cycles
365 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
366 InstrStage<1, [A9_Issue], 0>,
367 InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
368 InstrStage<1, [A9_LSPipe], 0>,
369 InstrStage<1, [A9_NPipe]>], [2, 2, 2, 2, 1]>,
372 // FIXME: We don't model this instruction properly
373 InstrItinData<IIC_VST, [InstrStage<1, [A9_DRegsN], 0, Required>,
374 // Extra latency cycles since wbck is 6 cycles
375 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
376 InstrStage<1, [A9_Issue], 0>,
377 InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
378 InstrStage<1, [A9_LSPipe], 0>,
379 InstrStage<1, [A9_NPipe]>]>,
381 // Double-register Integer Unary
382 InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
383 // Extra latency cycles since wbck is 6 cycles
384 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
385 InstrStage<1, [A9_Pipe1]>,
386 InstrStage<1, [A9_NPipe]>], [4, 2]>,
388 // Quad-register Integer Unary
389 InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
390 // Extra latency cycles since wbck is 6 cycles
391 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
392 InstrStage<1, [A9_Pipe1]>,
393 InstrStage<1, [A9_NPipe]>], [4, 2]>,
395 // Double-register Integer Q-Unary
396 InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
397 // Extra latency cycles since wbck is 6 cycles
398 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
399 InstrStage<1, [A9_Pipe1]>,
400 InstrStage<1, [A9_NPipe]>], [4, 1]>,
402 // Quad-register Integer Q-Unary
403 InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
404 // Extra latency cycles since wbck is 6 cycles
405 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
406 InstrStage<1, [A9_Pipe1]>,
407 InstrStage<1, [A9_NPipe]>], [4, 1]>,
409 // Double-register Integer Binary
410 InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
411 // Extra latency cycles since wbck is 6 cycles
412 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
413 InstrStage<1, [A9_Pipe1]>,
414 InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
416 // Quad-register Integer Binary
417 InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
418 // Extra latency cycles since wbck is 6 cycles
419 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
420 InstrStage<1, [A9_Pipe1]>,
421 InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
423 // Double-register Integer Subtract
424 InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
425 // Extra latency cycles since wbck is 6 cycles
426 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
427 InstrStage<1, [A9_Pipe1]>,
428 InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
430 // Quad-register Integer Subtract
431 InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
432 // Extra latency cycles since wbck is 6 cycles
433 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
434 InstrStage<1, [A9_Pipe1]>,
435 InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
437 // Double-register Integer Shift
438 InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
439 // Extra latency cycles since wbck is 6 cycles
440 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
441 InstrStage<1, [A9_Pipe1]>,
442 InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
444 // Quad-register Integer Shift
445 InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
446 // Extra latency cycles since wbck is 6 cycles
447 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
448 InstrStage<1, [A9_Pipe1]>,
449 InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
451 // Double-register Integer Shift (4 cycle)
452 InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
453 // Extra latency cycles since wbck is 6 cycles
454 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
455 InstrStage<1, [A9_Pipe1]>,
456 InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
458 // Quad-register Integer Shift (4 cycle)
459 InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
460 // Extra latency cycles since wbck is 6 cycles
461 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
462 InstrStage<1, [A9_Pipe1]>,
463 InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
465 // Double-register Integer Binary (4 cycle)
466 InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
467 // Extra latency cycles since wbck is 6 cycles
468 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
469 InstrStage<1, [A9_Pipe1]>,
470 InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
472 // Quad-register Integer Binary (4 cycle)
473 InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
474 // Extra latency cycles since wbck is 6 cycles
475 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
476 InstrStage<1, [A9_Pipe1]>,
477 InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
479 // Double-register Integer Subtract (4 cycle variant; the 3-cycle form is IIC_VSUBiD)
480 InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
481 // Extra latency cycles since wbck is 6 cycles
482 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
483 InstrStage<1, [A9_Pipe1]>,
484 InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
486 // Quad-register Integer Subtract (4 cycle variant; the 3-cycle form is IIC_VSUBiQ)
487 InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
488 // Extra latency cycles since wbck is 6 cycles
489 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
490 InstrStage<1, [A9_Pipe1]>,
491 InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
494 // Double-register Integer Count
495 InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
496 // Extra latency cycles since wbck is 6 cycles
497 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
498 InstrStage<1, [A9_Pipe1]>,
499 InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
501 // Quad-register Integer Count
502 // Result written in N3, but that is relative to the last cycle of multicycle,
503 // so we use 4 for those cases
504 InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
505 // Extra latency cycles since wbck is 7 cycles
506 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
507 InstrStage<1, [A9_Pipe1]>,
508 InstrStage<2, [A9_NPipe]>], [4, 2, 2]>,
510 // Double-register Absolute Difference and Accumulate
511 InstrItinData<IIC_VABAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
512 // Extra latency cycles since wbck is 6 cycles
513 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
514 InstrStage<1, [A9_Pipe1]>,
515 InstrStage<1, [A9_NPipe]>], [6, 3, 2, 1]>,
517 // Quad-register Absolute Difference and Accumulate
518 InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
519 // Extra latency cycles since wbck is 6 cycles
520 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
521 InstrStage<1, [A9_Pipe1]>,
522 InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
524 // Double-register Integer Pair Add Long
525 InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
526 // Extra latency cycles since wbck is 6 cycles
527 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
528 InstrStage<1, [A9_Pipe1]>,
529 InstrStage<1, [A9_NPipe]>], [6, 3, 1]>,
531 // Quad-register Integer Pair Add Long
532 InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
533 // Extra latency cycles since wbck is 6 cycles
534 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
535 InstrStage<1, [A9_Pipe1]>,
536 InstrStage<2, [A9_NPipe]>], [6, 3, 1]>,
539 // Double-register Integer Multiply (.8, .16)
540 InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
541 // Extra latency cycles since wbck is 6 cycles
542 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
543 InstrStage<1, [A9_Pipe1]>,
544 InstrStage<1, [A9_NPipe]>], [6, 2, 2]>,
546 // Quad-register Integer Multiply (.8, .16)
547 InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
548 // Extra latency cycles since wbck is 7 cycles
549 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
550 InstrStage<1, [A9_Pipe1]>,
551 InstrStage<2, [A9_NPipe]>], [7, 2, 2]>,
554 // Double-register Integer Multiply (.32)
555 InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
556 // Extra latency cycles since wbck is 7 cycles
557 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
558 InstrStage<1, [A9_Pipe1]>,
559 InstrStage<2, [A9_NPipe]>], [7, 2, 1]>,
561 // Quad-register Integer Multiply (.32)
562 InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
563 // Extra latency cycles since wbck is 9 cycles
564 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
565 InstrStage<1, [A9_Pipe1]>,
566 InstrStage<4, [A9_NPipe]>], [9, 2, 1]>,
568 // Double-register Integer Multiply-Accumulate (.8, .16)
569 InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
570 // Extra latency cycles since wbck is 6 cycles
571 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
572 InstrStage<1, [A9_Pipe1]>,
573 InstrStage<1, [A9_NPipe]>], [6, 3, 2, 2]>,
575 // Double-register Integer Multiply-Accumulate (.32)
576 InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
577 // Extra latency cycles since wbck is 7 cycles
578 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
579 InstrStage<1, [A9_Pipe1]>,
580 InstrStage<2, [A9_NPipe]>], [7, 3, 2, 1]>,
582 // Quad-register Integer Multiply-Accumulate (.8, .16)
583 InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
584 // Extra latency cycles since wbck is 7 cycles
585 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
586 InstrStage<1, [A9_Pipe1]>,
587 InstrStage<2, [A9_NPipe]>], [7, 3, 2, 2]>,
589 // Quad-register Integer Multiply-Accumulate (.32)
590 InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
591 // Extra latency cycles since wbck is 9 cycles
592 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
593 InstrStage<1, [A9_Pipe1]>,
594 InstrStage<4, [A9_NPipe]>], [9, 3, 2, 1]>,
597 InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_DRegsN], 0, Required>,
598 // Extra latency cycles since wbck is 6 cycles
599 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
600 InstrStage<1, [A9_Pipe1]>,
601 InstrStage<1, [A9_NPipe]>], [3]>,
603 // Double-register Permute Move
604 InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_DRegsN], 0, Required>,
605 // FIXME: all latencies are arbitrary, no information is available
606 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
607 InstrStage<1, [A9_Pipe1]>,
608 InstrStage<1, [A9_LSPipe]>], [2, 1]>,
610 // Quad-register Permute Move
611 // Result written in N2, but that is relative to the last cycle of multicycle,
612 // so we use 3 for those cases
613 InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
614 // FIXME: all latencies are arbitrary, no information is available
615 InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
616 InstrStage<1, [A9_Pipe1]>,
617 InstrStage<2, [A9_NPipe]>], [3, 1]>,
619 // Integer to Single-precision Move
620 InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_DRegsN], 0, Required>,
621 // FIXME: all latencies are arbitrary, no information is available
622 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
623 InstrStage<1, [A9_Pipe1]>,
624 InstrStage<1, [A9_NPipe]>], [2, 1]>,
626 // Integer to Double-precision Move
627 InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_DRegsN], 0, Required>,
628 // FIXME: all latencies are arbitrary, no information is available
629 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
630 InstrStage<1, [A9_Pipe1]>,
631 InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
633 // Single-precision to Integer Move
634 InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_DRegsN], 0, Required>,
635 // FIXME: all latencies are arbitrary, no information is available
636 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
637 InstrStage<1, [A9_Pipe1]>,
638 InstrStage<1, [A9_NPipe]>], [2, 1]>,
640 // Double-precision to Integer Move
641 InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_DRegsN], 0, Required>,
642 // FIXME: all latencies are arbitrary, no information is available
643 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
644 InstrStage<1, [A9_Pipe1]>,
645 InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
647 // Integer to Lane Move
648 InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN], 0, Required>,
649 // FIXME: all latencies are arbitrary, no information is available
650 InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
651 InstrStage<1, [A9_Pipe1]>,
652 InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
655 // Double-register FP Unary
656 InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
657 // Extra latency cycles since wbck is 6 cycles
658 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
659 InstrStage<1, [A9_Pipe1]>,
660 InstrStage<1, [A9_NPipe]>], [5, 2]>,
662 // Quad-register FP Unary
663 // Result written in N5, but that is relative to the last cycle of multicycle,
664 // so we use 6 for those cases
665 InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
666 // Extra latency cycles since wbck is 7 cycles
667 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
668 InstrStage<1, [A9_Pipe1]>,
669 InstrStage<2, [A9_NPipe]>], [6, 2]>,
671 // Double-register FP Binary
672 // FIXME: We're using this itin for many instructions and [2, 2] here is too optimistic.
674 InstrItinData<IIC_VBIND, [InstrStage<1, [A9_DRegsN], 0, Required>,
675 // Extra latency cycles since wbck is 6 cycles
676 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
677 InstrStage<1, [A9_Pipe1]>,
678 InstrStage<1, [A9_NPipe]>], [5, 2, 2]>,
680 // Quad-register FP Binary
681 // Result written in N5, but that is relative to the last cycle of multicycle,
682 // so we use 6 for those cases
683 // FIXME: We're using this itin for many instructions and [2, 2] here is too optimistic.
685 InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
686 // Extra latency cycles since wbck is 7 cycles
687 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
688 InstrStage<1, [A9_Pipe1]>,
689 InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
691 // Double-register FP Multiply-Accumulate
692 InstrItinData<IIC_VMACD, [InstrStage<1, [A9_DRegsN], 0, Required>,
693 // Extra latency cycles since wbck is 7 cycles
694 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
695 InstrStage<1, [A9_Pipe1]>,
696 InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
698 // Quad-register FP Multiply-Accumulate
699 // Result written in N9, but that is relative to the last cycle of multicycle,
700 // so we use 10 for those cases
701 InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
702 // Extra latency cycles since wbck is 9 cycles
703 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
704 InstrStage<1, [A9_Pipe1]>,
705 InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>,
707 // Double-register Reciprocal Step
708 InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_DRegsN], 0, Required>,
709 // Extra latency cycles since wbck is 7 cycles
710 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
711 InstrStage<1, [A9_Pipe1]>,
712 InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
714 // Quad-register Reciprocal Step
715 InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
716 // Extra latency cycles since wbck is 9 cycles
717 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
718 InstrStage<1, [A9_Pipe1]>,
719 InstrStage<4, [A9_NPipe]>], [8, 2, 2]>,
721 // Double-register Permute
722 InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_DRegsN], 0, Required>,
723 // Extra latency cycles since wbck is 6 cycles
724 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
725 InstrStage<1, [A9_Pipe1]>,
726 InstrStage<1, [A9_NPipe]>], [2, 2, 1, 1]>,
728 // Quad-register Permute
729 // Result written in N2, but that is relative to the last cycle of multicycle,
730 // so we use 3 for those cases
731 InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
732 // Extra latency cycles since wbck is 7 cycles
733 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
734 InstrStage<1, [A9_Pipe1]>,
735 InstrStage<2, [A9_NPipe]>], [3, 3, 1, 1]>,
737 // Quad-register Permute (3 cycle issue)
738 // Result written in N2, but that is relative to the last cycle of multicycle,
739 // so we use 4 for those cases
740 InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_DRegsN], 0, Required>,
741 // Extra latency cycles since wbck is 8 cycles
742 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
743 InstrStage<1, [A9_Pipe1]>,
744 InstrStage<3, [A9_LSPipe]>], [4, 4, 1, 1]>,
747 // Double-register VEXT
748 InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_DRegsN], 0, Required>,
749 // Extra latency cycles since wbck is 6 cycles
750 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
751 InstrStage<1, [A9_Pipe1]>,
752 InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
754 // Quad-register VEXT
755 InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
756 // Extra latency cycles since wbck is 7 cycles
757 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
758 InstrStage<1, [A9_Pipe1]>,
759 InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
762 InstrItinData<IIC_VTB1, [InstrStage<1, [A9_DRegsN], 0, Required>,
763 // Extra latency cycles since wbck is 7 cycles
764 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
765 InstrStage<1, [A9_Pipe1]>,
766 InstrStage<2, [A9_NPipe]>], [3, 2, 1]>,
767 InstrItinData<IIC_VTB2, [InstrStage<2, [A9_DRegsN], 0, Required>,
768 // Extra latency cycles since wbck is 7 cycles
769 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
770 InstrStage<1, [A9_Pipe1]>,
771 InstrStage<2, [A9_NPipe]>], [3, 2, 2, 1]>,
772 InstrItinData<IIC_VTB3, [InstrStage<2, [A9_DRegsN], 0, Required>,
773 // Extra latency cycles since wbck is 8 cycles
774 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
775 InstrStage<1, [A9_Pipe1]>,
776 InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 1]>,
777 InstrItinData<IIC_VTB4, [InstrStage<1, [A9_DRegsN], 0, Required>,
778 // Extra latency cycles since wbck is 8 cycles
779 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
780 InstrStage<1, [A9_Pipe1]>,
781 InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 3, 1]>,
784 InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_DRegsN], 0, Required>,
785 // Extra latency cycles since wbck is 7 cycles
786 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
787 InstrStage<1, [A9_Pipe1]>,
788 InstrStage<2, [A9_NPipe]>], [3, 1, 2, 1]>,
789 InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_DRegsN], 0, Required>,
790 // Extra latency cycles since wbck is 7 cycles
791 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
792 InstrStage<1, [A9_Pipe1]>,
793 InstrStage<2, [A9_NPipe]>], [3, 1, 2, 2, 1]>,
794 InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_DRegsN], 0, Required>,
795 // Extra latency cycles since wbck is 8 cycles
796 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
797 InstrStage<1, [A9_Pipe1]>,
798 InstrStage<3, [A9_NPipe]>], [4, 1, 2, 2, 3, 1]>,
799 InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_DRegsN], 0, Required>,
800 // Extra latency cycles since wbck is 8 cycles
801 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
802 InstrStage<1, [A9_Pipe1]>,
803 InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]>