1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
12 //===----------------------------------------------------------------------===//
15 // Ad-hoc scheduling information derived from the pretty vague "Cortex-A9 Technical Reference Manual".
// Functional units of the Cortex-A9 scheduling model. Every unit declared
// here is listed in the CortexA9Itineraries unit list and is referenced by
// the InstrStage entries of the itineraries.
19 def A9_Issue0 : FuncUnit; // Issue 0
20 def A9_Issue1 : FuncUnit; // Issue 1
21 def A9_Branch : FuncUnit; // Branch
22 def A9_ALU0 : FuncUnit; // ALU / MUL pipeline 0 (the only ALU used by the multiply itineraries)
23 def A9_ALU1 : FuncUnit; // ALU pipeline 1
24 def A9_AGU : FuncUnit; // Address generation unit for ld / st
25 def A9_NPipe : FuncUnit; // NEON pipeline
26 def A9_MUX0 : FuncUnit; // AGU + NEON/FPU multiplexer
27 def A9_LSUnit : FuncUnit; // L/S Unit
// The two units below are artificial: they model the register file shared by
// VFP and NEON. VFP itineraries mark DRegsVFP as Required and DRegsN as
// Reserved; NEON itineraries do the same with the two units swapped.
28 def A9_DRegsVFP: FuncUnit; // FP register set, VFP side
29 def A9_DRegsN : FuncUnit; // FP register set, NEON side
// Bypass tag for the Cortex-A9 model. It appears in the bypass lists of the
// integer load itineraries and of several ALU, MVN and compare itineraries;
// operands without it are tagged NoBypass there.
32 def A9_LdBypass : Bypass;
34 def CortexA9Itineraries : ProcessorItineraries<
35 [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
36 A9_LSUnit, A9_DRegsVFP, A9_DRegsN],
38 // Two fully-pipelined integer ALU pipelines
41 // Move instructions, unconditional
42 InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
43 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
44 InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
45 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
46 InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
47 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
48 InstrItinData<IIC_iMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
49 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
50 InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
51 InstrStage<1, [A9_ALU0, A9_ALU1]>,
52 InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
55 InstrItinData<IIC_iMVNi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
56 InstrStage<1, [A9_ALU0, A9_ALU1]>],
58 InstrItinData<IIC_iMVNr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
59 InstrStage<1, [A9_ALU0, A9_ALU1]>],
60 [1, 1], [NoBypass, A9_LdBypass]>,
61 InstrItinData<IIC_iMVNsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
62 InstrStage<2, [A9_ALU0, A9_ALU1]>],
64 InstrItinData<IIC_iMVNsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
65 InstrStage<3, [A9_ALU0, A9_ALU1]>],
69 InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
70 InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
72 // Binary Instructions that produce a result
73 InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
74 InstrStage<1, [A9_ALU0, A9_ALU1]>],
75 [1, 1], [NoBypass, A9_LdBypass]>,
76 InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
77 InstrStage<1, [A9_ALU0, A9_ALU1]>],
78 [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
79 InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
80 InstrStage<2, [A9_ALU0, A9_ALU1]>],
81 [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
82 InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
83 InstrStage<2, [A9_ALU0, A9_ALU1]>],
84 [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
85 InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
86 InstrStage<3, [A9_ALU0, A9_ALU1]>],
88 [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
90 // Bitwise Instructions that produce a result
91 InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
92 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
93 InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
94 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
95 InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
96 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
97 InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
98 InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
100 // Unary Instructions that produce a result
103 InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
104 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
106 // BFC, BFI, UBFX, SBFX
107 InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
108 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
111 // Zero and sign extension instructions
112 InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
113 InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
114 InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
115 InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
116 InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
117 InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
119 // Compare instructions
120 InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
121 InstrStage<1, [A9_ALU0, A9_ALU1]>],
123 InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
124 InstrStage<1, [A9_ALU0, A9_ALU1]>],
125 [1, 1], [A9_LdBypass, A9_LdBypass]>,
126 InstrItinData<IIC_iCMPsi , [InstrStage<2, [A9_ALU0, A9_ALU1]>],
127 [1, 1], [A9_LdBypass, NoBypass]>,
128 InstrItinData<IIC_iCMPsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
129 InstrStage<3, [A9_ALU0, A9_ALU1]>],
130 [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
133 InstrItinData<IIC_iTSTi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
134 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
135 InstrItinData<IIC_iTSTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
136 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
137 InstrItinData<IIC_iTSTsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
138 InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
139 InstrItinData<IIC_iTSTsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
140 InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
142 // Move instructions, conditional
143 // FIXME: Correctly model the extra input dep on the destination.
144 InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
145 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
146 InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
147 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
148 InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
149 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
150 InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
151 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
153 // Integer multiply pipeline
155 InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
156 InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
157 InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
158 InstrStage<2, [A9_ALU0]>],
160 InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
161 InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
162 InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
163 InstrStage<2, [A9_ALU0]>],
165 InstrItinData<IIC_iMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
166 InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
167 InstrItinData<IIC_iMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
168 InstrStage<3, [A9_ALU0]>],
170 // Integer load pipeline
171 // FIXME: The timings are some rough approximations
174 InstrItinData<IIC_iLoad_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
175 InstrStage<1, [A9_MUX0], 0>,
176 InstrStage<1, [A9_AGU]>,
177 InstrStage<1, [A9_LSUnit]>],
178 [3, 1], [A9_LdBypass]>,
179 InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
180 InstrStage<1, [A9_MUX0], 0>,
181 InstrStage<2, [A9_AGU]>,
182 InstrStage<1, [A9_LSUnit]>],
183 [4, 1], [A9_LdBypass]>,
184 // FIXME: If address is 64-bit aligned, AGU cycles is 1.
185 InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
186 InstrStage<1, [A9_MUX0], 0>,
187 InstrStage<2, [A9_AGU]>,
188 InstrStage<1, [A9_LSUnit]>],
189 [3, 3, 1], [A9_LdBypass]>,
192 InstrItinData<IIC_iLoad_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
193 InstrStage<1, [A9_MUX0], 0>,
194 InstrStage<1, [A9_AGU]>,
195 InstrStage<1, [A9_LSUnit]>],
196 [3, 1, 1], [A9_LdBypass]>,
197 InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
198 InstrStage<1, [A9_MUX0], 0>,
199 InstrStage<2, [A9_AGU]>,
200 InstrStage<1, [A9_LSUnit]>],
201 [4, 1, 1], [A9_LdBypass]>,
202 InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
203 InstrStage<1, [A9_MUX0], 0>,
204 InstrStage<2, [A9_AGU]>,
205 InstrStage<1, [A9_LSUnit]>],
206 [3, 3, 1, 1], [A9_LdBypass]>,
208 // Scaled register offset
209 InstrItinData<IIC_iLoad_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
210 InstrStage<1, [A9_MUX0], 0>,
211 InstrStage<1, [A9_AGU]>,
212 InstrStage<1, [A9_LSUnit]>],
213 [4, 1, 1], [A9_LdBypass]>,
214 InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
215 InstrStage<1, [A9_MUX0], 0>,
216 InstrStage<2, [A9_AGU]>,
217 InstrStage<1, [A9_LSUnit]>],
218 [5, 1, 1], [A9_LdBypass]>,
220 // Immediate offset with update
221 InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
222 InstrStage<1, [A9_MUX0], 0>,
223 InstrStage<1, [A9_AGU]>,
224 InstrStage<1, [A9_LSUnit]>],
225 [3, 2, 1], [A9_LdBypass]>,
226 InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
227 InstrStage<1, [A9_MUX0], 0>,
228 InstrStage<2, [A9_AGU]>,
229 InstrStage<1, [A9_LSUnit]>],
230 [4, 3, 1], [A9_LdBypass]>,
232 // Register offset with update
233 InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
234 InstrStage<1, [A9_MUX0], 0>,
235 InstrStage<1, [A9_AGU]>,
236 InstrStage<1, [A9_LSUnit]>],
237 [3, 2, 1, 1], [A9_LdBypass]>,
238 InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
239 InstrStage<1, [A9_MUX0], 0>,
240 InstrStage<2, [A9_AGU]>,
241 InstrStage<1, [A9_LSUnit]>],
242 [4, 3, 1, 1], [A9_LdBypass]>,
243 InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
244 InstrStage<1, [A9_MUX0], 0>,
245 InstrStage<2, [A9_AGU]>,
246 InstrStage<1, [A9_LSUnit]>],
247 [3, 3, 1, 1], [A9_LdBypass]>,
249 // Scaled register offset with update
250 InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
251 InstrStage<1, [A9_MUX0], 0>,
252 InstrStage<1, [A9_AGU]>,
253 InstrStage<1, [A9_LSUnit]>],
254 [4, 3, 1, 1], [A9_LdBypass]>,
255 InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
256 InstrStage<1, [A9_MUX0], 0>,
257 InstrStage<2, [A9_AGU]>,
258 InstrStage<1, [A9_LSUnit]>],
259 [5, 4, 1, 1], [A9_LdBypass]>,
261 // Load multiple, def is the 5th operand.
262 // FIXME: This assumes 3 to 4 registers.
263 InstrItinData<IIC_iLoad_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
264 InstrStage<1, [A9_MUX0], 0>,
265 InstrStage<2, [A9_AGU], 1>,
266 InstrStage<2, [A9_LSUnit]>],
268 [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
270 // Load multiple + update, defs are the 1st and 5th operands.
271 InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
272 InstrStage<1, [A9_MUX0], 0>,
273 InstrStage<2, [A9_AGU], 1>,
274 InstrStage<2, [A9_LSUnit]>],
276 [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
278 // Load multiple plus branch
279 InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
280 InstrStage<1, [A9_MUX0], 0>,
281 InstrStage<1, [A9_AGU], 1>,
282 InstrStage<2, [A9_LSUnit]>,
283 InstrStage<1, [A9_Branch]>],
285 [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
287 // Pop, def is the 3rd operand.
288 InstrItinData<IIC_iPop , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
289 InstrStage<1, [A9_MUX0], 0>,
290 InstrStage<2, [A9_AGU], 1>,
291 InstrStage<2, [A9_LSUnit]>],
293 [NoBypass, NoBypass, A9_LdBypass]>,
295 // Pop + branch, def is the 3rd operand.
296 InstrItinData<IIC_iPop_Br, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
297 InstrStage<1, [A9_MUX0], 0>,
298 InstrStage<2, [A9_AGU], 1>,
299 InstrStage<2, [A9_LSUnit]>,
300 InstrStage<1, [A9_Branch]>],
302 [NoBypass, NoBypass, A9_LdBypass]>,
305 // iLoadi + iALUr for t2LDRpci_pic.
306 InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
307 InstrStage<1, [A9_MUX0], 0>,
308 InstrStage<1, [A9_AGU]>,
309 InstrStage<1, [A9_LSUnit]>,
310 InstrStage<1, [A9_ALU0, A9_ALU1]>],
313 // Integer store pipeline
316 InstrItinData<IIC_iStore_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
317 InstrStage<1, [A9_MUX0], 0>,
318 InstrStage<1, [A9_AGU]>,
319 InstrStage<1, [A9_LSUnit]>], [1, 1]>,
320 InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
321 InstrStage<1, [A9_MUX0], 0>,
322 InstrStage<2, [A9_AGU], 1>,
323 InstrStage<1, [A9_LSUnit]>], [1, 1]>,
324 // FIXME: If address is 64-bit aligned, AGU cycles is 1.
325 InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
326 InstrStage<1, [A9_MUX0], 0>,
327 InstrStage<2, [A9_AGU], 1>,
328 InstrStage<1, [A9_LSUnit]>], [1, 1]>,
331 InstrItinData<IIC_iStore_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
332 InstrStage<1, [A9_MUX0], 0>,
333 InstrStage<1, [A9_AGU]>,
334 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
335 InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
336 InstrStage<1, [A9_MUX0], 0>,
337 InstrStage<2, [A9_AGU], 1>,
338 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
339 InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
340 InstrStage<1, [A9_MUX0], 0>,
341 InstrStage<2, [A9_AGU], 1>,
342 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
344 // Scaled register offset
345 InstrItinData<IIC_iStore_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
346 InstrStage<1, [A9_MUX0], 0>,
347 InstrStage<1, [A9_AGU]>,
348 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
349 InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
350 InstrStage<1, [A9_MUX0], 0>,
351 InstrStage<2, [A9_AGU], 1>,
352 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
354 // Immediate offset with update
355 InstrItinData<IIC_iStore_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
356 InstrStage<1, [A9_MUX0], 0>,
357 InstrStage<1, [A9_AGU]>,
358 InstrStage<1, [A9_LSUnit]>], [2, 1, 1]>,
359 InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
360 InstrStage<1, [A9_MUX0], 0>,
361 InstrStage<2, [A9_AGU], 1>,
362 InstrStage<1, [A9_LSUnit]>], [3, 1, 1]>,
364 // Register offset with update
365 InstrItinData<IIC_iStore_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
366 InstrStage<1, [A9_MUX0], 0>,
367 InstrStage<1, [A9_AGU]>,
368 InstrStage<1, [A9_LSUnit]>],
370 InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
371 InstrStage<1, [A9_MUX0], 0>,
372 InstrStage<2, [A9_AGU], 1>,
373 InstrStage<1, [A9_LSUnit]>],
375 InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
376 InstrStage<1, [A9_MUX0], 0>,
377 InstrStage<2, [A9_AGU], 1>,
378 InstrStage<1, [A9_LSUnit]>],
381 // Scaled register offset with update
382 InstrItinData<IIC_iStore_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
383 InstrStage<1, [A9_MUX0], 0>,
384 InstrStage<1, [A9_AGU]>,
385 InstrStage<1, [A9_LSUnit]>],
387 InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
388 InstrStage<1, [A9_MUX0], 0>,
389 InstrStage<2, [A9_AGU], 1>,
390 InstrStage<1, [A9_LSUnit]>],
394 InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
395 InstrStage<1, [A9_MUX0], 0>,
396 InstrStage<1, [A9_AGU]>,
397 InstrStage<2, [A9_LSUnit]>]>,
399 // Store multiple + update
400 InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
401 InstrStage<1, [A9_MUX0], 0>,
402 InstrStage<1, [A9_AGU]>,
403 InstrStage<2, [A9_LSUnit]>], [2]>,
407 // no delay slots, so the latency of a branch is unimportant
408 InstrItinData<IIC_Br , [InstrStage<1, [A9_Issue0], 0>,
409 InstrStage<1, [A9_Issue1], 0>,
410 InstrStage<1, [A9_Branch]>]>,
412 // VFP and NEON share the same register file. This means that every VFP
413 // instruction should wait for full completion of the consecutive NEON
414 // instruction and vice-versa. We model this behavior with two artificial FUs:
415 // DRegsVFP and DRegsN.
417 // Every VFP instruction:
418 // - Acquires DRegsVFP resource for 1 cycle
419 // - Reserves DRegsN resource for the whole duration (including time to
420 // register file writeback!).
421 // Every NEON instruction does the same but with FUs swapped.
423 // Since the reserved FU cannot be acquired, this models precisely
424 // "cross-domain" stalls.
427 // Issue through integer pipeline, and execute in NEON unit.
429 // FP Special Register to Integer Register File Move
430 InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
431 InstrStage<1, [A9_MUX0], 0>,
432 InstrStage<1, [A9_DRegsVFP], 0, Required>,
433 InstrStage<2, [A9_DRegsN], 0, Reserved>,
434 InstrStage<1, [A9_NPipe]>]>,
436 // Single-precision FP Unary
437 InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
438 InstrStage<1, [A9_MUX0], 0>,
439 InstrStage<1, [A9_DRegsVFP], 0, Required>,
440 // Extra latency cycles since wbck is 2 cycles
441 InstrStage<3, [A9_DRegsN], 0, Reserved>,
442 InstrStage<1, [A9_NPipe]>],
445 // Double-precision FP Unary
446 InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
447 InstrStage<1, [A9_MUX0], 0>,
448 InstrStage<1, [A9_DRegsVFP], 0, Required>,
449 // Extra latency cycles since wbck is 2 cycles
450 InstrStage<3, [A9_DRegsN], 0, Reserved>,
451 InstrStage<1, [A9_NPipe]>],
455 // Single-precision FP Compare
456 InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
457 InstrStage<1, [A9_MUX0], 0>,
458 InstrStage<1, [A9_DRegsVFP], 0, Required>,
459 // Extra latency cycles since wbck is 4 cycles
460 InstrStage<5, [A9_DRegsN], 0, Reserved>,
461 InstrStage<1, [A9_NPipe]>],
464 // Double-precision FP Compare
465 InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
466 InstrStage<1, [A9_MUX0], 0>,
467 InstrStage<1, [A9_DRegsVFP], 0, Required>,
468 // Extra latency cycles since wbck is 4 cycles
469 InstrStage<5, [A9_DRegsN], 0, Reserved>,
470 InstrStage<1, [A9_NPipe]>],
473 // Single to Double FP Convert
474 InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
475 InstrStage<1, [A9_MUX0], 0>,
476 InstrStage<1, [A9_DRegsVFP], 0, Required>,
477 InstrStage<5, [A9_DRegsN], 0, Reserved>,
478 InstrStage<1, [A9_NPipe]>],
481 // Double to Single FP Convert
482 InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
483 InstrStage<1, [A9_MUX0], 0>,
484 InstrStage<1, [A9_DRegsVFP], 0, Required>,
485 InstrStage<5, [A9_DRegsN], 0, Reserved>,
486 InstrStage<1, [A9_NPipe]>],
490 // Single to Half FP Convert
491 InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
492 InstrStage<1, [A9_MUX0], 0>,
493 InstrStage<1, [A9_DRegsVFP], 0, Required>,
494 InstrStage<5, [A9_DRegsN], 0, Reserved>,
495 InstrStage<1, [A9_NPipe]>],
498 // Half to Single FP Convert
499 InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
500 InstrStage<1, [A9_MUX0], 0>,
501 InstrStage<1, [A9_DRegsVFP], 0, Required>,
502 InstrStage<3, [A9_DRegsN], 0, Reserved>,
503 InstrStage<1, [A9_NPipe]>],
507 // Single-Precision FP to Integer Convert
508 InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
509 InstrStage<1, [A9_MUX0], 0>,
510 InstrStage<1, [A9_DRegsVFP], 0, Required>,
511 InstrStage<5, [A9_DRegsN], 0, Reserved>,
512 InstrStage<1, [A9_NPipe]>],
515 // Double-Precision FP to Integer Convert
516 InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
517 InstrStage<1, [A9_MUX0], 0>,
518 InstrStage<1, [A9_DRegsVFP], 0, Required>,
519 InstrStage<5, [A9_DRegsN], 0, Reserved>,
520 InstrStage<1, [A9_NPipe]>],
523 // Integer to Single-Precision FP Convert
524 InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
525 InstrStage<1, [A9_MUX0], 0>,
526 InstrStage<1, [A9_DRegsVFP], 0, Required>,
527 InstrStage<5, [A9_DRegsN], 0, Reserved>,
528 InstrStage<1, [A9_NPipe]>],
531 // Integer to Double-Precision FP Convert
532 InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
533 InstrStage<1, [A9_MUX0], 0>,
534 InstrStage<1, [A9_DRegsVFP], 0, Required>,
535 InstrStage<5, [A9_DRegsN], 0, Reserved>,
536 InstrStage<1, [A9_NPipe]>],
539 // Single-precision FP ALU
540 InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
541 InstrStage<1, [A9_MUX0], 0>,
542 InstrStage<1, [A9_DRegsVFP], 0, Required>,
543 InstrStage<5, [A9_DRegsN], 0, Reserved>,
544 InstrStage<1, [A9_NPipe]>],
547 // Double-precision FP ALU
548 InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
549 InstrStage<1, [A9_MUX0], 0>,
550 InstrStage<1, [A9_DRegsVFP], 0, Required>,
551 InstrStage<5, [A9_DRegsN], 0, Reserved>,
552 InstrStage<1, [A9_NPipe]>],
555 // Single-precision FP Multiply
556 InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
557 InstrStage<1, [A9_MUX0], 0>,
558 InstrStage<1, [A9_DRegsVFP], 0, Required>,
559 InstrStage<6, [A9_DRegsN], 0, Reserved>,
560 InstrStage<1, [A9_NPipe]>],
563 // Double-precision FP Multiply
564 InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
565 InstrStage<1, [A9_MUX0], 0>,
566 InstrStage<1, [A9_DRegsVFP], 0, Required>,
567 InstrStage<7, [A9_DRegsN], 0, Reserved>,
568 InstrStage<2, [A9_NPipe]>],
571 // Single-precision FP MAC
572 InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
573 InstrStage<1, [A9_MUX0], 0>,
574 InstrStage<1, [A9_DRegsVFP], 0, Required>,
575 InstrStage<9, [A9_DRegsN], 0, Reserved>,
576 InstrStage<1, [A9_NPipe]>],
579 // Double-precision FP MAC
580 InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
581 InstrStage<1, [A9_MUX0], 0>,
582 InstrStage<1, [A9_DRegsVFP], 0, Required>,
583 InstrStage<10, [A9_DRegsN], 0, Reserved>,
584 InstrStage<2, [A9_NPipe]>],
587 // Single-precision FP DIV
588 InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
589 InstrStage<1, [A9_MUX0], 0>,
590 InstrStage<1, [A9_DRegsVFP], 0, Required>,
591 InstrStage<16, [A9_DRegsN], 0, Reserved>,
592 InstrStage<10, [A9_NPipe]>],
595 // Double-precision FP DIV
596 InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
597 InstrStage<1, [A9_MUX0], 0>,
598 InstrStage<1, [A9_DRegsVFP], 0, Required>,
599 InstrStage<26, [A9_DRegsN], 0, Reserved>,
600 InstrStage<20, [A9_NPipe]>],
603 // Single-precision FP SQRT
604 InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
605 InstrStage<1, [A9_MUX0], 0>,
606 InstrStage<1, [A9_DRegsVFP], 0, Required>,
607 InstrStage<18, [A9_DRegsN], 0, Reserved>,
608 InstrStage<13, [A9_NPipe]>],
611 // Double-precision FP SQRT
612 InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
613 InstrStage<1, [A9_MUX0], 0>,
614 InstrStage<1, [A9_DRegsVFP], 0, Required>,
615 InstrStage<33, [A9_DRegsN], 0, Reserved>,
616 InstrStage<28, [A9_NPipe]>],
620 // Integer to Single-precision Move
621 InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
622 InstrStage<1, [A9_MUX0], 0>,
623 InstrStage<1, [A9_DRegsVFP], 0, Required>,
624 // Extra 1 latency cycle since wbck is 2 cycles
625 InstrStage<3, [A9_DRegsN], 0, Reserved>,
626 InstrStage<1, [A9_NPipe]>],
629 // Integer to Double-precision Move
630 InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
631 InstrStage<1, [A9_MUX0], 0>,
632 InstrStage<1, [A9_DRegsVFP], 0, Required>,
633 // Extra 1 latency cycle since wbck is 2 cycles
634 InstrStage<3, [A9_DRegsN], 0, Reserved>,
635 InstrStage<1, [A9_NPipe]>],
638 // Single-precision to Integer Move
639 InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
640 InstrStage<1, [A9_MUX0], 0>,
641 InstrStage<1, [A9_DRegsVFP], 0, Required>,
642 InstrStage<2, [A9_DRegsN], 0, Reserved>,
643 InstrStage<1, [A9_NPipe]>],
646 // Double-precision to Integer Move
647 InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
648 InstrStage<1, [A9_MUX0], 0>,
649 InstrStage<1, [A9_DRegsVFP], 0, Required>,
650 InstrStage<2, [A9_DRegsN], 0, Reserved>,
651 InstrStage<1, [A9_NPipe]>],
654 // Single-precision FP Load
655 InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
656 InstrStage<1, [A9_MUX0], 0>,
657 InstrStage<1, [A9_DRegsVFP], 0, Required>,
658 InstrStage<2, [A9_DRegsN], 0, Reserved>,
659 InstrStage<1, [A9_NPipe]>,
660 InstrStage<1, [A9_LSUnit]>],
663 // Double-precision FP Load
664 // FIXME: Result latency is 1 if address is 64-bit aligned.
665 InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
666 InstrStage<1, [A9_MUX0], 0>,
667 InstrStage<1, [A9_DRegsVFP], 0, Required>,
668 InstrStage<2, [A9_DRegsN], 0, Reserved>,
669 InstrStage<1, [A9_NPipe]>,
670 InstrStage<1, [A9_LSUnit]>],
674 InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
675 InstrStage<1, [A9_MUX0], 0>,
676 InstrStage<1, [A9_DRegsVFP], 0, Required>,
677 InstrStage<2, [A9_DRegsN], 0, Reserved>,
678 InstrStage<1, [A9_NPipe]>,
679 InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>,
681 // FP Load Multiple + update
682 InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
683 InstrStage<1, [A9_MUX0], 0>,
684 InstrStage<1, [A9_DRegsVFP], 0, Required>,
685 InstrStage<2, [A9_DRegsN], 0, Reserved>,
686 InstrStage<1, [A9_NPipe]>,
687 InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1]>,
689 // Single-precision FP Store
690 InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
691 InstrStage<1, [A9_MUX0], 0>,
692 InstrStage<1, [A9_DRegsVFP], 0, Required>,
693 InstrStage<2, [A9_DRegsN], 0, Reserved>,
694 InstrStage<1, [A9_NPipe]>,
695 InstrStage<1, [A9_LSUnit]>],
698 // Double-precision FP Store
699 InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
700 InstrStage<1, [A9_MUX0], 0>,
701 InstrStage<1, [A9_DRegsVFP], 0, Required>,
702 InstrStage<2, [A9_DRegsN], 0, Reserved>,
703 InstrStage<1, [A9_NPipe]>,
704 InstrStage<1, [A9_LSUnit]>],
708 InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
709 InstrStage<1, [A9_MUX0], 0>,
710 InstrStage<1, [A9_DRegsVFP], 0, Required>,
711 InstrStage<2, [A9_DRegsN], 0, Reserved>,
712 InstrStage<1, [A9_NPipe]>,
713 InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>,
715 // FP Store Multiple + update
716 InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
717 InstrStage<1, [A9_MUX0], 0>,
718 InstrStage<1, [A9_DRegsVFP], 0, Required>,
719 InstrStage<2, [A9_DRegsN], 0, Reserved>,
720 InstrStage<1, [A9_NPipe]>,
721 InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1]>,
724 // FIXME: Conservatively assume insufficient alignment.
725 InstrItinData<IIC_VLD1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
726 InstrStage<1, [A9_MUX0], 0>,
727 InstrStage<1, [A9_DRegsN], 0, Required>,
728 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
729 InstrStage<2, [A9_NPipe], 1>,
730 InstrStage<2, [A9_LSUnit]>],
733 InstrItinData<IIC_VLD1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
734 InstrStage<1, [A9_MUX0], 0>,
735 InstrStage<1, [A9_DRegsN], 0, Required>,
736 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
737 InstrStage<2, [A9_NPipe], 1>,
738 InstrStage<2, [A9_LSUnit]>],
741 InstrItinData<IIC_VLD1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
742 InstrStage<1, [A9_MUX0], 0>,
743 InstrStage<1, [A9_DRegsN], 0, Required>,
744 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
745 InstrStage<3, [A9_NPipe], 1>,
746 InstrStage<3, [A9_LSUnit]>],
749 InstrItinData<IIC_VLD1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
750 InstrStage<1, [A9_MUX0], 0>,
751 InstrStage<1, [A9_DRegsN], 0, Required>,
752 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
753 InstrStage<3, [A9_NPipe], 1>,
754 InstrStage<3, [A9_LSUnit]>],
757 InstrItinData<IIC_VLD1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
758 InstrStage<1, [A9_MUX0], 0>,
759 InstrStage<1, [A9_DRegsN], 0, Required>,
760 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
761 InstrStage<2, [A9_NPipe], 1>,
762 InstrStage<2, [A9_LSUnit]>],
765 InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
766 InstrStage<1, [A9_MUX0], 0>,
767 InstrStage<1, [A9_DRegsN], 0, Required>,
768 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
769 InstrStage<2, [A9_NPipe], 1>,
770 InstrStage<2, [A9_LSUnit]>],
773 InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
774 InstrStage<1, [A9_MUX0], 0>,
775 InstrStage<1, [A9_DRegsN], 0, Required>,
776 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
777 InstrStage<3, [A9_NPipe], 1>,
778 InstrStage<3, [A9_LSUnit]>],
781 InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
782 InstrStage<1, [A9_MUX0], 0>,
783 InstrStage<1, [A9_DRegsN], 0, Required>,
784 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
785 InstrStage<3, [A9_NPipe], 1>,
786 InstrStage<3, [A9_LSUnit]>],
790 InstrItinData<IIC_VLD2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
791 InstrStage<1, [A9_MUX0], 0>,
792 InstrStage<1, [A9_DRegsN], 0, Required>,
793 // Extra latency cycles since wbck is 7 cycles
794 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
795 InstrStage<2, [A9_NPipe], 1>,
796 InstrStage<2, [A9_LSUnit]>],
800 InstrItinData<IIC_VLD2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
801 InstrStage<1, [A9_MUX0], 0>,
802 InstrStage<1, [A9_DRegsN], 0, Required>,
803 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
804 InstrStage<3, [A9_NPipe], 1>,
805 InstrStage<3, [A9_LSUnit]>],
809 InstrItinData<IIC_VLD2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
810 InstrStage<1, [A9_MUX0], 0>,
811 InstrStage<1, [A9_DRegsN], 0, Required>,
812 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
813 InstrStage<3, [A9_NPipe], 1>,
814 InstrStage<3, [A9_LSUnit]>],
818 InstrItinData<IIC_VLD2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
819 InstrStage<1, [A9_MUX0], 0>,
820 InstrStage<1, [A9_DRegsN], 0, Required>,
821 // Extra latency cycles since wbck is 7 cycles
822 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
823 InstrStage<2, [A9_NPipe], 1>,
824 InstrStage<2, [A9_LSUnit]>],
828 InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
829 InstrStage<1, [A9_MUX0], 0>,
830 InstrStage<1, [A9_DRegsN], 0, Required>,
831 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
832 InstrStage<3, [A9_NPipe], 1>,
833 InstrStage<3, [A9_LSUnit]>],
837 InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
838 InstrStage<1, [A9_MUX0], 0>,
839 InstrStage<1, [A9_DRegsN], 0, Required>,
840 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
841 InstrStage<3, [A9_NPipe], 1>,
842 InstrStage<3, [A9_LSUnit]>],
843 [4, 4, 2, 1, 1, 1, 1, 1]>,
846 InstrItinData<IIC_VLD3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
847 InstrStage<1, [A9_MUX0], 0>,
848 InstrStage<1, [A9_DRegsN], 0, Required>,
849 InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
850 InstrStage<4, [A9_NPipe], 1>,
851 InstrStage<4, [A9_LSUnit]>],
855 InstrItinData<IIC_VLD3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
856 InstrStage<1, [A9_MUX0], 0>,
857 InstrStage<1, [A9_DRegsN], 0, Required>,
858 InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
859 InstrStage<5, [A9_NPipe], 1>,
860 InstrStage<5, [A9_LSUnit]>],
861 [5, 5, 6, 1, 1, 1, 1, 2]>,
864 InstrItinData<IIC_VLD3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
865 InstrStage<1, [A9_MUX0], 0>,
866 InstrStage<1, [A9_DRegsN], 0, Required>,
867 InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
868 InstrStage<4, [A9_NPipe], 1>,
869 InstrStage<4, [A9_LSUnit]>],
873 InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
874 InstrStage<1, [A9_MUX0], 0>,
875 InstrStage<1, [A9_DRegsN], 0, Required>,
876 InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
877 InstrStage<5, [A9_NPipe], 1>,
878 InstrStage<5, [A9_LSUnit]>],
879 [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
882 InstrItinData<IIC_VLD4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
883 InstrStage<1, [A9_MUX0], 0>,
884 InstrStage<1, [A9_DRegsN], 0, Required>,
885 InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
886 InstrStage<4, [A9_NPipe], 1>,
887 InstrStage<4, [A9_LSUnit]>],
891 InstrItinData<IIC_VLD4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
892 InstrStage<1, [A9_MUX0], 0>,
893 InstrStage<1, [A9_DRegsN], 0, Required>,
894 InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
895 InstrStage<5, [A9_NPipe], 1>,
896 InstrStage<5, [A9_LSUnit]>],
897 [5, 5, 6, 6, 1, 1, 1, 1, 2, 2]>,
900 InstrItinData<IIC_VLD4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
901 InstrStage<1, [A9_MUX0], 0>,
902 InstrStage<1, [A9_DRegsN], 0, Required>,
903 InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
904 InstrStage<4, [A9_NPipe], 1>,
905 InstrStage<4, [A9_LSUnit]>],
909 InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
910 InstrStage<1, [A9_MUX0], 0>,
911 InstrStage<1, [A9_DRegsN], 0, Required>,
912 InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
913 InstrStage<5, [A9_NPipe], 1>,
914 InstrStage<5, [A9_LSUnit]>],
915 [5, 5, 6, 6, 2, 1, 1, 1, 1, 1, 2, 2]>,
918 InstrItinData<IIC_VST1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
919 InstrStage<1, [A9_MUX0], 0>,
920 InstrStage<1, [A9_DRegsN], 0, Required>,
921 InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
922 InstrStage<2, [A9_NPipe], 1>,
923 InstrStage<2, [A9_LSUnit]>],
927 InstrItinData<IIC_VST1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
928 InstrStage<1, [A9_MUX0], 0>,
929 InstrStage<1, [A9_DRegsN], 0, Required>,
930 InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
931 InstrStage<2, [A9_NPipe], 1>,
932 InstrStage<2, [A9_LSUnit]>],
936 InstrItinData<IIC_VST1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
937 InstrStage<1, [A9_MUX0], 0>,
938 InstrStage<1, [A9_DRegsN], 0, Required>,
939 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
940 InstrStage<3, [A9_NPipe], 1>,
941 InstrStage<3, [A9_LSUnit]>],
945 InstrItinData<IIC_VST1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
946 InstrStage<1, [A9_MUX0], 0>,
947 InstrStage<1, [A9_DRegsN], 0, Required>,
948 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
949 InstrStage<3, [A9_NPipe], 1>,
950 InstrStage<3, [A9_LSUnit]>],
954 InstrItinData<IIC_VST1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
955 InstrStage<1, [A9_MUX0], 0>,
956 InstrStage<1, [A9_DRegsN], 0, Required>,
957 InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
958 InstrStage<2, [A9_NPipe], 1>,
959 InstrStage<2, [A9_LSUnit]>],
963 InstrItinData<IIC_VST1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
964 InstrStage<1, [A9_MUX0], 0>,
965 InstrStage<1, [A9_DRegsN], 0, Required>,
966 InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
967 InstrStage<2, [A9_NPipe], 1>,
968 InstrStage<2, [A9_LSUnit]>],
972 InstrItinData<IIC_VST1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
973 InstrStage<1, [A9_MUX0], 0>,
974 InstrStage<1, [A9_DRegsN], 0, Required>,
975 InstrStage<3, [A9_DRegsVFP], 0, Reserved>, // same 3-cycle span as NPipe/LSUnit below, as in IIC_VST1x3
976 InstrStage<3, [A9_NPipe], 1>,
977 InstrStage<3, [A9_LSUnit]>],
978 [2, 1, 1, 1, 1, 1, 2]>,
981 InstrItinData<IIC_VST1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
982 InstrStage<1, [A9_MUX0], 0>,
983 InstrStage<1, [A9_DRegsN], 0, Required>,
984 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
985 InstrStage<3, [A9_NPipe], 1>,
986 InstrStage<3, [A9_LSUnit]>],
987 [2, 1, 1, 1, 1, 1, 2, 2]>,
990 InstrItinData<IIC_VST2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
991 InstrStage<1, [A9_MUX0], 0>,
992 InstrStage<1, [A9_DRegsN], 0, Required>,
993 InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
994 InstrStage<2, [A9_NPipe], 1>,
995 InstrStage<2, [A9_LSUnit]>],
999 InstrItinData<IIC_VST2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1000 InstrStage<1, [A9_MUX0], 0>,
1001 InstrStage<1, [A9_DRegsN], 0, Required>,
1002 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1003 InstrStage<3, [A9_NPipe], 1>,
1004 InstrStage<3, [A9_LSUnit]>],
1005 [1, 1, 1, 1, 2, 2]>,
1008 InstrItinData<IIC_VST2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1009 InstrStage<1, [A9_MUX0], 0>,
1010 InstrStage<1, [A9_DRegsN], 0, Required>,
1011 InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1012 InstrStage<2, [A9_NPipe], 1>,
1013 InstrStage<2, [A9_LSUnit]>],
1014 [2, 1, 1, 1, 1, 1]>,
1017 InstrItinData<IIC_VST2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1018 InstrStage<1, [A9_MUX0], 0>,
1019 InstrStage<1, [A9_DRegsN], 0, Required>,
1020 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1021 InstrStage<3, [A9_NPipe], 1>,
1022 InstrStage<3, [A9_LSUnit]>],
1023 [2, 1, 1, 1, 1, 1, 2, 2]>,
1026 InstrItinData<IIC_VST2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1027 InstrStage<1, [A9_MUX0], 0>,
1028 InstrStage<1, [A9_DRegsN], 0, Required>,
1029 InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1030 InstrStage<2, [A9_NPipe], 1>,
1031 InstrStage<2, [A9_LSUnit]>],
1035 InstrItinData<IIC_VST2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1036 InstrStage<1, [A9_MUX0], 0>,
1037 InstrStage<1, [A9_DRegsN], 0, Required>,
1038 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1039 InstrStage<3, [A9_NPipe], 1>,
1040 InstrStage<3, [A9_LSUnit]>],
1041 [2, 1, 1, 1, 1, 1]>,
1044 InstrItinData<IIC_VST3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1045 InstrStage<1, [A9_MUX0], 0>,
1046 InstrStage<1, [A9_DRegsN], 0, Required>,
1047 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1048 InstrStage<3, [A9_NPipe], 1>,
1049 InstrStage<3, [A9_LSUnit]>],
1053 InstrItinData<IIC_VST3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1054 InstrStage<1, [A9_MUX0], 0>,
1055 InstrStage<1, [A9_DRegsN], 0, Required>,
1056 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1057 InstrStage<3, [A9_NPipe], 1>,
1058 InstrStage<3, [A9_LSUnit]>],
1059 [2, 1, 1, 1, 1, 1, 2]>,
1062 InstrItinData<IIC_VST3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1063 InstrStage<1, [A9_MUX0], 0>,
1064 InstrStage<1, [A9_DRegsN], 0, Required>,
1065 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1066 InstrStage<3, [A9_NPipe], 1>,
1067 InstrStage<3, [A9_LSUnit]>],
1071 InstrItinData<IIC_VST3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1072 InstrStage<1, [A9_MUX0], 0>,
1073 InstrStage<1, [A9_DRegsN], 0, Required>,
1074 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1075 InstrStage<3, [A9_NPipe], 1>,
1076 InstrStage<3, [A9_LSUnit]>],
1077 [2, 1, 1, 1, 1, 1, 2]>,
1080 InstrItinData<IIC_VST4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1081 InstrStage<1, [A9_MUX0], 0>,
1082 InstrStage<1, [A9_DRegsN], 0, Required>,
1083 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1084 InstrStage<3, [A9_NPipe], 1>,
1085 InstrStage<3, [A9_LSUnit]>],
1086 [1, 1, 1, 1, 2, 2]>,
1089 InstrItinData<IIC_VST4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1090 InstrStage<1, [A9_MUX0], 0>,
1091 InstrStage<1, [A9_DRegsN], 0, Required>,
1092 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1093 InstrStage<3, [A9_NPipe], 1>,
1094 InstrStage<3, [A9_LSUnit]>],
1095 [2, 1, 1, 1, 1, 1, 2, 2]>,
1098 InstrItinData<IIC_VST4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1099 InstrStage<1, [A9_MUX0], 0>,
1100 InstrStage<1, [A9_DRegsN], 0, Required>,
1101 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1102 InstrStage<3, [A9_NPipe], 1>,
1103 InstrStage<3, [A9_LSUnit]>],
1104 [1, 1, 1, 1, 2, 2]>,
1107 InstrItinData<IIC_VST4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1108 InstrStage<1, [A9_MUX0], 0>,
1109 InstrStage<1, [A9_DRegsN], 0, Required>,
1110 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1111 InstrStage<3, [A9_NPipe], 1>,
1112 InstrStage<3, [A9_LSUnit]>],
1113 [2, 1, 1, 1, 1, 1, 2, 2]>,
1116 // Double-register Integer Unary
1117 InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1118 InstrStage<1, [A9_MUX0], 0>,
1119 InstrStage<1, [A9_DRegsN], 0, Required>,
1120 // Extra latency cycles since wbck is 6 cycles
1121 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1122 InstrStage<1, [A9_NPipe]>],
1125 // Quad-register Integer Unary
1126 InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1127 InstrStage<1, [A9_MUX0], 0>,
1128 InstrStage<1, [A9_DRegsN], 0, Required>,
1129 // Extra latency cycles since wbck is 6 cycles
1130 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1131 InstrStage<1, [A9_NPipe]>],
1134 // Double-register Integer Q-Unary
1135 InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1136 InstrStage<1, [A9_MUX0], 0>,
1137 InstrStage<1, [A9_DRegsN], 0, Required>,
1138 // Extra latency cycles since wbck is 6 cycles
1139 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1140 InstrStage<1, [A9_NPipe]>],
1143 // Quad-register Integer Q-Unary
1144 InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1145 InstrStage<1, [A9_MUX0], 0>,
1146 InstrStage<1, [A9_DRegsN], 0, Required>,
1147 // Extra latency cycles since wbck is 6 cycles
1148 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1149 InstrStage<1, [A9_NPipe]>],
1152 // Double-register Integer Binary
1153 InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1154 InstrStage<1, [A9_MUX0], 0>,
1155 InstrStage<1, [A9_DRegsN], 0, Required>,
1156 // Extra latency cycles since wbck is 6 cycles
1157 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1158 InstrStage<1, [A9_NPipe]>],
1161 // Quad-register Integer Binary
1162 InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1163 InstrStage<1, [A9_MUX0], 0>,
1164 InstrStage<1, [A9_DRegsN], 0, Required>,
1165 // Extra latency cycles since wbck is 6 cycles
1166 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1167 InstrStage<1, [A9_NPipe]>],
1170 // Double-register Integer Subtract
1171 InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1172 InstrStage<1, [A9_MUX0], 0>,
1173 InstrStage<1, [A9_DRegsN], 0, Required>,
1174 // Extra latency cycles since wbck is 6 cycles
1175 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1176 InstrStage<1, [A9_NPipe]>],
1179 // Quad-register Integer Subtract
1180 InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1181 InstrStage<1, [A9_MUX0], 0>,
1182 InstrStage<1, [A9_DRegsN], 0, Required>,
1183 // Extra latency cycles since wbck is 6 cycles
1184 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1185 InstrStage<1, [A9_NPipe]>],
1188 // Double-register Integer Shift
1189 InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1190 InstrStage<1, [A9_MUX0], 0>,
1191 InstrStage<1, [A9_DRegsN], 0, Required>,
1192 // Extra latency cycles since wbck is 6 cycles
1193 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1194 InstrStage<1, [A9_NPipe]>],
1197 // Quad-register Integer Shift
1198 InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1199 InstrStage<1, [A9_MUX0], 0>,
1200 InstrStage<1, [A9_DRegsN], 0, Required>,
1201 // Extra latency cycles since wbck is 6 cycles
1202 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1203 InstrStage<1, [A9_NPipe]>],
1206 // Double-register Integer Shift (4 cycle)
1207 InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1208 InstrStage<1, [A9_MUX0], 0>,
1209 InstrStage<1, [A9_DRegsN], 0, Required>,
1210 // Extra latency cycles since wbck is 6 cycles
1211 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1212 InstrStage<1, [A9_NPipe]>],
1215 // Quad-register Integer Shift (4 cycle)
1216 InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1217 InstrStage<1, [A9_MUX0], 0>,
1218 InstrStage<1, [A9_DRegsN], 0, Required>,
1219 // Extra latency cycles since wbck is 6 cycles
1220 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1221 InstrStage<1, [A9_NPipe]>],
1224 // Double-register Integer Binary (4 cycle)
1225 InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1226 InstrStage<1, [A9_MUX0], 0>,
1227 InstrStage<1, [A9_DRegsN], 0, Required>,
1228 // Extra latency cycles since wbck is 6 cycles
1229 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1230 InstrStage<1, [A9_NPipe]>],
1233 // Quad-register Integer Binary (4 cycle)
1234 InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1235 InstrStage<1, [A9_MUX0], 0>,
1236 InstrStage<1, [A9_DRegsN], 0, Required>,
1237 // Extra latency cycles since wbck is 6 cycles
1238 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1239 InstrStage<1, [A9_NPipe]>],
1242 // Double-register Integer Subtract (4 cycle)
1243 InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1244 InstrStage<1, [A9_MUX0], 0>,
1245 InstrStage<1, [A9_DRegsN], 0, Required>,
1246 // Extra latency cycles since wbck is 6 cycles
1247 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1248 InstrStage<1, [A9_NPipe]>],
1251 // Quad-register Integer Subtract (4 cycle)
1252 InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1253 InstrStage<1, [A9_MUX0], 0>,
1254 InstrStage<1, [A9_DRegsN], 0, Required>,
1255 // Extra latency cycles since wbck is 6 cycles
1256 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1257 InstrStage<1, [A9_NPipe]>],
1261 // Double-register Integer Count
1262 InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1263 InstrStage<1, [A9_MUX0], 0>,
1264 InstrStage<1, [A9_DRegsN], 0, Required>,
1265 // Extra latency cycles since wbck is 6 cycles
1266 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1267 InstrStage<1, [A9_NPipe]>],
1270 // Quad-register Integer Count
1271 // Result written in N3, but that is relative to the last cycle of multicycle,
1272 // so we use 4 for those cases
1273 InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1274 InstrStage<1, [A9_MUX0], 0>,
1275 InstrStage<1, [A9_DRegsN], 0, Required>,
1276 // Extra latency cycles since wbck is 7 cycles
1277 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1278 InstrStage<2, [A9_NPipe]>],
1281 // Double-register Absolute Difference and Accumulate. Stages are listed in the same order as the other NEON itineraries in this table: issue slot, MUX0, NEON-side register set, VFP-side register reservation, NEON pipe.
1282 InstrItinData<IIC_VABAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1283 InstrStage<1, [A9_MUX0], 0>,
1284 InstrStage<1, [A9_DRegsN], 0, Required>,
1285 // Extra latency cycles since wbck is 6 cycles
1286 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1287 InstrStage<1, [A9_NPipe]>],
1290 // Quad-register Absolute Difference and Accumulate
1291 InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1292 InstrStage<1, [A9_MUX0], 0>,
1293 InstrStage<1, [A9_DRegsN], 0, Required>,
1294 // Extra latency cycles since wbck is 6 cycles
1295 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1296 InstrStage<2, [A9_NPipe]>],
1299 // Double-register Integer Pair Add Long
1300 InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1301 InstrStage<1, [A9_MUX0], 0>,
1302 InstrStage<1, [A9_DRegsN], 0, Required>,
1303 // Extra latency cycles since wbck is 6 cycles
1304 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1305 InstrStage<1, [A9_NPipe]>],
1308 // Quad-register Integer Pair Add Long
1309 InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1310 InstrStage<1, [A9_MUX0], 0>,
1311 InstrStage<1, [A9_DRegsN], 0, Required>,
1312 // Extra latency cycles since wbck is 6 cycles
1313 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1314 InstrStage<2, [A9_NPipe]>],
1318 // Double-register Integer Multiply (.8, .16)
1319 InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1320 InstrStage<1, [A9_MUX0], 0>,
1321 InstrStage<1, [A9_DRegsN], 0, Required>,
1322 // Extra latency cycles since wbck is 6 cycles
1323 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1324 InstrStage<1, [A9_NPipe]>],
1327 // Quad-register Integer Multiply (.8, .16)
1328 InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1329 InstrStage<1, [A9_MUX0], 0>,
1330 InstrStage<1, [A9_DRegsN], 0, Required>,
1331 // Extra latency cycles since wbck is 7 cycles
1332 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1333 InstrStage<2, [A9_NPipe]>],
1337 // Double-register Integer Multiply (.32)
1338 InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1339 InstrStage<1, [A9_MUX0], 0>,
1340 InstrStage<1, [A9_DRegsN], 0, Required>,
1341 // Extra latency cycles since wbck is 7 cycles
1342 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1343 InstrStage<2, [A9_NPipe]>],
1346 // Quad-register Integer Multiply (.32)
1347 InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1348 InstrStage<1, [A9_MUX0], 0>,
1349 InstrStage<1, [A9_DRegsN], 0, Required>,
1350 // Extra latency cycles since wbck is 9 cycles
1351 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1352 InstrStage<4, [A9_NPipe]>],
1355 // Double-register Integer Multiply-Accumulate (.8, .16)
1356 InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1357 InstrStage<1, [A9_MUX0], 0>,
1358 InstrStage<1, [A9_DRegsN], 0, Required>,
1359 // Extra latency cycles since wbck is 6 cycles
1360 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1361 InstrStage<1, [A9_NPipe]>],
1364 // Double-register Integer Multiply-Accumulate (.32)
1365 InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1366 InstrStage<1, [A9_MUX0], 0>,
1367 InstrStage<1, [A9_DRegsN], 0, Required>,
1368 // Extra latency cycles since wbck is 7 cycles
1369 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1370 InstrStage<2, [A9_NPipe]>],
1373 // Quad-register Integer Multiply-Accumulate (.8, .16)
1374 InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1375 InstrStage<1, [A9_MUX0], 0>,
1376 InstrStage<1, [A9_DRegsN], 0, Required>,
1377 // Extra latency cycles since wbck is 7 cycles
1378 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1379 InstrStage<2, [A9_NPipe]>],
1382 // Quad-register Integer Multiply-Accumulate (.32)
1383 InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1384 InstrStage<1, [A9_MUX0], 0>,
1385 InstrStage<1, [A9_DRegsN], 0, Required>,
1386 // Extra latency cycles since wbck is 9 cycles
1387 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1388 InstrStage<4, [A9_NPipe]>],
1393 InstrItinData<IIC_VMOV, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1394 InstrStage<1, [A9_MUX0], 0>,
1395 InstrStage<1, [A9_DRegsN], 0, Required>,
1396 InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1397 InstrStage<1, [A9_NPipe]>],
1401 InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1402 InstrStage<1, [A9_MUX0], 0>,
1403 InstrStage<1, [A9_DRegsN], 0, Required>,
1404 // Extra latency cycles since wbck is 6 cycles
1405 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1406 InstrStage<1, [A9_NPipe]>],
1409 // Double-register Permute Move
1410 InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1411 InstrStage<1, [A9_MUX0], 0>,
1412 InstrStage<1, [A9_DRegsN], 0, Required>,
1413 // Extra latency cycles since wbck is 6 cycles
1414 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1415 InstrStage<1, [A9_NPipe]>],
1418 // Quad-register Permute Move
1419 InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1420 InstrStage<1, [A9_MUX0], 0>,
1421 InstrStage<1, [A9_DRegsN], 0, Required>,
1422 // Extra latency cycles since wbck is 6 cycles
1423 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1424 InstrStage<1, [A9_NPipe]>],
1427 // Integer to Single-precision Move
1428 InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1429 InstrStage<1, [A9_MUX0], 0>,
1430 InstrStage<1, [A9_DRegsN], 0, Required>,
1431 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1432 InstrStage<1, [A9_NPipe]>],
1435 // Integer to Double-precision Move
1436 InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1437 InstrStage<1, [A9_MUX0], 0>,
1438 InstrStage<1, [A9_DRegsN], 0, Required>,
1439 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1440 InstrStage<1, [A9_NPipe]>],
1443 // Single-precision to Integer Move
1444 InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1445 InstrStage<1, [A9_MUX0], 0>,
1446 InstrStage<1, [A9_DRegsN], 0, Required>,
1447 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1448 InstrStage<1, [A9_NPipe]>],
1451 // Double-precision to Integer Move
1452 InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1453 InstrStage<1, [A9_MUX0], 0>,
1454 InstrStage<1, [A9_DRegsN], 0, Required>,
1455 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1456 InstrStage<1, [A9_NPipe]>],
1459 // Integer to Lane Move
1460 InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1461 InstrStage<1, [A9_MUX0], 0>,
1462 InstrStage<1, [A9_DRegsN], 0, Required>,
1463 InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
1464 InstrStage<2, [A9_NPipe]>],
1468 // Vector narrow move
1469 InstrItinData<IIC_VMOVN, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1470 InstrStage<1, [A9_MUX0], 0>,
1471 InstrStage<1, [A9_DRegsN], 0, Required>,
1472 // Extra latency cycles since wbck is 6 cycles
1473 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1474 InstrStage<1, [A9_NPipe]>],
1477 // Double-register FP Unary
1478 InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1479 InstrStage<1, [A9_MUX0], 0>,
1480 InstrStage<1, [A9_DRegsN], 0, Required>,
1481 // Extra latency cycles since wbck is 6 cycles
1482 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1483 InstrStage<1, [A9_NPipe]>],
1486 // Quad-register FP Unary
1487 // Result written in N5, but that is relative to the last cycle of multicycle,
1488 // so we use 6 for those cases
1489 InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1490 InstrStage<1, [A9_MUX0], 0>,
1491 InstrStage<1, [A9_DRegsN], 0, Required>,
1492 // Extra latency cycles since wbck is 7 cycles
1493 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1494 InstrStage<2, [A9_NPipe]>],
1497 // Double-register FP Binary
1498 // FIXME: We're using this itin for many instructions and [2, 2] here is too optimistic.
1500 InstrItinData<IIC_VBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1501 InstrStage<1, [A9_MUX0], 0>,
1502 InstrStage<1, [A9_DRegsN], 0, Required>,
1503 // Extra latency cycles since wbck is 6 cycles
1504 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1505 InstrStage<1, [A9_NPipe]>],
1510 InstrItinData<IIC_VPBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1511 InstrStage<1, [A9_MUX0], 0>,
1512 InstrStage<1, [A9_DRegsN], 0, Required>,
1513 // Extra latency cycles since wbck is 6 cycles
1514 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1515 InstrStage<1, [A9_NPipe]>],
1518 // Double-register FP VMUL
1519 InstrItinData<IIC_VFMULD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1520 InstrStage<1, [A9_MUX0], 0>,
1521 InstrStage<1, [A9_DRegsN], 0, Required>,
1522 // Extra latency cycles since wbck is 6 cycles
1523 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1524 InstrStage<1, [A9_NPipe]>],
1527 // Quad-register FP Binary
1528 // Result written in N5, but that is relative to the last cycle of multicycle,
1529 // so we use 6 for those cases
1530 // FIXME: We're using this itin for many instructions and [2, 2] here is too optimistic.
1532 InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1533 InstrStage<1, [A9_MUX0], 0>,
1534 InstrStage<1, [A9_DRegsN], 0, Required>,
1535 // Extra latency cycles since wbck is 7 cycles
1536 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1537 InstrStage<2, [A9_NPipe]>],
1540 // Quad-register FP VMUL
1541 InstrItinData<IIC_VFMULQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1542 InstrStage<1, [A9_MUX0], 0>,
1543 InstrStage<1, [A9_DRegsN], 0, Required>,
1544 // Extra latency cycles since wbck is 7 cycles. NOTE(review): NPipe below is held for 1 cycle, whereas IIC_VBINQ (same 8-cycle DRegsVFP reservation) holds it for 2 -- confirm this is intended.
1545 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1546 InstrStage<1, [A9_NPipe]>],
1549 // Double-register FP Multiply-Accumulate
1550 InstrItinData<IIC_VMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1551 InstrStage<1, [A9_MUX0], 0>,
1552 InstrStage<1, [A9_DRegsN], 0, Required>,
1553 // Extra latency cycles since wbck is 7 cycles
1554 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1555 InstrStage<2, [A9_NPipe]>],
1558 // Quad-register FP Multiply-Accumulate
1559 // Result written in N9, but that is relative to the last cycle of multicycle,
1560 // so we use 10 for those cases
1561 InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1562 InstrStage<1, [A9_MUX0], 0>,
1563 InstrStage<1, [A9_DRegsN], 0, Required>,
1564 // Extra latency cycles since wbck is 9 cycles
1565 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1566 InstrStage<4, [A9_NPipe]>],
1569 // Double-register Reciprocal Step
1570 InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1571 InstrStage<1, [A9_MUX0], 0>,
1572 InstrStage<1, [A9_DRegsN], 0, Required>,
1573 // Extra latency cycles since wbck is 10 cycles
1574 InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
1575 InstrStage<1, [A9_NPipe]>],
1578 // Quad-register Reciprocal Step
1579 InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1580 InstrStage<1, [A9_MUX0], 0>,
1581 InstrStage<1, [A9_DRegsN], 0, Required>,
1582 // Extra latency cycles since wbck is 11 cycles
1583 InstrStage<12, [A9_DRegsVFP], 0, Reserved>,
1584 InstrStage<2, [A9_NPipe]>],
1587 // Double-register Permute
1588 InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1589 InstrStage<1, [A9_MUX0], 0>,
1590 InstrStage<1, [A9_DRegsN], 0, Required>,
1591 // Extra latency cycles since wbck is 6 cycles
1592 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1593 InstrStage<1, [A9_NPipe]>],
1596 // Quad-register Permute
1597 // Result written in N2, but that is relative to the last cycle of multicycle,
1598 // so we use 3 for those cases
1599 InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1600 InstrStage<1, [A9_MUX0], 0>,
1601 InstrStage<1, [A9_DRegsN], 0, Required>,
1602 // Extra latency cycles since wbck is 7 cycles
1603 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1604 InstrStage<2, [A9_NPipe]>],
1607 // Quad-register Permute (3 cycle issue)
1608 // Result written in N2, but that is relative to the last cycle of multicycle,
1609 // so we use 4 for those cases
1610 InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1611 InstrStage<1, [A9_MUX0], 0>,
1612 InstrStage<1, [A9_DRegsN], 0, Required>,
1613 // Extra latency cycles since wbck is 8 cycles
1614 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1615 InstrStage<3, [A9_NPipe]>],
1619 // Double-register VEXT
1620 InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1621 InstrStage<1, [A9_MUX0], 0>,
1622 InstrStage<1, [A9_DRegsN], 0, Required>,
1623 // Extra latency cycles since wbck is 6 cycles
1624 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1625 InstrStage<1, [A9_NPipe]>],
1628 // Quad-register VEXT
1629 InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1630 InstrStage<1, [A9_MUX0], 0>,
1631 InstrStage<1, [A9_DRegsN], 0, Required>,
1632 // Extra latency cycles since wbck is 7 cycles
1633 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1634 InstrStage<2, [A9_NPipe]>],
1638 InstrItinData<IIC_VTB1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1639 InstrStage<1, [A9_MUX0], 0>,
1640 InstrStage<1, [A9_DRegsN], 0, Required>,
1641 // Extra latency cycles since wbck is 7 cycles
1642 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1643 InstrStage<2, [A9_NPipe]>],
1645 InstrItinData<IIC_VTB2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1646 InstrStage<1, [A9_MUX0], 0>,
1647 InstrStage<2, [A9_DRegsN], 0, Required>,
1648 // Extra latency cycles since wbck is 7 cycles. NOTE(review): the A9_DRegsN stage above lasts 2 cycles here but 1 cycle in IIC_VTB1, IIC_VTB4 and all IIC_VTBX* entries -- confirm this is intended.
1649 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1650 InstrStage<2, [A9_NPipe]>],
1652 InstrItinData<IIC_VTB3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1653 InstrStage<1, [A9_MUX0], 0>,
1654 InstrStage<2, [A9_DRegsN], 0, Required>,
1655 // Extra latency cycles since wbck is 8 cycles. NOTE(review): the A9_DRegsN stage above lasts 2 cycles here but 1 cycle in IIC_VTB1, IIC_VTB4 and all IIC_VTBX* entries -- confirm this is intended.
1656 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1657 InstrStage<3, [A9_NPipe]>],
1659 InstrItinData<IIC_VTB4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1660 InstrStage<1, [A9_MUX0], 0>,
1661 InstrStage<1, [A9_DRegsN], 0, Required>,
1662 // Extra latency cycles since wbck is 8 cycles
1663 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1664 InstrStage<3, [A9_NPipe]>],
1665 [4, 2, 2, 3, 3, 1]>,
1668 InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1669 InstrStage<1, [A9_MUX0], 0>,
1670 InstrStage<1, [A9_DRegsN], 0, Required>,
1671 // Extra latency cycles since wbck is 7 cycles
1672 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1673 InstrStage<2, [A9_NPipe]>],
1675 InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1676 InstrStage<1, [A9_MUX0], 0>,
1677 InstrStage<1, [A9_DRegsN], 0, Required>,
1678 // Extra latency cycles since wbck is 7 cycles
1679 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1680 InstrStage<2, [A9_NPipe]>],
1682 InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1683 InstrStage<1, [A9_MUX0], 0>,
1684 InstrStage<1, [A9_DRegsN], 0, Required>,
1685 // Extra latency cycles since wbck is 8 cycles
1686 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1687 InstrStage<3, [A9_NPipe]>],
1688 [4, 1, 2, 2, 3, 1]>,
1689 InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1690 InstrStage<1, [A9_MUX0], 0>,
1691 InstrStage<1, [A9_DRegsN], 0, Required>,
1692 // Extra latency cycles since wbck is 8 cycles. NOTE(review): NPipe below is held for 2 cycles, whereas IIC_VTB4 and IIC_VTBX3 (same 9-cycle DRegsVFP reservation) hold it for 3 -- confirm this is intended.
1693 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1694 InstrStage<2, [A9_NPipe]>],
1695 [4, 1, 2, 2, 3, 3, 1]>