1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
12 //===----------------------------------------------------------------------===//
15 // Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical
19 def A9_Issue0 : FuncUnit; // Issue slot 0
20 def A9_Issue1 : FuncUnit; // Issue slot 1
21 def A9_Branch : FuncUnit; // Branch unit
22 def A9_ALU0 : FuncUnit; // ALU / MUL pipeline 0 (the multiply itineraries use only this ALU)
23 def A9_ALU1 : FuncUnit; // ALU pipeline 1
24 def A9_AGU : FuncUnit; // Address generation unit for loads / stores
25 def A9_NPipe : FuncUnit; // NEON pipeline
26 def A9_MUX0 : FuncUnit; // AGU + NEON/FPU multiplexer
27 def A9_LS0 : FuncUnit; // Load/store unit 0, 32 bits per unit. Fake FU used to limit load/store throughput.
28 def A9_LS1 : FuncUnit; // Load/store unit 1, 32 bits per unit.
29 def A9_DRegsVFP: FuncUnit; // FP register set, VFP side (artificial FU used to model cross-domain stalls)
30 def A9_DRegsN : FuncUnit; // FP register set, NEON side (artificial FU used to model cross-domain stalls)
33 def A9_LdBypass : Bypass; // Bypass tag named in the per-operand bypass lists of the load and ALU itineraries
35 def CortexA9Itineraries : ProcessorItineraries<
36 [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
37 A9_LS0, A9_LS1, A9_DRegsVFP, A9_DRegsN],
39 // Two fully-pipelined integer ALU pipelines
42 // Move instructions, unconditional
43 InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
44 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
45 InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
46 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
47 InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
48 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
49 InstrItinData<IIC_iMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
50 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
51 InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
52 InstrStage<1, [A9_ALU0, A9_ALU1]>,
53 InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
56 InstrItinData<IIC_iMVNi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
57 InstrStage<1, [A9_ALU0, A9_ALU1]>],
59 InstrItinData<IIC_iMVNr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
60 InstrStage<1, [A9_ALU0, A9_ALU1]>],
61 [1, 1], [NoBypass, A9_LdBypass]>,
62 InstrItinData<IIC_iMVNsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
63 InstrStage<2, [A9_ALU0, A9_ALU1]>],
65 InstrItinData<IIC_iMVNsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
66 InstrStage<3, [A9_ALU0, A9_ALU1]>],
70 InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
71 InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
73 // Binary Instructions that produce a result
74 InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
75 InstrStage<1, [A9_ALU0, A9_ALU1]>],
76 [1, 1], [NoBypass, A9_LdBypass]>,
77 InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
78 InstrStage<1, [A9_ALU0, A9_ALU1]>],
79 [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
80 InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
81 InstrStage<2, [A9_ALU0, A9_ALU1]>],
82 [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
83 InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
84 InstrStage<2, [A9_ALU0, A9_ALU1]>],
85 [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
86 InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
87 InstrStage<3, [A9_ALU0, A9_ALU1]>],
89 [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
91 // Bitwise Instructions that produce a result
92 InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
93 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
94 InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
95 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
96 InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
97 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
98 InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
99 InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
101 // Unary Instructions that produce a result
104 InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
105 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
107 // BFC, BFI, UBFX, SBFX
108 InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
109 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
112 // Zero and sign extension instructions
113 InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
114 InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
115 InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
116 InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
117 InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
118 InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
120 // Compare instructions
121 InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
122 InstrStage<1, [A9_ALU0, A9_ALU1]>],
124 InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
125 InstrStage<1, [A9_ALU0, A9_ALU1]>],
126 [1, 1], [A9_LdBypass, A9_LdBypass]>,
127 InstrItinData<IIC_iCMPsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<2, [A9_ALU0, A9_ALU1]>],
128 [1, 1], [A9_LdBypass, NoBypass]>, // Compare with shift-by-immediate: takes an issue slot like the other compares, then 2 ALU cycles
129 InstrItinData<IIC_iCMPsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
130 InstrStage<3, [A9_ALU0, A9_ALU1]>],
131 [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
134 InstrItinData<IIC_iTSTi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
135 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
136 InstrItinData<IIC_iTSTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
137 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
138 InstrItinData<IIC_iTSTsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
139 InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
140 InstrItinData<IIC_iTSTsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
141 InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
143 // Move instructions, conditional
144 // FIXME: Correctly model the extra input dep on the destination.
145 InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
146 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
147 InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
148 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
149 InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
150 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
151 InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
152 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
154 // Integer multiply pipeline
156 InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
157 InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
158 InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
159 InstrStage<2, [A9_ALU0]>],
161 InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
162 InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
163 InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
164 InstrStage<2, [A9_ALU0]>],
166 InstrItinData<IIC_iMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
167 InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
168 InstrItinData<IIC_iMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
169 InstrStage<3, [A9_ALU0]>],
171 // Integer load pipeline
172 // FIXME: The timings are rough approximations.
175 InstrItinData<IIC_iLoad_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
176 InstrStage<1, [A9_MUX0], 0>,
177 InstrStage<1, [A9_AGU]>,
178 InstrStage<1, [A9_LS0, A9_LS1]>],
179 [3, 1], [A9_LdBypass]>,
180 InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
181 InstrStage<1, [A9_MUX0], 0>,
182 InstrStage<2, [A9_AGU]>,
183 InstrStage<1, [A9_LS0, A9_LS1]>],
184 [4, 1], [A9_LdBypass]>,
185 // FIXME: If the address is 64-bit aligned, the AGU needs only 1 cycle.
186 InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
187 InstrStage<1, [A9_MUX0], 0>,
188 InstrStage<2, [A9_AGU]>,
189 InstrStage<1, [A9_LS0, A9_LS1]>],
190 [3, 3, 1], [A9_LdBypass]>,
193 InstrItinData<IIC_iLoad_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
194 InstrStage<1, [A9_MUX0], 0>,
195 InstrStage<1, [A9_AGU]>,
196 InstrStage<1, [A9_LS0, A9_LS1]>],
197 [3, 1, 1], [A9_LdBypass]>,
198 InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
199 InstrStage<1, [A9_MUX0], 0>,
200 InstrStage<2, [A9_AGU]>,
201 InstrStage<1, [A9_LS0, A9_LS1]>],
202 [4, 1, 1], [A9_LdBypass]>,
203 InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
204 InstrStage<1, [A9_MUX0], 0>,
205 InstrStage<2, [A9_AGU]>,
206 InstrStage<1, [A9_LS0, A9_LS1]>],
207 [3, 3, 1, 1], [A9_LdBypass]>,
209 // Scaled register offset
210 InstrItinData<IIC_iLoad_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
211 InstrStage<1, [A9_MUX0], 0>,
212 InstrStage<1, [A9_AGU]>,
213 InstrStage<1, [A9_LS0, A9_LS1]>],
214 [4, 1, 1], [A9_LdBypass]>,
215 InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
216 InstrStage<1, [A9_MUX0], 0>,
217 InstrStage<2, [A9_AGU]>,
218 InstrStage<1, [A9_LS0, A9_LS1]>],
219 [5, 1, 1], [A9_LdBypass]>,
221 // Immediate offset with update
222 InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
223 InstrStage<1, [A9_MUX0], 0>,
224 InstrStage<1, [A9_AGU]>,
225 InstrStage<1, [A9_LS0, A9_LS1]>],
226 [3, 2, 1], [A9_LdBypass]>,
227 InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
228 InstrStage<1, [A9_MUX0], 0>,
229 InstrStage<2, [A9_AGU]>,
230 InstrStage<1, [A9_LS0, A9_LS1]>],
231 [4, 3, 1], [A9_LdBypass]>,
233 // Register offset with update
234 InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
235 InstrStage<1, [A9_MUX0], 0>,
236 InstrStage<1, [A9_AGU]>,
237 InstrStage<1, [A9_LS0, A9_LS1]>],
238 [3, 2, 1, 1], [A9_LdBypass]>,
239 InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
240 InstrStage<1, [A9_MUX0], 0>,
241 InstrStage<2, [A9_AGU]>,
242 InstrStage<1, [A9_LS0, A9_LS1]>],
243 [4, 3, 1, 1], [A9_LdBypass]>,
244 InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
245 InstrStage<1, [A9_MUX0], 0>,
246 InstrStage<2, [A9_AGU]>,
247 InstrStage<1, [A9_LS0, A9_LS1]>],
248 [3, 3, 1, 1], [A9_LdBypass]>,
250 // Scaled register offset with update
251 InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
252 InstrStage<1, [A9_MUX0], 0>,
253 InstrStage<1, [A9_AGU]>,
254 InstrStage<1, [A9_LS0, A9_LS1]>],
255 [4, 3, 1, 1], [A9_LdBypass]>,
256 InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
257 InstrStage<1, [A9_MUX0], 0>,
258 InstrStage<2, [A9_AGU]>,
259 InstrStage<1, [A9_LS0, A9_LS1]>],
260 [5, 4, 1, 1], [A9_LdBypass]>,
262 // Load multiple, def is the 5th operand.
263 // FIXME: This assumes 3 to 4 registers.
264 InstrItinData<IIC_iLoad_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
265 InstrStage<1, [A9_MUX0], 0>,
266 InstrStage<2, [A9_AGU]>,
267 InstrStage<2, [A9_LS0, A9_LS1]>],
269 [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
271 // Load multiple + update, defs are the 1st and 5th operands.
272 InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
273 InstrStage<1, [A9_MUX0], 0>,
274 InstrStage<2, [A9_AGU]>,
275 InstrStage<2, [A9_LS0, A9_LS1]>],
277 [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
279 // Load multiple plus branch
280 InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
281 InstrStage<1, [A9_MUX0], 0>,
282 InstrStage<1, [A9_AGU]>,
283 InstrStage<2, [A9_LS0, A9_LS1]>,
284 InstrStage<1, [A9_Branch]>],
286 [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
288 // Pop, def is the 3rd operand.
289 InstrItinData<IIC_iPop , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
290 InstrStage<1, [A9_MUX0], 0>,
291 InstrStage<2, [A9_AGU]>,
292 InstrStage<2, [A9_LS0, A9_LS1]>],
294 [NoBypass, NoBypass, A9_LdBypass]>,
296 // Pop + branch, def is the 3rd operand.
297 InstrItinData<IIC_iPop_Br, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
298 InstrStage<1, [A9_MUX0], 0>,
299 InstrStage<2, [A9_AGU]>,
300 InstrStage<2, [A9_LS0, A9_LS1]>,
301 InstrStage<1, [A9_Branch]>],
303 [NoBypass, NoBypass, A9_LdBypass]>,
306 // iLoadi + iALUr for t2LDRpci_pic.
307 InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
308 InstrStage<1, [A9_MUX0], 0>,
309 InstrStage<1, [A9_AGU]>,
310 InstrStage<1, [A9_LS0, A9_LS1]>,
311 InstrStage<1, [A9_ALU0, A9_ALU1]>],
314 // Integer store pipeline
317 InstrItinData<IIC_iStore_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
318 InstrStage<1, [A9_MUX0], 0>,
319 InstrStage<1, [A9_AGU]>,
320 InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1]>,
321 InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
322 InstrStage<1, [A9_MUX0], 0>,
323 InstrStage<2, [A9_AGU]>,
324 InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1]>,
325 // FIXME: If the address is 64-bit aligned, the AGU needs only 1 cycle.
326 InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
327 InstrStage<1, [A9_MUX0], 0>,
328 InstrStage<2, [A9_AGU]>,
329 InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1]>,
332 InstrItinData<IIC_iStore_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
333 InstrStage<1, [A9_MUX0], 0>,
334 InstrStage<1, [A9_AGU]>,
335 InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>,
336 InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
337 InstrStage<1, [A9_MUX0], 0>,
338 InstrStage<2, [A9_AGU]>,
339 InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>,
340 InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
341 InstrStage<1, [A9_MUX0], 0>,
342 InstrStage<2, [A9_AGU]>,
343 InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>,
345 // Scaled register offset
346 InstrItinData<IIC_iStore_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
347 InstrStage<1, [A9_MUX0], 0>,
348 InstrStage<1, [A9_AGU]>,
349 InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>,
350 InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
351 InstrStage<1, [A9_MUX0], 0>,
352 InstrStage<2, [A9_AGU]>,
353 InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>,
355 // Immediate offset with update
356 InstrItinData<IIC_iStore_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
357 InstrStage<1, [A9_MUX0], 0>,
358 InstrStage<1, [A9_AGU]>,
359 InstrStage<1, [A9_LS0, A9_LS1]>], [2, 1, 1]>,
360 InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
361 InstrStage<1, [A9_MUX0], 0>,
362 InstrStage<2, [A9_AGU]>,
363 InstrStage<1, [A9_LS0, A9_LS1]>], [3, 1, 1]>,
365 // Register offset with update
366 InstrItinData<IIC_iStore_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
367 InstrStage<1, [A9_MUX0], 0>,
368 InstrStage<1, [A9_AGU]>,
369 InstrStage<1, [A9_LS0, A9_LS1]>],
371 InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
372 InstrStage<1, [A9_MUX0], 0>,
373 InstrStage<2, [A9_AGU]>,
374 InstrStage<1, [A9_LS0, A9_LS1]>],
376 InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
377 InstrStage<1, [A9_MUX0], 0>,
378 InstrStage<2, [A9_AGU]>,
379 InstrStage<1, [A9_LS0, A9_LS1]>],
382 // Scaled register offset with update
383 InstrItinData<IIC_iStore_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
384 InstrStage<1, [A9_MUX0], 0>,
385 InstrStage<1, [A9_AGU]>,
386 InstrStage<1, [A9_LS0, A9_LS1]>],
388 InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
389 InstrStage<1, [A9_MUX0], 0>,
390 InstrStage<2, [A9_AGU]>,
391 InstrStage<1, [A9_LS0, A9_LS1]>],
395 InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
396 InstrStage<1, [A9_MUX0], 0>,
397 InstrStage<1, [A9_AGU]>,
398 InstrStage<2, [A9_LS0, A9_LS1]>]>,
400 // Store multiple + update
401 InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
402 InstrStage<1, [A9_MUX0], 0>,
403 InstrStage<1, [A9_AGU]>,
404 InstrStage<2, [A9_LS0, A9_LS1]>], [2]>,
408 // no delay slots, so the latency of a branch is unimportant
409 InstrItinData<IIC_Br , [InstrStage<1, [A9_Issue0], 0>,
410 InstrStage<1, [A9_Issue1], 0>,
411 InstrStage<1, [A9_Branch]>]>,
413 // VFP and NEON share the same register file. This means that every VFP
414 // instruction should wait for full completion of the consecutive NEON
415 // instruction and vice-versa. We model this behavior with two artificial FUs:
416 // DRegsVFP and DRegsN.
418 // Every VFP instruction:
419 // - Acquires DRegsVFP resource for 1 cycle
420 // - Reserves DRegsN resource for the whole duration (including time to
421 // register file writeback!).
422 // Every NEON instruction does the same but with FUs swapped.
424 // Since the reserved FU cannot be acquired, this models precisely
425 // "cross-domain" stalls.
428 // Issue through integer pipeline, and execute in NEON unit.
430 // FP Special Register to Integer Register File Move
431 InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
432 InstrStage<2, [A9_DRegsN], 0, Reserved>,
433 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
434 InstrStage<1, [A9_MUX0], 0>,
435 InstrStage<1, [A9_NPipe]>]>,
437 // Single-precision FP Unary
438 InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
439 // Extra latency cycles since wbck is 2 cycles
440 InstrStage<3, [A9_DRegsN], 0, Reserved>,
441 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
442 InstrStage<1, [A9_MUX0], 0>,
443 InstrStage<1, [A9_NPipe]>],
446 // Double-precision FP Unary
447 InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
448 // Extra latency cycles since wbck is 2 cycles
449 InstrStage<3, [A9_DRegsN], 0, Reserved>,
450 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
451 InstrStage<1, [A9_MUX0], 0>,
452 InstrStage<1, [A9_NPipe]>],
456 // Single-precision FP Compare
457 InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
458 // Extra latency cycles since wbck is 4 cycles
459 InstrStage<5, [A9_DRegsN], 0, Reserved>,
460 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
461 InstrStage<1, [A9_MUX0], 0>,
462 InstrStage<1, [A9_NPipe]>],
465 // Double-precision FP Compare
466 InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
467 // Extra latency cycles since wbck is 4 cycles
468 InstrStage<5, [A9_DRegsN], 0, Reserved>,
469 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
470 InstrStage<1, [A9_MUX0], 0>,
471 InstrStage<1, [A9_NPipe]>],
474 // Single to Double FP Convert
475 InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
476 InstrStage<5, [A9_DRegsN], 0, Reserved>,
477 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
478 InstrStage<1, [A9_MUX0], 0>,
479 InstrStage<1, [A9_NPipe]>],
482 // Double to Single FP Convert
483 InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
484 InstrStage<5, [A9_DRegsN], 0, Reserved>,
485 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
486 InstrStage<1, [A9_MUX0], 0>,
487 InstrStage<1, [A9_NPipe]>],
491 // Single to Half FP Convert
492 InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
493 InstrStage<5, [A9_DRegsN], 0, Reserved>,
494 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
495 InstrStage<1, [A9_MUX0], 0>,
496 InstrStage<1, [A9_NPipe]>],
499 // Half to Single FP Convert
500 InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
501 InstrStage<3, [A9_DRegsN], 0, Reserved>,
502 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
503 InstrStage<1, [A9_MUX0], 0>,
504 InstrStage<1, [A9_NPipe]>],
508 // Single-Precision FP to Integer Convert
509 InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
510 InstrStage<5, [A9_DRegsN], 0, Reserved>,
511 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
512 InstrStage<1, [A9_MUX0], 0>,
513 InstrStage<1, [A9_NPipe]>],
516 // Double-Precision FP to Integer Convert
517 InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
518 InstrStage<5, [A9_DRegsN], 0, Reserved>,
519 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
520 InstrStage<1, [A9_MUX0], 0>,
521 InstrStage<1, [A9_NPipe]>],
524 // Integer to Single-Precision FP Convert
525 InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
526 InstrStage<5, [A9_DRegsN], 0, Reserved>,
527 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
528 InstrStage<1, [A9_MUX0], 0>,
529 InstrStage<1, [A9_NPipe]>],
532 // Integer to Double-Precision FP Convert
533 InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
534 InstrStage<5, [A9_DRegsN], 0, Reserved>,
535 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
536 InstrStage<1, [A9_MUX0], 0>,
537 InstrStage<1, [A9_NPipe]>],
540 // Single-precision FP ALU
541 InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
542 InstrStage<5, [A9_DRegsN], 0, Reserved>,
543 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
544 InstrStage<1, [A9_MUX0], 0>,
545 InstrStage<1, [A9_NPipe]>],
548 // Double-precision FP ALU
549 InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
550 InstrStage<5, [A9_DRegsN], 0, Reserved>,
551 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
552 InstrStage<1, [A9_MUX0], 0>,
553 InstrStage<1, [A9_NPipe]>],
556 // Single-precision FP Multiply
557 InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
558 InstrStage<6, [A9_DRegsN], 0, Reserved>,
559 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
560 InstrStage<1, [A9_MUX0], 0>,
561 InstrStage<1, [A9_NPipe]>],
564 // Double-precision FP Multiply
565 InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
566 InstrStage<7, [A9_DRegsN], 0, Reserved>,
567 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
568 InstrStage<1, [A9_MUX0], 0>,
569 InstrStage<2, [A9_NPipe]>],
572 // Single-precision FP MAC
573 InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
574 InstrStage<9, [A9_DRegsN], 0, Reserved>,
575 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
576 InstrStage<1, [A9_MUX0], 0>,
577 InstrStage<1, [A9_NPipe]>],
580 // Double-precision FP MAC
581 InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
582 InstrStage<10, [A9_DRegsN], 0, Reserved>,
583 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
584 InstrStage<1, [A9_MUX0], 0>,
585 InstrStage<2, [A9_NPipe]>],
588 // Single-precision FP DIV
589 InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
590 InstrStage<16, [A9_DRegsN], 0, Reserved>,
591 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
592 InstrStage<1, [A9_MUX0], 0>,
593 InstrStage<10, [A9_NPipe]>],
596 // Double-precision FP DIV
597 InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
598 InstrStage<26, [A9_DRegsN], 0, Reserved>,
599 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
600 InstrStage<1, [A9_MUX0], 0>,
601 InstrStage<20, [A9_NPipe]>],
604 // Single-precision FP SQRT
605 InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
606 InstrStage<18, [A9_DRegsN], 0, Reserved>,
607 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
608 InstrStage<1, [A9_MUX0], 0>,
609 InstrStage<13, [A9_NPipe]>],
612 // Double-precision FP SQRT
613 InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
614 InstrStage<33, [A9_DRegsN], 0, Reserved>,
615 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
616 InstrStage<1, [A9_MUX0], 0>,
617 InstrStage<28, [A9_NPipe]>],
621 // Integer to Single-precision Move
622 InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
623 // Extra 1 latency cycle since wbck is 2 cycles
624 InstrStage<3, [A9_DRegsN], 0, Reserved>,
625 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
626 InstrStage<1, [A9_MUX0], 0>,
627 InstrStage<1, [A9_NPipe]>],
630 // Integer to Double-precision Move
631 InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
632 // Extra 1 latency cycle since wbck is 2 cycles
633 InstrStage<3, [A9_DRegsN], 0, Reserved>,
634 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
635 InstrStage<1, [A9_MUX0], 0>,
636 InstrStage<1, [A9_NPipe]>],
639 // Single-precision to Integer Move
640 InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
641 InstrStage<2, [A9_DRegsN], 0, Reserved>,
642 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
643 InstrStage<1, [A9_MUX0], 0>,
644 InstrStage<1, [A9_NPipe]>],
647 // Double-precision to Integer Move
648 InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
649 InstrStage<2, [A9_DRegsN], 0, Reserved>,
650 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
651 InstrStage<1, [A9_MUX0], 0>,
652 InstrStage<1, [A9_NPipe]>],
655 // Single-precision FP Load
656 InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
657 InstrStage<2, [A9_DRegsN], 0, Reserved>,
658 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
659 InstrStage<1, [A9_MUX0], 0>,
660 InstrStage<1, [A9_NPipe]>],
663 // Double-precision FP Load
664 // FIXME: Result latency is 1 if address is 64-bit aligned.
665 InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
666 InstrStage<2, [A9_DRegsN], 0, Reserved>,
667 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
668 InstrStage<1, [A9_MUX0], 0>,
669 InstrStage<1, [A9_NPipe]>],
673 InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
674 InstrStage<2, [A9_DRegsN], 0, Reserved>,
675 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
676 InstrStage<1, [A9_MUX0], 0>,
677 InstrStage<1, [A9_NPipe]>], [1, 1, 1, 1]>,
679 // FP Load Multiple + update
680 InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
681 InstrStage<2, [A9_DRegsN], 0, Reserved>,
682 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
683 InstrStage<1, [A9_MUX0], 0>,
684 InstrStage<1, [A9_NPipe]>], [2, 1, 1, 1]>,
686 // Single-precision FP Store
687 InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
688 InstrStage<2, [A9_DRegsN], 0, Reserved>,
689 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
690 InstrStage<1, [A9_MUX0], 0>,
691 InstrStage<1, [A9_NPipe]>],
694 // Double-precision FP Store
695 InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
696 InstrStage<2, [A9_DRegsN], 0, Reserved>,
697 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
698 InstrStage<1, [A9_MUX0], 0>,
699 InstrStage<1, [A9_NPipe]>],
703 InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
704 InstrStage<2, [A9_DRegsN], 0, Reserved>,
705 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
706 InstrStage<1, [A9_MUX0], 0>,
707 InstrStage<1, [A9_NPipe]>], [1, 1, 1, 1]>,
709 // FP Store Multiple + update
710 InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
711 InstrStage<2, [A9_DRegsN], 0, Reserved>,
712 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
713 InstrStage<1, [A9_MUX0], 0>,
714 InstrStage<1, [A9_NPipe]>], [2, 1, 1, 1]>,
717 // FIXME: Conservatively assume insufficient alignment.
718 InstrItinData<IIC_VLD1, [InstrStage<1, [A9_DRegsN], 0, Required>,
719 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
720 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
721 InstrStage<1, [A9_MUX0], 0>,
722 InstrStage<2, [A9_NPipe]>],
725 InstrItinData<IIC_VLD1x2, [InstrStage<1, [A9_DRegsN], 0, Required>,
726 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
727 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
728 InstrStage<1, [A9_MUX0], 0>,
729 InstrStage<2, [A9_NPipe]>],
732 InstrItinData<IIC_VLD1x3, [InstrStage<1, [A9_DRegsN], 0, Required>,
733 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
734 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
735 InstrStage<1, [A9_MUX0], 0>,
736 InstrStage<3, [A9_NPipe]>],
739 InstrItinData<IIC_VLD1x4, [InstrStage<1, [A9_DRegsN], 0, Required>,
740 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
741 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
742 InstrStage<1, [A9_MUX0], 0>,
743 InstrStage<3, [A9_NPipe]>],
746 InstrItinData<IIC_VLD1u, [InstrStage<1, [A9_DRegsN], 0, Required>,
747 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
748 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
749 InstrStage<1, [A9_MUX0], 0>,
750 InstrStage<2, [A9_NPipe]>],
753 InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A9_DRegsN], 0, Required>,
754 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
755 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
756 InstrStage<1, [A9_MUX0], 0>,
757 InstrStage<2, [A9_NPipe]>],
760 InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A9_DRegsN], 0, Required>,
761 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
762 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
763 InstrStage<1, [A9_MUX0], 0>,
764 InstrStage<3, [A9_NPipe]>],
767 InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A9_DRegsN], 0, Required>,
768 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
769 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
770 InstrStage<1, [A9_MUX0], 0>,
771 InstrStage<3, [A9_NPipe]>],
775 InstrItinData<IIC_VLD2, [InstrStage<1, [A9_DRegsN], 0, Required>,
776 // Extra latency cycles since wbck is 7 cycles
777 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
778 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
779 InstrStage<1, [A9_MUX0], 0>,
780 InstrStage<2, [A9_NPipe]>],
784 InstrItinData<IIC_VLD2x2, [InstrStage<1, [A9_DRegsN], 0, Required>,
785 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
786 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
787 InstrStage<1, [A9_MUX0], 0>,
788 InstrStage<3, [A9_NPipe]>],
792 InstrItinData<IIC_VLD2ln, [InstrStage<1, [A9_DRegsN], 0, Required>,
793 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
794 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
795 InstrStage<1, [A9_MUX0], 0>,
796 InstrStage<3, [A9_NPipe]>],
800 InstrItinData<IIC_VLD2u, [InstrStage<1, [A9_DRegsN], 0, Required>,
801 // Extra latency cycles since wbck is 7 cycles
802 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
803 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
804 InstrStage<1, [A9_MUX0], 0>,
805 InstrStage<2, [A9_NPipe]>],
809 InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A9_DRegsN], 0, Required>,
810 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
811 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
812 InstrStage<1, [A9_MUX0], 0>,
813 InstrStage<3, [A9_NPipe]>],
817 InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A9_DRegsN], 0, Required>,
818 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
819 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
820 InstrStage<1, [A9_MUX0], 0>,
821 InstrStage<3, [A9_NPipe]>],
822 [4, 4, 2, 1, 1, 1, 1, 1]>,
825 InstrItinData<IIC_VLD3, [InstrStage<1, [A9_DRegsN], 0, Required>,
826 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
827 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
828 InstrStage<1, [A9_MUX0], 0>,
829 InstrStage<4, [A9_NPipe]>],
833 InstrItinData<IIC_VLD3ln, [InstrStage<1, [A9_DRegsN], 0, Required>,
834 InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
835 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
836 InstrStage<1, [A9_MUX0], 0>,
837 InstrStage<5, [A9_NPipe]>],
838 [5, 5, 6, 1, 1, 1, 1, 2]>,
841 InstrItinData<IIC_VLD3u, [InstrStage<1, [A9_DRegsN], 0, Required>,
842 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
843 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
844 InstrStage<1, [A9_MUX0], 0>,
845 InstrStage<4, [A9_NPipe]>],
849 InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A9_DRegsN], 0, Required>,
850 InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
851 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
852 InstrStage<1, [A9_MUX0], 0>,
853 InstrStage<5, [A9_NPipe]>],
854 [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
857 InstrItinData<IIC_VLD4, [InstrStage<1, [A9_DRegsN], 0, Required>,
858 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
859 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
860 InstrStage<1, [A9_MUX0], 0>,
861 InstrStage<4, [A9_NPipe]>],
865 InstrItinData<IIC_VLD4ln, [InstrStage<1, [A9_DRegsN], 0, Required>,
866 InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
867 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
868 InstrStage<1, [A9_MUX0], 0>,
869 InstrStage<5, [A9_NPipe]>],
870 [5, 5, 6, 6, 1, 1, 1, 1, 2, 2]>,
873 InstrItinData<IIC_VLD4u, [InstrStage<1, [A9_DRegsN], 0, Required>,
874 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
875 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
876 InstrStage<1, [A9_MUX0], 0>,
877 InstrStage<4, [A9_NPipe]>],
881 InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A9_DRegsN], 0, Required>,
882 InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
883 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
884 InstrStage<1, [A9_MUX0], 0>,
885 InstrStage<5, [A9_NPipe]>],
886 [5, 5, 6, 6, 2, 1, 1, 1, 1, 1, 2, 2]>,
889 // FIXME: We don't model this instruction properly
890 InstrItinData<IIC_VST, [InstrStage<1, [A9_DRegsN], 0, Required>,
891 // Extra latency cycles since wbck is 6 cycles
892 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
893 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
894 InstrStage<1, [A9_MUX0], 0>,
895 InstrStage<1, [A9_NPipe]>]>,
897 // Double-register Integer Unary
898 InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
899 // Extra latency cycles since wbck is 6 cycles
900 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
901 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
902 InstrStage<1, [A9_MUX0], 0>,
903 InstrStage<1, [A9_NPipe]>],
906 // Quad-register Integer Unary
907 InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
908 // Extra latency cycles since wbck is 6 cycles
909 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
910 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
911 InstrStage<1, [A9_MUX0], 0>,
912 InstrStage<1, [A9_NPipe]>],
915 // Double-register Integer Q-Unary
916 InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
917 // Extra latency cycles since wbck is 6 cycles
918 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
919 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
920 InstrStage<1, [A9_MUX0], 0>,
921 InstrStage<1, [A9_NPipe]>],
924 // Quad-register Integer Q-Unary
925 InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
926 // Extra latency cycles since wbck is 6 cycles
927 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
928 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
929 InstrStage<1, [A9_MUX0], 0>,
930 InstrStage<1, [A9_NPipe]>],
933 // Double-register Integer Binary
934 InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
935 // Extra latency cycles since wbck is 6 cycles
936 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
937 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
938 InstrStage<1, [A9_MUX0], 0>,
939 InstrStage<1, [A9_NPipe]>],
942 // Quad-register Integer Binary
943 InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
944 // Extra latency cycles since wbck is 6 cycles
945 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
946 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
947 InstrStage<1, [A9_MUX0], 0>,
948 InstrStage<1, [A9_NPipe]>],
951 // Double-register Integer Subtract
952 InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
953 // Extra latency cycles since wbck is 6 cycles
954 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
955 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
956 InstrStage<1, [A9_MUX0], 0>,
957 InstrStage<1, [A9_NPipe]>],
960 // Quad-register Integer Subtract
961 InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
962 // Extra latency cycles since wbck is 6 cycles
963 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
964 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
965 InstrStage<1, [A9_MUX0], 0>,
966 InstrStage<1, [A9_NPipe]>],
969 // Double-register Integer Shift
970 InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
971 // Extra latency cycles since wbck is 6 cycles
972 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
973 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
974 InstrStage<1, [A9_MUX0], 0>,
975 InstrStage<1, [A9_NPipe]>],
978 // Quad-register Integer Shift
979 InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
980 // Extra latency cycles since wbck is 6 cycles
981 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
982 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
983 InstrStage<1, [A9_MUX0], 0>,
984 InstrStage<1, [A9_NPipe]>],
987 // Double-register Integer Shift (4 cycle)
988 InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
989 // Extra latency cycles since wbck is 6 cycles
990 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
991 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
992 InstrStage<1, [A9_MUX0], 0>,
993 InstrStage<1, [A9_NPipe]>],
996 // Quad-register Integer Shift (4 cycle)
997 InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
998 // Extra latency cycles since wbck is 6 cycles
999 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1000 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1001 InstrStage<1, [A9_MUX0], 0>,
1002 InstrStage<1, [A9_NPipe]>],
1005 // Double-register Integer Binary (4 cycle)
1006 InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
1007 // Extra latency cycles since wbck is 6 cycles
1008 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1009 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1010 InstrStage<1, [A9_MUX0], 0>,
1011 InstrStage<1, [A9_NPipe]>],
1014 // Quad-register Integer Binary (4 cycle)
1015 InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
1016 // Extra latency cycles since wbck is 6 cycles
1017 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1018 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1019 InstrStage<1, [A9_MUX0], 0>,
1020 InstrStage<1, [A9_NPipe]>],
1023 // Double-register Integer Subtract (4 cycle); keyed on IIC_VSUBi4D, distinct from the plain IIC_VSUBiD entry
1024 InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
1025 // Extra latency cycles since wbck is 6 cycles
1026 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1027 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1028 InstrStage<1, [A9_MUX0], 0>,
1029 InstrStage<1, [A9_NPipe]>],
1032 // Quad-register Integer Subtract (4 cycle); keyed on IIC_VSUBi4Q, distinct from the plain IIC_VSUBiQ entry
1033 InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
1034 // Extra latency cycles since wbck is 6 cycles
1035 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1036 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1037 InstrStage<1, [A9_MUX0], 0>,
1038 InstrStage<1, [A9_NPipe]>],
1042 // Double-register Integer Count
1043 InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1044 // Extra latency cycles since wbck is 6 cycles
1045 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1046 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1047 InstrStage<1, [A9_MUX0], 0>,
1048 InstrStage<1, [A9_NPipe]>],
1051 // Quad-register Integer Count
1052 // Result written in N3, but that is relative to the last cycle of multicycle,
1053 // so we use 4 for those cases
1054 InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1055 // Extra latency cycles since wbck is 7 cycles
1056 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1057 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1058 InstrStage<1, [A9_MUX0], 0>,
1059 InstrStage<2, [A9_NPipe]>],
1062 // Double-register Absolute Difference and Accumulate
1063 InstrItinData<IIC_VABAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1064 // Extra latency cycles since wbck is 6 cycles
1065 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1066 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1067 InstrStage<1, [A9_MUX0], 0>,
1068 InstrStage<1, [A9_NPipe]>],
1071 // Quad-register Absolute Difference and Accumulate
1072 InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1073 // Extra latency cycles since wbck is 6 cycles
1074 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1075 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1076 InstrStage<1, [A9_MUX0], 0>,
1077 InstrStage<2, [A9_NPipe]>],
1080 // Double-register Integer Pair Add Long
1081 InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1082 // Extra latency cycles since wbck is 6 cycles
1083 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1084 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1085 InstrStage<1, [A9_MUX0], 0>,
1086 InstrStage<1, [A9_NPipe]>],
1089 // Quad-register Integer Pair Add Long
1090 InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1091 // Extra latency cycles since wbck is 6 cycles
1092 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1093 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1094 InstrStage<1, [A9_MUX0], 0>,
1095 InstrStage<2, [A9_NPipe]>],
1099 // Double-register Integer Multiply (.8, .16)
1100 InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
1101 // Extra latency cycles since wbck is 6 cycles
1102 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1103 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1104 InstrStage<1, [A9_MUX0], 0>,
1105 InstrStage<1, [A9_NPipe]>],
1108 // Quad-register Integer Multiply (.8, .16)
1109 InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
1110 // Extra latency cycles since wbck is 7 cycles
1111 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1112 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1113 InstrStage<1, [A9_MUX0], 0>,
1114 InstrStage<2, [A9_NPipe]>],
1118 // Double-register Integer Multiply (.32)
1119 InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
1120 // Extra latency cycles since wbck is 7 cycles
1121 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1122 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1123 InstrStage<1, [A9_MUX0], 0>,
1124 InstrStage<2, [A9_NPipe]>],
1127 // Quad-register Integer Multiply (.32)
1128 InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
1129 // Extra latency cycles since wbck is 9 cycles
1130 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1131 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1132 InstrStage<1, [A9_MUX0], 0>,
1133 InstrStage<4, [A9_NPipe]>],
1136 // Double-register Integer Multiply-Accumulate (.8, .16)
1137 InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
1138 // Extra latency cycles since wbck is 6 cycles
1139 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1140 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1141 InstrStage<1, [A9_MUX0], 0>,
1142 InstrStage<1, [A9_NPipe]>],
1145 // Double-register Integer Multiply-Accumulate (.32)
1146 InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
1147 // Extra latency cycles since wbck is 7 cycles
1148 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1149 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1150 InstrStage<1, [A9_MUX0], 0>,
1151 InstrStage<2, [A9_NPipe]>],
1154 // Quad-register Integer Multiply-Accumulate (.8, .16)
1155 InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
1156 // Extra latency cycles since wbck is 7 cycles
1157 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1158 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1159 InstrStage<1, [A9_MUX0], 0>,
1160 InstrStage<2, [A9_NPipe]>],
1163 // Quad-register Integer Multiply-Accumulate (.32)
1164 InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
1165 // Extra latency cycles since wbck is 9 cycles
1166 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1167 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1168 InstrStage<1, [A9_MUX0], 0>,
1169 InstrStage<4, [A9_NPipe]>],
1174 InstrItinData<IIC_VMOV, [InstrStage<1, [A9_DRegsN], 0, Required>,
1175 InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1176 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1177 InstrStage<1, [A9_MUX0], 0>,
1178 InstrStage<1, [A9_NPipe]>],
1182 InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_DRegsN], 0, Required>,
1183 // Extra latency cycles since wbck is 6 cycles
1184 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1185 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1186 InstrStage<1, [A9_MUX0], 0>,
1187 InstrStage<1, [A9_NPipe]>],
1190 // Double-register Permute Move
1191 InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1192 // FIXME: all latencies are arbitrary, no information is available
1193 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1194 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1195 InstrStage<1, [A9_MUX0], 0>,
1196 InstrStage<1, [A9_NPipe]>],
1199 // Quad-register Permute Move
1200 InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1201 // FIXME: all latencies are arbitrary, no information is available
1202 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1203 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1204 InstrStage<1, [A9_MUX0], 0>,
1205 InstrStage<1, [A9_NPipe]>],
1208 // Integer to Single-precision Move
1209 InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_DRegsN], 0, Required>,
1210 // FIXME: all latencies are arbitrary, no information is available
1211 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1212 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1213 InstrStage<1, [A9_MUX0], 0>,
1214 InstrStage<1, [A9_NPipe]>],
1217 // Integer to Double-precision Move
1218 InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_DRegsN], 0, Required>,
1219 // FIXME: all latencies are arbitrary, no information is available
1220 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1221 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1222 InstrStage<1, [A9_MUX0], 0>,
1223 InstrStage<1, [A9_NPipe]>],
1226 // Single-precision to Integer Move
1227 InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_DRegsN], 0, Required>,
1228 // FIXME: all latencies are arbitrary, no information is available
1229 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1230 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1231 InstrStage<1, [A9_MUX0], 0>,
1232 InstrStage<1, [A9_NPipe]>],
1235 // Double-precision to Integer Move
1236 InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_DRegsN], 0, Required>,
1237 // FIXME: all latencies are arbitrary, no information is available
1238 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1239 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1240 InstrStage<1, [A9_MUX0], 0>,
1241 InstrStage<1, [A9_NPipe]>],
1244 // Integer to Lane Move
1245 InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN], 0, Required>,
1246 // FIXME: all latencies are arbitrary, no information is available
1247 InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
1248 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1249 InstrStage<1, [A9_MUX0], 0>,
1250 InstrStage<2, [A9_NPipe]>],
1254 // Vector narrow move
1255 InstrItinData<IIC_VMOVN, [InstrStage<1, [A9_DRegsN], 0, Required>,
1256 // Extra latency cycles since wbck is 6 cycles
1257 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1258 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1259 InstrStage<1, [A9_MUX0], 0>,
1260 InstrStage<1, [A9_NPipe]>],
1263 // Double-register FP Unary
1264 InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1265 // Extra latency cycles since wbck is 6 cycles
1266 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1267 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1268 InstrStage<1, [A9_MUX0], 0>,
1269 InstrStage<1, [A9_NPipe]>],
1272 // Quad-register FP Unary
1273 // Result written in N5, but that is relative to the last cycle of multicycle,
1274 // so we use 6 for those cases
1275 InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1276 // Extra latency cycles since wbck is 7 cycles
1277 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1278 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1279 InstrStage<1, [A9_MUX0], 0>,
1280 InstrStage<2, [A9_NPipe]>],
1283 // Double-register FP Binary
1284 // FIXME: We're using this itin for many instructions and [2, 2] here is too
1286 InstrItinData<IIC_VBIND, [InstrStage<1, [A9_DRegsN], 0, Required>,
1287 // Extra latency cycles since wbck is 6 cycles
1288 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1289 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1290 InstrStage<1, [A9_MUX0], 0>,
1291 InstrStage<1, [A9_NPipe]>],
1294 // Quad-register FP Binary
1295 // Result written in N5, but that is relative to the last cycle of multicycle,
1296 // so we use 6 for those cases
1297 // FIXME: We're using this itin for many instructions and [2, 2] here is too
1299 InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1300 // Extra latency cycles since wbck is 7 cycles
1301 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1302 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1303 InstrStage<1, [A9_MUX0], 0>,
1304 InstrStage<2, [A9_NPipe]>],
1307 // Double-register FP Multiply-Accumulate
1308 InstrItinData<IIC_VMACD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1309 // Extra latency cycles since wbck is 7 cycles
1310 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1311 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1312 InstrStage<1, [A9_MUX0], 0>,
1313 InstrStage<2, [A9_NPipe]>],
1316 // Quad-register FP Multiply-Accumulate
1317 // Result written in N9, but that is relative to the last cycle of multicycle,
1318 // so we use 10 for those cases
1319 InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1320 // Extra latency cycles since wbck is 9 cycles
1321 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1322 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1323 InstrStage<1, [A9_MUX0], 0>,
1324 InstrStage<4, [A9_NPipe]>],
1327 // Double-register Reciprocal Step
1328 InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1329 // Extra latency cycles since wbck is 7 cycles
1330 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1331 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1332 InstrStage<1, [A9_MUX0], 0>,
1333 InstrStage<2, [A9_NPipe]>],
1336 // Quad-register Reciprocal Step
1337 InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1338 // Extra latency cycles since wbck is 9 cycles
1339 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1340 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1341 InstrStage<1, [A9_MUX0], 0>,
1342 InstrStage<4, [A9_NPipe]>],
1345 // Double-register Permute
1346 InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1347 // Extra latency cycles since wbck is 6 cycles
1348 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1349 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1350 InstrStage<1, [A9_MUX0], 0>,
1351 InstrStage<1, [A9_NPipe]>],
1354 // Quad-register Permute
1355 // Result written in N2, but that is relative to the last cycle of multicycle,
1356 // so we use 3 for those cases
1357 InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1358 // Extra latency cycles since wbck is 7 cycles
1359 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1360 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1361 InstrStage<1, [A9_MUX0], 0>,
1362 InstrStage<2, [A9_NPipe]>],
1365 // Quad-register Permute (3 cycle issue)
1366 // Result written in N2, but that is relative to the last cycle of multicycle,
1367 // so we use 4 for those cases
1368 InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_DRegsN], 0, Required>,
1369 // Extra latency cycles since wbck is 8 cycles
1370 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1371 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1372 InstrStage<1, [A9_MUX0], 0>,
1373 InstrStage<3, [A9_NPipe]>],
1377 // Double-register VEXT
1378 InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1379 // Extra latency cycles since wbck is 6 cycles
1380 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1381 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1382 InstrStage<1, [A9_MUX0], 0>,
1383 InstrStage<1, [A9_NPipe]>],
1386 // Quad-register VEXT
1387 InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1388 // Extra latency cycles since wbck is 7 cycles
1389 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1390 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1391 InstrStage<1, [A9_MUX0], 0>,
1392 InstrStage<2, [A9_NPipe]>],
1396 InstrItinData<IIC_VTB1, [InstrStage<1, [A9_DRegsN], 0, Required>,
1397 // Extra latency cycles since wbck is 7 cycles
1398 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1399 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1400 InstrStage<1, [A9_MUX0], 0>,
1401 InstrStage<2, [A9_NPipe]>],
1403 InstrItinData<IIC_VTB2, [InstrStage<2, [A9_DRegsN], 0, Required>,
1404 // Extra latency cycles since wbck is 7 cycles
1405 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1406 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1407 InstrStage<1, [A9_MUX0], 0>,
1408 InstrStage<2, [A9_NPipe]>],
1410 InstrItinData<IIC_VTB3, [InstrStage<2, [A9_DRegsN], 0, Required>,
1411 // Extra latency cycles since wbck is 8 cycles
1412 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1413 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1414 InstrStage<1, [A9_MUX0], 0>,
1415 InstrStage<3, [A9_NPipe]>],
1417 InstrItinData<IIC_VTB4, [InstrStage<1, [A9_DRegsN], 0, Required>,
1418 // Extra latency cycles since wbck is 8 cycles
1419 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1420 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1421 InstrStage<1, [A9_MUX0], 0>,
1422 InstrStage<3, [A9_NPipe]>],
1423 [4, 2, 2, 3, 3, 1]>,
1426 InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_DRegsN], 0, Required>,
1427 // Extra latency cycles since wbck is 7 cycles
1428 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1429 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1430 InstrStage<1, [A9_MUX0], 0>,
1431 InstrStage<2, [A9_NPipe]>],
1433 InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_DRegsN], 0, Required>,
1434 // Extra latency cycles since wbck is 7 cycles
1435 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1436 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1437 InstrStage<1, [A9_MUX0], 0>,
1438 InstrStage<2, [A9_NPipe]>],
1440 InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_DRegsN], 0, Required>,
1441 // Extra latency cycles since wbck is 8 cycles
1442 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1443 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1444 InstrStage<1, [A9_MUX0], 0>,
1445 InstrStage<3, [A9_NPipe]>],
1446 [4, 1, 2, 2, 3, 1]>,
1447 InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_DRegsN], 0, Required>,
1448 // Extra latency cycles since wbck is 8 cycles
1449 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1450 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1451 InstrStage<1, [A9_MUX0], 0>,
1452 InstrStage<2, [A9_NPipe]>],
1453 [4, 1, 2, 2, 3, 3, 1]>