Update ARMConstantPoolValue to not use a modifier string. Use an explicit
[oota-llvm.git] / lib / Target / ARM / ARMScheduleA9.td
1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
11 //
12 //===----------------------------------------------------------------------===//
13
14 //
15 // Ad-hoc scheduling information derived from the rather vague "Cortex-A9
16 // Technical Reference Manual".
17 //
18 // Functional units referenced by the Cortex-A9 itinerary stages below
19 def A9_Issue0  : FuncUnit; // Issue slot 0
20 def A9_Issue1  : FuncUnit; // Issue slot 1
21 def A9_Branch  : FuncUnit; // Branch unit
22 def A9_ALU0    : FuncUnit; // ALU / MUL pipeline 0 (sole unit for integer MUL)
23 def A9_ALU1    : FuncUnit; // ALU pipeline 1
24 def A9_AGU     : FuncUnit; // Address generation unit for loads / stores
25 def A9_NPipe   : FuncUnit; // NEON pipeline (VFP itineraries also end here)
26 def A9_MUX0    : FuncUnit; // AGU + NEON/FPU multiplexer
27 def A9_LSUnit  : FuncUnit; // Load/store unit
28 def A9_DRegsVFP: FuncUnit; // FP register set, VFP side (artificial unit)
29 def A9_DRegsN  : FuncUnit; // FP register set, NEON side (artificial unit)
30
31 // Bypasses
32 def A9_LdBypass : Bypass; // Named on load results and ALU/compare operands
33
34 def CortexA9Itineraries : ProcessorItineraries<
35   [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
36    A9_LSUnit, A9_DRegsVFP, A9_DRegsN],
37   [A9_LdBypass], [
38   // Two fully-pipelined integer ALU pipelines
39
40   //
41   // Move instructions, unconditional
42   InstrItinData<IIC_iMOVi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
43                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
44   InstrItinData<IIC_iMOVr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
45                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
46   InstrItinData<IIC_iMOVsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
47                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
48   InstrItinData<IIC_iMOVsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
49                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
50   InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
51                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
52                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
53   //
54   // MVN instructions
55   InstrItinData<IIC_iMVNi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
56                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
57                               [1]>,
58   InstrItinData<IIC_iMVNr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
59                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
60                               [1, 1], [NoBypass, A9_LdBypass]>,
61   InstrItinData<IIC_iMVNsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
62                                InstrStage<2, [A9_ALU0, A9_ALU1]>],
63                               [2, 1]>,
64   InstrItinData<IIC_iMVNsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
65                                InstrStage<3, [A9_ALU0, A9_ALU1]>],
66                               [3, 1, 1]>,
67   //
68   // No operand cycles
69   InstrItinData<IIC_iALUx   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
70                                InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
71   //
72   // Binary Instructions that produce a result
73   InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
74                              InstrStage<1, [A9_ALU0, A9_ALU1]>],
75                             [1, 1], [NoBypass, A9_LdBypass]>,
76   InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
77                              InstrStage<1, [A9_ALU0, A9_ALU1]>],
78                             [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
79   InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
80                              InstrStage<2, [A9_ALU0, A9_ALU1]>],
81                             [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
82   InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
83                              InstrStage<2, [A9_ALU0, A9_ALU1]>],
84                             [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
85   InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
86                              InstrStage<3, [A9_ALU0, A9_ALU1]>],
87                             [3, 1, 1, 1],
88                             [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
89   //
90   // Bitwise Instructions that produce a result
91   InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
92                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
93   InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
94                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
95   InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
96                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
97   InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
98                              InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
99   //
100   // Unary Instructions that produce a result
101
102   // CLZ, RBIT, etc.
103   InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
104                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
105
106   // BFC, BFI, UBFX, SBFX
107   InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
108                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
109
110   //
111   // Zero and sign extension instructions
112   InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
113                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
114   InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
115                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
116   InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
117                              InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
118   //
119   // Compare instructions
120   InstrItinData<IIC_iCMPi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
121                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
122                                [1], [A9_LdBypass]>,
123   InstrItinData<IIC_iCMPr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
124                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
125                                [1, 1], [A9_LdBypass, A9_LdBypass]>,
126   InstrItinData<IIC_iCMPsi  , [InstrStage<2, [A9_ALU0, A9_ALU1]>],
127                                 [1, 1], [A9_LdBypass, NoBypass]>,
128   InstrItinData<IIC_iCMPsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
129                                InstrStage<3, [A9_ALU0, A9_ALU1]>],
130                               [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
131   //
132   // Test instructions
133   InstrItinData<IIC_iTSTi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
134                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
135   InstrItinData<IIC_iTSTr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
136                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
137   InstrItinData<IIC_iTSTsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
138                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
139   InstrItinData<IIC_iTSTsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
140                                InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
141   //
142   // Move instructions, conditional
143   // FIXME: Correctly model the extra input dep on the destination.
144   InstrItinData<IIC_iCMOVi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
145                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
146   InstrItinData<IIC_iCMOVr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
147                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
148   InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
149                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
150   InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
151                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
152
153   // Integer multiply pipeline
154   //
155   InstrItinData<IIC_iMUL16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
156                                InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
157   InstrItinData<IIC_iMAC16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
158                                InstrStage<2, [A9_ALU0]>],
159                               [3, 1, 1, 1]>,
160   InstrItinData<IIC_iMUL32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
161                                InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
162   InstrItinData<IIC_iMAC32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
163                                InstrStage<2, [A9_ALU0]>],
164                               [4, 1, 1, 1]>,
165   InstrItinData<IIC_iMUL64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
166                                InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
167   InstrItinData<IIC_iMAC64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
168                                InstrStage<3, [A9_ALU0]>],
169                               [4, 5, 1, 1]>,
170   // Integer load pipeline
171   // FIXME: The timings are some rough approximations
172   //
173   // Immediate offset
174   InstrItinData<IIC_iLoad_i   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
175                                  InstrStage<1, [A9_MUX0], 0>,
176                                  InstrStage<1, [A9_AGU], 0>,
177                                  InstrStage<1, [A9_LSUnit]>],
178                                 [3, 1], [A9_LdBypass]>,
179   InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
180                                  InstrStage<1, [A9_MUX0], 0>,
181                                  InstrStage<2, [A9_AGU], 0>,
182                                  InstrStage<1, [A9_LSUnit]>],
183                                 [4, 1], [A9_LdBypass]>,
184   // FIXME: If address is 64-bit aligned, AGU cycles is 1.
185   InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
186                                  InstrStage<1, [A9_MUX0], 0>,
187                                  InstrStage<2, [A9_AGU], 0>,
188                                  InstrStage<1, [A9_LSUnit]>],
189                                 [3, 3, 1], [A9_LdBypass]>,
190   //
191   // Register offset
192   InstrItinData<IIC_iLoad_r   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
193                                  InstrStage<1, [A9_MUX0], 0>,
194                                  InstrStage<1, [A9_AGU], 0>,
195                                  InstrStage<1, [A9_LSUnit]>],
196                                 [3, 1, 1], [A9_LdBypass]>,
197   InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
198                                  InstrStage<1, [A9_MUX0], 0>,
199                                  InstrStage<2, [A9_AGU], 0>,
200                                  InstrStage<1, [A9_LSUnit]>],
201                                 [4, 1, 1], [A9_LdBypass]>,
202   InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
203                                  InstrStage<1, [A9_MUX0], 0>,
204                                  InstrStage<2, [A9_AGU], 0>,
205                                  InstrStage<1, [A9_LSUnit]>],
206                                 [3, 3, 1, 1], [A9_LdBypass]>,
207   //
208   // Scaled register offset
209   InstrItinData<IIC_iLoad_si  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
210                                  InstrStage<1, [A9_MUX0], 0>,
211                                  InstrStage<1, [A9_AGU], 0>,
212                                  InstrStage<1, [A9_LSUnit], 0>],
213                                 [4, 1, 1], [A9_LdBypass]>,
214   InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
215                                  InstrStage<1, [A9_MUX0], 0>,
216                                  InstrStage<2, [A9_AGU], 0>,
217                                  InstrStage<1, [A9_LSUnit]>],
218                                 [5, 1, 1], [A9_LdBypass]>,
219   //
220   // Immediate offset with update
221   InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
222                                  InstrStage<1, [A9_MUX0], 0>,
223                                  InstrStage<1, [A9_AGU], 0>,
224                                  InstrStage<1, [A9_LSUnit]>],
225                                 [3, 2, 1], [A9_LdBypass]>,
226   InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
227                                  InstrStage<1, [A9_MUX0], 0>,
228                                  InstrStage<2, [A9_AGU], 0>,
229                                  InstrStage<1, [A9_LSUnit]>],
230                                 [4, 3, 1], [A9_LdBypass]>,
231   //
232   // Register offset with update
233   InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
234                                  InstrStage<1, [A9_MUX0], 0>,
235                                  InstrStage<1, [A9_AGU], 0>,
236                                  InstrStage<1, [A9_LSUnit]>],
237                                 [3, 2, 1, 1], [A9_LdBypass]>,
238   InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
239                                  InstrStage<1, [A9_MUX0], 0>,
240                                  InstrStage<2, [A9_AGU], 0>,
241                                  InstrStage<1, [A9_LSUnit]>],
242                                 [4, 3, 1, 1], [A9_LdBypass]>,
243   InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
244                                  InstrStage<1, [A9_MUX0], 0>,
245                                  InstrStage<2, [A9_AGU], 0>,
246                                  InstrStage<1, [A9_LSUnit]>],
247                                 [3, 3, 1, 1], [A9_LdBypass]>,
248   //
249   // Scaled register offset with update
250   InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
251                                  InstrStage<1, [A9_MUX0], 0>,
252                                  InstrStage<1, [A9_AGU], 0>,
253                                  InstrStage<1, [A9_LSUnit]>],
254                                 [4, 3, 1, 1], [A9_LdBypass]>,
255   InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
256                                   InstrStage<1, [A9_MUX0], 0>,
257                                   InstrStage<2, [A9_AGU], 0>,
258                                   InstrStage<1, [A9_LSUnit]>],
259                                  [5, 4, 1, 1], [A9_LdBypass]>,
260   //
261   // Load multiple, def is the 5th operand.
262   // FIXME: This assumes 3 to 4 registers.
263   InstrItinData<IIC_iLoad_m  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
264                                 InstrStage<1, [A9_MUX0], 0>,
265                                 InstrStage<2, [A9_AGU], 1>,
266                                 InstrStage<2, [A9_LSUnit]>],
267                                [1, 1, 1, 1, 3],
268                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
269   //
270   // Load multiple + update, defs are the 1st and 5th operands.
271   InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
272                                 InstrStage<1, [A9_MUX0], 0>,
273                                 InstrStage<2, [A9_AGU], 1>,
274                                 InstrStage<2, [A9_LSUnit]>],
275                                [2, 1, 1, 1, 3],
276                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
277   //
278   // Load multiple plus branch
279   InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
280                                 InstrStage<1, [A9_MUX0], 0>,
281                                 InstrStage<1, [A9_AGU], 1>,
282                                 InstrStage<2, [A9_LSUnit]>,
283                                 InstrStage<1, [A9_Branch]>],
284                                [1, 2, 1, 1, 3],
285                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
286   //
287   // Pop, def is the 3rd operand.
288   InstrItinData<IIC_iPop  ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
289                                 InstrStage<1, [A9_MUX0], 0>,
290                                 InstrStage<2, [A9_AGU], 1>,
291                                 InstrStage<2, [A9_LSUnit]>],
292                                [1, 1, 3],
293                                [NoBypass, NoBypass, A9_LdBypass]>,
294   //
295   // Pop + branch, def is the 3rd operand.
296   InstrItinData<IIC_iPop_Br,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
297                                 InstrStage<1, [A9_MUX0], 0>,
298                                 InstrStage<2, [A9_AGU], 1>,
299                                 InstrStage<2, [A9_LSUnit]>,
300                                 InstrStage<1, [A9_Branch]>],
301                                [1, 1, 3],
302                                [NoBypass, NoBypass, A9_LdBypass]>,
303
304   //
305   // iLoadi + iALUr for t2LDRpci_pic.
306   InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
307                                 InstrStage<1, [A9_MUX0], 0>,
308                                 InstrStage<1, [A9_AGU], 0>,
309                                 InstrStage<1, [A9_LSUnit]>,
310                                 InstrStage<1, [A9_ALU0, A9_ALU1]>],
311                                [2, 1]>,
312
313   // Integer store pipeline
314   //
315   // Immediate offset
316   InstrItinData<IIC_iStore_i  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
317                                  InstrStage<1, [A9_MUX0], 0>,
318                                  InstrStage<1, [A9_AGU], 0>,
319                                  InstrStage<1, [A9_LSUnit]>], [1, 1]>,
320   InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
321                                  InstrStage<1, [A9_MUX0], 0>,
322                                  InstrStage<2, [A9_AGU], 1>,
323                                  InstrStage<1, [A9_LSUnit]>], [1, 1]>,
324   // FIXME: If address is 64-bit aligned, AGU cycles is 1.
325   InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
326                                  InstrStage<1, [A9_MUX0], 0>,
327                                  InstrStage<2, [A9_AGU], 1>,
328                                  InstrStage<1, [A9_LSUnit]>], [1, 1]>,
329   //
330   // Register offset
331   InstrItinData<IIC_iStore_r  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
332                                  InstrStage<1, [A9_MUX0], 0>,
333                                  InstrStage<1, [A9_AGU], 0>,
334                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
335   InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
336                                  InstrStage<1, [A9_MUX0], 0>,
337                                  InstrStage<2, [A9_AGU], 1>,
338                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
339   InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
340                                  InstrStage<1, [A9_MUX0], 0>,
341                                  InstrStage<2, [A9_AGU], 1>,
342                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
343   //
344   // Scaled register offset
345   InstrItinData<IIC_iStore_si ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
346                                   InstrStage<1, [A9_MUX0], 0>,
347                                   InstrStage<1, [A9_AGU], 0>,
348                                   InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
349   InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
350                                   InstrStage<1, [A9_MUX0], 0>,
351                                   InstrStage<2, [A9_AGU], 1>,
352                                   InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
353   //
354   // Immediate offset with update
355   InstrItinData<IIC_iStore_iu ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
356                                   InstrStage<1, [A9_MUX0], 0>,
357                                   InstrStage<1, [A9_AGU], 0>,
358                                   InstrStage<1, [A9_LSUnit]>], [2, 1, 1]>,
359   InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
360                                   InstrStage<1, [A9_MUX0], 0>,
361                                   InstrStage<2, [A9_AGU], 1>,
362                                   InstrStage<1, [A9_LSUnit]>], [3, 1, 1]>,
363   //
364   // Register offset with update
365   InstrItinData<IIC_iStore_ru ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
366                                   InstrStage<1, [A9_MUX0], 0>,
367                                   InstrStage<1, [A9_AGU], 0>,
368                                   InstrStage<1, [A9_LSUnit]>],
369                                  [2, 1, 1, 1]>,
370   InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
371                                   InstrStage<1, [A9_MUX0], 0>,
372                                   InstrStage<2, [A9_AGU], 1>,
373                                   InstrStage<1, [A9_LSUnit]>],
374                                  [3, 1, 1, 1]>,
375   InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
376                                   InstrStage<1, [A9_MUX0], 0>,
377                                   InstrStage<2, [A9_AGU], 1>,
378                                   InstrStage<1, [A9_LSUnit]>],
379                                  [3, 1, 1, 1]>,
380   //
381   // Scaled register offset with update
382   InstrItinData<IIC_iStore_siu,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
383                                     InstrStage<1, [A9_MUX0], 0>,
384                                     InstrStage<1, [A9_AGU], 0>,
385                                     InstrStage<1, [A9_LSUnit]>],
386                                    [2, 1, 1, 1]>,
387   InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
388                                     InstrStage<1, [A9_MUX0], 0>,
389                                     InstrStage<2, [A9_AGU], 1>,
390                                     InstrStage<1, [A9_LSUnit]>],
391                                    [3, 1, 1, 1]>,
392   //
393   // Store multiple
394   InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
395                                 InstrStage<1, [A9_MUX0], 0>,
396                                 InstrStage<1, [A9_AGU], 0>,
397                                 InstrStage<2, [A9_LSUnit]>]>,
398   //
399   // Store multiple + update
400   InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
401                                 InstrStage<1, [A9_MUX0], 0>,
402                                 InstrStage<1, [A9_AGU], 0>,
403                                 InstrStage<2, [A9_LSUnit]>], [2]>,
404
405   //
406   // Preload
407   InstrItinData<IIC_Preload,   [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
408
409   // Branch
410   //
411   // no delay slots, so the latency of a branch is unimportant
412   InstrItinData<IIC_Br       , [InstrStage<1, [A9_Issue0], 0>,
413                                 InstrStage<1, [A9_Issue1], 0>,
414                                 InstrStage<1, [A9_Branch]>]>,
415
416   // VFP and NEON share the same register file. This means that every VFP
417   // instruction should wait for full completion of the preceding NEON
418   // instruction and vice-versa. We model this behavior with two artificial FUs:
419   // DRegsVFP and DRegsN.
420   //
421   // Every VFP instruction:
422   //  - Acquires DRegsVFP resource for 1 cycle
423   //  - Reserves DRegsN resource for the whole duration (including time to
424   //    register file writeback!).
425   // Every NEON instruction does the same but with FUs swapped.
426   //
427   // Since the reserved FU cannot be acquired, this models precisely
428   // "cross-domain" stalls.
429
430   // VFP
431   // Issue through integer pipeline, and execute in NEON unit.
432
433   // FP Special Register to Integer Register File Move
434   InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
435                               InstrStage<1, [A9_MUX0], 0>,
436                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
437                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
438                               InstrStage<1, [A9_NPipe]>],
439                              [1]>,
440   //
441   // Single-precision FP Unary
442   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
443                                InstrStage<1, [A9_MUX0], 0>,
444                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
445                                // Extra latency: writeback takes 2 cycles
446                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
447                                InstrStage<1, [A9_NPipe]>],
448                               [1, 1]>,
449   //
450   // Double-precision FP Unary
451   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
452                                InstrStage<1, [A9_MUX0], 0>,
453                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
454                                // Extra latency: writeback takes 2 cycles
455                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
456                                InstrStage<1, [A9_NPipe]>],
457                               [1, 1]>,
458
459   //
460   // Single-precision FP Compare
461   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
462                                InstrStage<1, [A9_MUX0], 0>,
463                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
464                                // Extra latency: writeback takes 4 cycles
465                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
466                                InstrStage<1, [A9_NPipe]>],
467                               [1, 1]>,
468   //
469   // Double-precision FP Compare
470   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
471                                InstrStage<1, [A9_MUX0], 0>,
472                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
473                                // Extra latency: writeback takes 4 cycles
474                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
475                                InstrStage<1, [A9_NPipe]>],
476                               [1, 1]>,
477   //
478   // Single to Double FP Convert
479   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
480                                InstrStage<1, [A9_MUX0], 0>,
481                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
482                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
483                                InstrStage<1, [A9_NPipe]>],
484                               [4, 1]>,
485   //
486   // Double to Single FP Convert
487   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
488                                InstrStage<1, [A9_MUX0], 0>,
489                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
490                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
491                                InstrStage<1, [A9_NPipe]>],
492                               [4, 1]>,
493
494   //
495   // Single to Half FP Convert
496   InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
497                                InstrStage<1, [A9_MUX0], 0>,
498                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
499                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
500                                InstrStage<1, [A9_NPipe]>],
501                               [4, 1]>,
502   //
503   // Half to Single FP Convert
504   InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
505                                InstrStage<1, [A9_MUX0], 0>,
506                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
507                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
508                                InstrStage<1, [A9_NPipe]>],
509                               [2, 1]>,
510
511   //
512   // Single-Precision FP to Integer Convert
513   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
514                                InstrStage<1, [A9_MUX0], 0>,
515                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
516                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
517                                InstrStage<1, [A9_NPipe]>],
518                               [4, 1]>,
519   //
520   // Double-Precision FP to Integer Convert
521   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
522                                InstrStage<1, [A9_MUX0], 0>,
523                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
524                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
525                                InstrStage<1, [A9_NPipe]>],
526                               [4, 1]>,
527   //
528   // Integer to Single-Precision FP Convert
529   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
530                                InstrStage<1, [A9_MUX0], 0>,
531                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
532                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
533                                InstrStage<1, [A9_NPipe]>],
534                               [4, 1]>,
535   //
536   // Integer to Double-Precision FP Convert
537   InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
538                                InstrStage<1, [A9_MUX0], 0>,
539                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
540                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
541                                InstrStage<1, [A9_NPipe]>],
542                               [4, 1]>,
543   //
544   // Single-precision FP ALU
545   InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
546                                InstrStage<1, [A9_MUX0], 0>,
547                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
548                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
549                                InstrStage<1, [A9_NPipe]>],
550                               [4, 1, 1]>,
551   //
552   // Double-precision FP ALU
553   InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
554                                InstrStage<1, [A9_MUX0], 0>,
555                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
556                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
557                                InstrStage<1, [A9_NPipe]>],
558                               [4, 1, 1]>,
559   //
560   // Single-precision FP Multiply
561   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
562                                InstrStage<1, [A9_MUX0], 0>,
563                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
564                                InstrStage<6, [A9_DRegsN],   0, Reserved>,
565                                InstrStage<1, [A9_NPipe]>],
566                               [5, 1, 1]>,
567   //
568   // Double-precision FP Multiply
569   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
570                                InstrStage<1, [A9_MUX0], 0>,
571                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
572                                InstrStage<7, [A9_DRegsN],   0, Reserved>,
573                                InstrStage<2, [A9_NPipe]>],
574                               [6, 1, 1]>,
575   //
576   // Single-precision FP MAC
577   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
578                                InstrStage<1, [A9_MUX0], 0>,
579                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
580                                InstrStage<9, [A9_DRegsN],   0, Reserved>,
581                                InstrStage<1, [A9_NPipe]>],
582                               [8, 1, 1, 1]>,
583   //
584   // Double-precision FP MAC
585   InstrItinData<IIC_fpMAC64 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
586                                InstrStage<1,  [A9_MUX0], 0>,
587                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
588                                InstrStage<10, [A9_DRegsN],  0, Reserved>,
589                                InstrStage<2,  [A9_NPipe]>],
590                               [9, 1, 1, 1]>,
591   //
592   // Single-precision FP DIV
593   InstrItinData<IIC_fpDIV32 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
594                                InstrStage<1,  [A9_MUX0], 0>,
595                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
596                                InstrStage<16, [A9_DRegsN],  0, Reserved>,
597                                InstrStage<10, [A9_NPipe]>],
598                               [15, 1, 1]>,
599   //
600   // Double-precision FP DIV
601   InstrItinData<IIC_fpDIV64 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
602                                InstrStage<1,  [A9_MUX0], 0>,
603                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
604                                InstrStage<26, [A9_DRegsN],  0, Reserved>,
605                                InstrStage<20, [A9_NPipe]>],
606                               [25, 1, 1]>,
607   //
608   // Single-precision FP SQRT
609   InstrItinData<IIC_fpSQRT32, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
610                                InstrStage<1,  [A9_MUX0], 0>,
611                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
612                                InstrStage<18, [A9_DRegsN],   0, Reserved>,
613                                InstrStage<13, [A9_NPipe]>],
614                               [17, 1]>,
615   //
616   // Double-precision FP SQRT
617   InstrItinData<IIC_fpSQRT64, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
618                                InstrStage<1,  [A9_MUX0], 0>,
619                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
620                                InstrStage<33, [A9_DRegsN],   0, Reserved>,
621                                InstrStage<28, [A9_NPipe]>],
622                               [32, 1]>,
623
624   //
625   // Integer to Single-precision Move
626   InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
627                                InstrStage<1, [A9_MUX0], 0>,
628                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
629                                // Extra 1 latency cycle since wbck is 2 cycles
630                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
631                                InstrStage<1, [A9_NPipe]>],
632                               [1, 1]>,
633   //
634   // Integer to Double-precision Move
635   InstrItinData<IIC_fpMOVID,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
636                                InstrStage<1, [A9_MUX0], 0>,
637                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
638                                // Extra 1 latency cycle since wbck is 2 cycles
639                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
640                                InstrStage<1, [A9_NPipe]>],
641                               [1, 1, 1]>,
642   //
643   // Single-precision to Integer Move
644   InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
645                                InstrStage<1, [A9_MUX0], 0>,
646                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
647                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
648                                InstrStage<1, [A9_NPipe]>],
649                               [2, 1]>,
650   //
651   // Double-precision to Integer Move
652   InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
653                                InstrStage<1, [A9_MUX0], 0>,
654                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
655                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
656                                InstrStage<1, [A9_NPipe]>],
657                               [2, 1, 1]>,
658   //
659   // Single-precision FP Load
660   InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
661                                InstrStage<1, [A9_MUX0], 0>,
662                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
663                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
664                                InstrStage<1, [A9_NPipe], 0>,
665                                InstrStage<1, [A9_LSUnit]>],
666                               [1, 1]>,
667   //
668   // Double-precision FP Load
669   // FIXME: Result latency is 1 if address is 64-bit aligned.
670   InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
671                                InstrStage<1, [A9_MUX0], 0>,
672                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
673                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
674                                InstrStage<1, [A9_NPipe], 0>,
675                                InstrStage<1, [A9_LSUnit]>],
676                               [2, 1]>,
677   //
678   // FP Load Multiple
679   InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
680                                InstrStage<1, [A9_MUX0], 0>,
681                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
682                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
683                                InstrStage<1, [A9_NPipe], 0>,
684                                InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>,
685   //
686   // FP Load Multiple + update
687   InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
688                                InstrStage<1, [A9_MUX0], 0>,
689                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
690                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
691                                InstrStage<1, [A9_NPipe], 0>,
692                                InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1]>,
693   //
694   // Single-precision FP Store
695   InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
696                                InstrStage<1, [A9_MUX0], 0>,
697                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
698                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
699                                InstrStage<1, [A9_NPipe], 0>,
700                                InstrStage<1, [A9_LSUnit]>],
701                               [1, 1]>,
702   //
703   // Double-precision FP Store
704   InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
705                                InstrStage<1, [A9_MUX0], 0>,
706                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
707                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
708                                InstrStage<1, [A9_NPipe], 0>,
709                                InstrStage<1, [A9_LSUnit]>],
710                               [1, 1]>,
711   //
712   // FP Store Multiple
713   InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
714                                InstrStage<1, [A9_MUX0], 0>,
715                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
716                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
717                                InstrStage<1, [A9_NPipe], 0>,
718                                InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>,
719   //
720   // FP Store Multiple + update
721   InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
722                                 InstrStage<1, [A9_MUX0], 0>,
723                                 InstrStage<1, [A9_DRegsVFP], 0, Required>,
724                                 InstrStage<2, [A9_DRegsN],   0, Reserved>,
725                                 InstrStage<1, [A9_NPipe], 0>,
726                                 InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1]>,
727   // NEON
728   // VLD1
729   // FIXME: Conservatively assume insufficient alignment.
730   InstrItinData<IIC_VLD1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
731                                InstrStage<1, [A9_MUX0], 0>,
732                                InstrStage<1, [A9_DRegsN],   0, Required>,
733                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
734                                InstrStage<2, [A9_NPipe], 0>,
735                                InstrStage<2, [A9_LSUnit]>],
736                               [2, 1]>,
737   // VLD1x2
738   InstrItinData<IIC_VLD1x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
739                                InstrStage<1, [A9_MUX0], 0>,
740                                InstrStage<1, [A9_DRegsN],   0, Required>,
741                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
742                                InstrStage<2, [A9_NPipe], 0>,
743                                InstrStage<2, [A9_LSUnit]>],
744                               [2, 2, 1]>,
745   // VLD1x3
746   InstrItinData<IIC_VLD1x3,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
747                                InstrStage<1, [A9_MUX0], 0>,
748                                InstrStage<1, [A9_DRegsN],   0, Required>,
749                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
750                                InstrStage<3, [A9_NPipe], 0>,
751                                InstrStage<3, [A9_LSUnit]>],
752                               [2, 2, 3, 1]>,
753   // VLD1x4
754   InstrItinData<IIC_VLD1x4,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
755                                InstrStage<1, [A9_MUX0], 0>,
756                                InstrStage<1, [A9_DRegsN],   0, Required>,
757                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
758                                InstrStage<3, [A9_NPipe], 0>,
759                                InstrStage<3, [A9_LSUnit]>],
760                               [2, 2, 3, 3, 1]>,
761   // VLD1u
762   InstrItinData<IIC_VLD1u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
763                                InstrStage<1, [A9_MUX0], 0>,
764                                InstrStage<1, [A9_DRegsN],   0, Required>,
765                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
766                                InstrStage<2, [A9_NPipe], 0>,
767                                InstrStage<2, [A9_LSUnit]>],
768                               [2, 2, 1]>,
769   // VLD1x2u
770   InstrItinData<IIC_VLD1x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
771                                InstrStage<1, [A9_MUX0], 0>,
772                                InstrStage<1, [A9_DRegsN],   0, Required>,
773                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
774                                InstrStage<2, [A9_NPipe], 0>,
775                                InstrStage<2, [A9_LSUnit]>],
776                               [2, 2, 2, 1]>,
777   // VLD1x3u
778   InstrItinData<IIC_VLD1x3u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
779                                InstrStage<1, [A9_MUX0], 0>,
780                                InstrStage<1, [A9_DRegsN],   0, Required>,
781                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
782                                InstrStage<3, [A9_NPipe], 0>,
783                                InstrStage<3, [A9_LSUnit]>],
784                               [2, 2, 3, 2, 1]>,
785   // VLD1x4u
786   InstrItinData<IIC_VLD1x4u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
787                                InstrStage<1, [A9_MUX0], 0>,
788                                InstrStage<1, [A9_DRegsN],   0, Required>,
789                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
790                                InstrStage<3, [A9_NPipe], 0>,
791                                InstrStage<3, [A9_LSUnit]>],
792                               [2, 2, 3, 3, 2, 1]>,
793   //
794   // VLD1ln
795   InstrItinData<IIC_VLD1ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
796                                InstrStage<1, [A9_MUX0], 0>,
797                                InstrStage<1, [A9_DRegsN],   0, Required>,
798                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
799                                InstrStage<3, [A9_NPipe], 1>,
800                                InstrStage<3, [A9_LSUnit]>],
801                               [4, 1, 1, 1]>,
802   //
803   // VLD1lnu
804   InstrItinData<IIC_VLD1lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
805                                InstrStage<1, [A9_MUX0], 0>,
806                                InstrStage<1, [A9_DRegsN],   0, Required>,
807                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
808                                InstrStage<3, [A9_NPipe], 1>,
809                                InstrStage<3, [A9_LSUnit]>],
810                               [4, 2, 1, 1, 1, 1]>,
811   //
812   // VLD2
813   InstrItinData<IIC_VLD2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
814                                InstrStage<1, [A9_MUX0], 0>,
815                                InstrStage<1, [A9_DRegsN],   0, Required>,
816                                // Extra latency cycles since wbck is 7 cycles
817                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
818                                InstrStage<2, [A9_NPipe], 0>,
819                                InstrStage<2, [A9_LSUnit]>],
820                               [3, 3, 1]>,
821   //
822   // VLD2x2
823   InstrItinData<IIC_VLD2x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
824                                InstrStage<1, [A9_MUX0], 0>,
825                                InstrStage<1, [A9_DRegsN],   0, Required>,
826                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
827                                InstrStage<3, [A9_NPipe], 0>,
828                                InstrStage<3, [A9_LSUnit]>],
829                               [3, 4, 3, 4, 1]>,
830   //
831   // VLD2ln
832   InstrItinData<IIC_VLD2ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
833                                InstrStage<1, [A9_MUX0], 0>,
834                                InstrStage<1, [A9_DRegsN],   0, Required>,
835                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
836                                InstrStage<3, [A9_NPipe], 0>,
837                                InstrStage<3, [A9_LSUnit]>],
838                               [4, 4, 1, 1, 1, 1]>,
839   //
840   // VLD2u
841   InstrItinData<IIC_VLD2u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
842                                InstrStage<1, [A9_MUX0], 0>,
843                                InstrStage<1, [A9_DRegsN],   0, Required>,
844                                // Extra latency cycles since wbck is 7 cycles
845                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
846                                InstrStage<2, [A9_NPipe], 0>,
847                                InstrStage<2, [A9_LSUnit]>],
848                               [3, 3, 2, 1, 1, 1]>,
849   //
850   // VLD2x2u
851   InstrItinData<IIC_VLD2x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
852                                InstrStage<1, [A9_MUX0], 0>,
853                                InstrStage<1, [A9_DRegsN],   0, Required>,
854                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
855                                InstrStage<3, [A9_NPipe], 0>,
856                                InstrStage<3, [A9_LSUnit]>],
857                               [3, 4, 3, 4, 2, 1]>,
858   //
859   // VLD2lnu
860   InstrItinData<IIC_VLD2lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
861                                InstrStage<1, [A9_MUX0], 0>,
862                                InstrStage<1, [A9_DRegsN],   0, Required>,
863                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
864                                InstrStage<3, [A9_NPipe], 0>,
865                                InstrStage<3, [A9_LSUnit]>],
866                               [4, 4, 2, 1, 1, 1, 1, 1]>,
867   //
868   // VLD3
869   InstrItinData<IIC_VLD3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
870                                InstrStage<1, [A9_MUX0], 0>,
871                                InstrStage<1, [A9_DRegsN],   0, Required>,
872                                InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
873                                InstrStage<4, [A9_NPipe], 0>,
874                                InstrStage<4, [A9_LSUnit]>],
875                               [4, 4, 5, 1]>,
876   //
877   // VLD3ln
878   InstrItinData<IIC_VLD3ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
879                                InstrStage<1, [A9_MUX0], 0>,
880                                InstrStage<1, [A9_DRegsN],   0, Required>,
881                                InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
882                                InstrStage<5, [A9_NPipe], 0>,
883                                InstrStage<5, [A9_LSUnit]>],
884                               [5, 5, 6, 1, 1, 1, 1, 2]>,
885   //
886   // VLD3u
887   InstrItinData<IIC_VLD3u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
888                                InstrStage<1, [A9_MUX0], 0>,
889                                InstrStage<1, [A9_DRegsN],   0, Required>,
890                                InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
891                                InstrStage<4, [A9_NPipe], 0>,
892                                InstrStage<4, [A9_LSUnit]>],
893                               [4, 4, 5, 2, 1]>,
894   //
895   // VLD3lnu
896   InstrItinData<IIC_VLD3lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
897                                InstrStage<1, [A9_MUX0], 0>,
898                                InstrStage<1, [A9_DRegsN],   0, Required>,
899                                InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
900                                InstrStage<5, [A9_NPipe], 0>,
901                                InstrStage<5, [A9_LSUnit]>],
902                               [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
903   //
904   // VLD4
905   InstrItinData<IIC_VLD4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
906                                InstrStage<1, [A9_MUX0], 0>,
907                                InstrStage<1, [A9_DRegsN],   0, Required>,
908                                InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
909                                InstrStage<4, [A9_NPipe], 0>,
910                                InstrStage<4, [A9_LSUnit]>],
911                               [4, 4, 5, 5, 1]>,
912   //
913   // VLD4ln
914   InstrItinData<IIC_VLD4ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
915                                InstrStage<1, [A9_MUX0], 0>,
916                                InstrStage<1, [A9_DRegsN],   0, Required>,
917                                InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
918                                InstrStage<5, [A9_NPipe], 0>,
919                                InstrStage<5, [A9_LSUnit]>],
920                               [5, 5, 6, 6, 1, 1, 1, 1, 2, 2]>,
921   //
922   // VLD4u
923   InstrItinData<IIC_VLD4u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
924                                InstrStage<1, [A9_MUX0], 0>,
925                                InstrStage<1, [A9_DRegsN],   0, Required>,
926                                InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
927                                InstrStage<4, [A9_NPipe], 0>,
928                                InstrStage<4, [A9_LSUnit]>],
929                               [4, 4, 5, 5, 2, 1]>,
930   //
931   // VLD4lnu
932   InstrItinData<IIC_VLD4lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
933                                InstrStage<1, [A9_MUX0], 0>,
934                                InstrStage<1, [A9_DRegsN],   0, Required>,
935                                InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
936                                InstrStage<5, [A9_NPipe], 0>,
937                                InstrStage<5, [A9_LSUnit]>],
938                               [5, 5, 6, 6, 2, 1, 1, 1, 1, 1, 2, 2]>,
939   //
940   // VST1
941   InstrItinData<IIC_VST1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
942                                InstrStage<1, [A9_MUX0], 0>,
943                                InstrStage<1, [A9_DRegsN],   0, Required>,
944                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
945                                InstrStage<2, [A9_NPipe], 0>,
946                                InstrStage<2, [A9_LSUnit]>],
947                               [1, 1, 1]>,
948   //
949   // VST1x2
950   InstrItinData<IIC_VST1x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
951                                InstrStage<1, [A9_MUX0], 0>,
952                                InstrStage<1, [A9_DRegsN],   0, Required>,
953                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
954                                InstrStage<2, [A9_NPipe], 0>,
955                                InstrStage<2, [A9_LSUnit]>],
956                               [1, 1, 1, 1]>,
957   //
958   // VST1x3
959   InstrItinData<IIC_VST1x3,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
960                                InstrStage<1, [A9_MUX0], 0>,
961                                InstrStage<1, [A9_DRegsN],   0, Required>,
962                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
963                                InstrStage<3, [A9_NPipe], 0>,
964                                InstrStage<3, [A9_LSUnit]>],
965                               [1, 1, 1, 1, 2]>,
966   //
967   // VST1x4
968   InstrItinData<IIC_VST1x4,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
969                                InstrStage<1, [A9_MUX0], 0>,
970                                InstrStage<1, [A9_DRegsN],   0, Required>,
971                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
972                                InstrStage<3, [A9_NPipe], 0>,
973                                InstrStage<3, [A9_LSUnit]>],
974                               [1, 1, 1, 1, 2, 2]>,
975   //
976   // VST1u
977   InstrItinData<IIC_VST1u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
978                                InstrStage<1, [A9_MUX0], 0>,
979                                InstrStage<1, [A9_DRegsN],   0, Required>,
980                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
981                                InstrStage<2, [A9_NPipe], 0>,
982                                InstrStage<2, [A9_LSUnit]>],
983                               [2, 1, 1, 1, 1]>,
984   //
985   // VST1x2u
986   InstrItinData<IIC_VST1x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
987                                InstrStage<1, [A9_MUX0], 0>,
988                                InstrStage<1, [A9_DRegsN],   0, Required>,
989                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
990                                InstrStage<2, [A9_NPipe], 0>,
991                                InstrStage<2, [A9_LSUnit]>],
992                               [2, 1, 1, 1, 1, 1]>,
993   //
994   // VST1x3u -- VFP-side D-register reservation spans the same 3 cycles as NPipe/LSUnit, matching VST1x3
995   InstrItinData<IIC_VST1x3u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
996                                InstrStage<1, [A9_MUX0], 0>,
997                                InstrStage<1, [A9_DRegsN],   0, Required>,
998                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
999                                InstrStage<3, [A9_NPipe], 0>,
1000                                InstrStage<3, [A9_LSUnit]>],
1001                               [2, 1, 1, 1, 1, 1, 2]>,
1002   //
1003   // VST1x4u
1004   InstrItinData<IIC_VST1x4u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1005                                InstrStage<1, [A9_MUX0], 0>,
1006                                InstrStage<1, [A9_DRegsN],   0, Required>,
1007                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1008                                InstrStage<3, [A9_NPipe], 0>,
1009                                InstrStage<3, [A9_LSUnit]>],
1010                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1011   //
1012   // VST1ln
1013   InstrItinData<IIC_VST1ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1014                                InstrStage<1, [A9_MUX0], 0>,
1015                                InstrStage<1, [A9_DRegsN],   0, Required>,
1016                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1017                                InstrStage<2, [A9_NPipe], 1>,
1018                                InstrStage<2, [A9_LSUnit]>],
1019                               [1, 1, 1]>,
1020   //
1021   // VST1lnu
1022   InstrItinData<IIC_VST1lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1023                                InstrStage<1, [A9_MUX0], 0>,
1024                                InstrStage<1, [A9_DRegsN],   0, Required>,
1025                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1026                                InstrStage<3, [A9_NPipe], 1>,
1027                                InstrStage<3, [A9_LSUnit]>],
1028                               [2, 1, 1, 1, 1]>,
1029   //
1030   // VST2
1031   InstrItinData<IIC_VST2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1032                                InstrStage<1, [A9_MUX0], 0>,
1033                                InstrStage<1, [A9_DRegsN],   0, Required>,
1034                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1035                                InstrStage<2, [A9_NPipe], 0>,
1036                                InstrStage<2, [A9_LSUnit]>],
1037                               [1, 1, 1, 1]>,
1038   //
1039   // VST2x2
1040   InstrItinData<IIC_VST2x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1041                                InstrStage<1, [A9_MUX0], 0>,
1042                                InstrStage<1, [A9_DRegsN],   0, Required>,
1043                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1044                                InstrStage<3, [A9_NPipe], 0>,
1045                                InstrStage<3, [A9_LSUnit]>],
1046                               [1, 1, 1, 1, 2, 2]>,
1047   //
1048   // VST2u
1049   InstrItinData<IIC_VST2u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1050                                InstrStage<1, [A9_MUX0], 0>,
1051                                InstrStage<1, [A9_DRegsN],   0, Required>,
1052                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1053                                InstrStage<2, [A9_NPipe], 0>,
1054                                InstrStage<2, [A9_LSUnit]>],
1055                               [2, 1, 1, 1, 1, 1]>,
1056   //
1057   // VST2x2u
1058   InstrItinData<IIC_VST2x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1059                                InstrStage<1, [A9_MUX0], 0>,
1060                                InstrStage<1, [A9_DRegsN],   0, Required>,
1061                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1062                                InstrStage<3, [A9_NPipe], 0>,
1063                                InstrStage<3, [A9_LSUnit]>],
1064                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1065   //
1066   // VST2ln
1067   InstrItinData<IIC_VST2ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1068                                InstrStage<1, [A9_MUX0], 0>,
1069                                InstrStage<1, [A9_DRegsN],   0, Required>,
1070                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1071                                InstrStage<2, [A9_NPipe], 0>,
1072                                InstrStage<2, [A9_LSUnit]>],
1073                               [1, 1, 1, 1]>,
1074   //
1075   // VST2lnu
1076   InstrItinData<IIC_VST2lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1077                                InstrStage<1, [A9_MUX0], 0>,
1078                                InstrStage<1, [A9_DRegsN],   0, Required>,
1079                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1080                                InstrStage<3, [A9_NPipe], 0>,
1081                                InstrStage<3, [A9_LSUnit]>],
1082                               [2, 1, 1, 1, 1, 1]>,
1083   //
1084   // VST3
1085   InstrItinData<IIC_VST3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1086                                InstrStage<1, [A9_MUX0], 0>,
1087                                InstrStage<1, [A9_DRegsN],   0, Required>,
1088                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1089                                InstrStage<3, [A9_NPipe], 0>,
1090                                InstrStage<3, [A9_LSUnit]>],
1091                               [1, 1, 1, 1, 2]>,
1092   //
1093   // VST3u
1094   InstrItinData<IIC_VST3u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1095                                InstrStage<1, [A9_MUX0], 0>,
1096                                InstrStage<1, [A9_DRegsN],   0, Required>,
1097                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1098                                InstrStage<3, [A9_NPipe], 0>,
1099                                InstrStage<3, [A9_LSUnit]>],
1100                               [2, 1, 1, 1, 1, 1, 2]>,
1101   //
1102   // VST3ln
1103   InstrItinData<IIC_VST3ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1104                                InstrStage<1, [A9_MUX0], 0>,
1105                                InstrStage<1, [A9_DRegsN],   0, Required>,
1106                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1107                                InstrStage<3, [A9_NPipe], 0>,
1108                                InstrStage<3, [A9_LSUnit]>],
1109                               [1, 1, 1, 1, 2]>,
1110   //
1111   // VST3lnu
1112   InstrItinData<IIC_VST3lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1113                                InstrStage<1, [A9_MUX0], 0>,
1114                                InstrStage<1, [A9_DRegsN],   0, Required>,
1115                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1116                                InstrStage<3, [A9_NPipe], 0>,
1117                                InstrStage<3, [A9_LSUnit]>],
1118                               [2, 1, 1, 1, 1, 1, 2]>,
1119   //
1120   // VST4
1121   InstrItinData<IIC_VST4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1122                                InstrStage<1, [A9_MUX0], 0>,
1123                                InstrStage<1, [A9_DRegsN],   0, Required>,
1124                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1125                                InstrStage<3, [A9_NPipe], 0>,
1126                                InstrStage<3, [A9_LSUnit]>],
1127                               [1, 1, 1, 1, 2, 2]>,
1128   //
1129   // VST4u
1130   InstrItinData<IIC_VST4u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1131                                InstrStage<1, [A9_MUX0], 0>,
1132                                InstrStage<1, [A9_DRegsN],   0, Required>,
1133                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1134                                InstrStage<3, [A9_NPipe], 0>,
1135                                InstrStage<3, [A9_LSUnit]>],
1136                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1137   //
1138   // VST4ln
1139   InstrItinData<IIC_VST4ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1140                                InstrStage<1, [A9_MUX0], 0>,
1141                                InstrStage<1, [A9_DRegsN],   0, Required>,
1142                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1143                                InstrStage<3, [A9_NPipe], 0>,
1144                                InstrStage<3, [A9_LSUnit]>],
1145                               [1, 1, 1, 1, 2, 2]>,
1146   //
1147   // VST4lnu
1148   InstrItinData<IIC_VST4lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1149                                InstrStage<1, [A9_MUX0], 0>,
1150                                InstrStage<1, [A9_DRegsN],   0, Required>,
1151                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1152                                InstrStage<3, [A9_NPipe], 0>,
1153                                InstrStage<3, [A9_LSUnit]>],
1154                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1155
1156   //
1157   // Double-register Integer Unary
1158   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1159                                InstrStage<1, [A9_MUX0], 0>,
1160                                InstrStage<1, [A9_DRegsN],   0, Required>,
1161                                // Extra latency cycles since wbck is 6 cycles
1162                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1163                                InstrStage<1, [A9_NPipe]>],
1164                               [4, 2]>,
1165   //
1166   // Quad-register Integer Unary
1167   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1168                                InstrStage<1, [A9_MUX0], 0>,
1169                                InstrStage<1, [A9_DRegsN],   0, Required>,
1170                                // Extra latency cycles since wbck is 6 cycles
1171                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1172                                InstrStage<1, [A9_NPipe]>],
1173                               [4, 2]>,
1174   //
1175   // Double-register Integer Q-Unary
1176   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1177                                InstrStage<1, [A9_MUX0], 0>,
1178                                InstrStage<1, [A9_DRegsN],   0, Required>,
1179                                // Extra latency cycles since wbck is 6 cycles
1180                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1181                                InstrStage<1, [A9_NPipe]>],
1182                               [4, 1]>,
1183   //
1184   // Quad-register Integer Q-Unary
1185   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1186                                InstrStage<1, [A9_MUX0], 0>,
1187                                InstrStage<1, [A9_DRegsN],   0, Required>,
1188                                // Extra latency cycles since wbck is 6 cycles
1189                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1190                                InstrStage<1, [A9_NPipe]>],
1191                               [4, 1]>,
1192   //
1193   // Double-register Integer Binary
1194   InstrItinData<IIC_VBINiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1195                                InstrStage<1, [A9_MUX0], 0>,
1196                                InstrStage<1, [A9_DRegsN],   0, Required>,
1197                                // Extra latency cycles since wbck is 6 cycles
1198                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1199                                InstrStage<1, [A9_NPipe]>],
1200                               [3, 2, 2]>,
1201   //
1202   // Quad-register Integer Binary
1203   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1204                                InstrStage<1, [A9_MUX0], 0>,
1205                                InstrStage<1, [A9_DRegsN],   0, Required>,
1206                                // Extra latency cycles since wbck is 6 cycles
1207                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1208                                InstrStage<1, [A9_NPipe]>],
1209                               [3, 2, 2]>,
1210   //
1211   // Double-register Integer Subtract
1212   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1213                                InstrStage<1, [A9_MUX0], 0>,
1214                                InstrStage<1, [A9_DRegsN],   0, Required>,
1215                                // Extra latency cycles since wbck is 6 cycles
1216                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1217                                InstrStage<1, [A9_NPipe]>],
1218                               [3, 2, 1]>,
1219   //
1220   // Quad-register Integer Subtract
1221   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1222                                InstrStage<1, [A9_MUX0], 0>,
1223                                InstrStage<1, [A9_DRegsN],   0, Required>,
1224                                // Extra latency cycles since wbck is 6 cycles
1225                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1226                                InstrStage<1, [A9_NPipe]>],
1227                               [3, 2, 1]>,
1228   //
1229   // Double-register Integer Shift
1230   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1231                                InstrStage<1, [A9_MUX0], 0>,
1232                                InstrStage<1, [A9_DRegsN],   0, Required>,
1233                                // Extra latency cycles since wbck is 6 cycles
1234                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1235                                InstrStage<1, [A9_NPipe]>],
1236                               [3, 1, 1]>,
1237   //
1238   // Quad-register Integer Shift
1239   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1240                                InstrStage<1, [A9_MUX0], 0>,
1241                                InstrStage<1, [A9_DRegsN],   0, Required>,
1242                                // Extra latency cycles since wbck is 6 cycles
1243                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1244                                InstrStage<1, [A9_NPipe]>],
1245                               [3, 1, 1]>,
1246   //
1247   // Double-register Integer Shift (4 cycle)
1248   InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1249                                InstrStage<1, [A9_MUX0], 0>,
1250                                InstrStage<1, [A9_DRegsN],   0, Required>,
1251                                // Extra latency cycles since wbck is 6 cycles
1252                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1253                                InstrStage<1, [A9_NPipe]>],
1254                               [4, 1, 1]>,
1255   //
1256   // Quad-register Integer Shift (4 cycle)
1257   InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1258                                InstrStage<1, [A9_MUX0], 0>,
1259                                InstrStage<1, [A9_DRegsN],   0, Required>,
1260                                // Extra latency cycles since wbck is 6 cycles
1261                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1262                                InstrStage<1, [A9_NPipe]>],
1263                               [4, 1, 1]>,
1264   //
1265   // Double-register Integer Binary (4 cycle)
1266   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1267                                InstrStage<1, [A9_MUX0], 0>,
1268                                InstrStage<1, [A9_DRegsN],   0, Required>,
1269                                // Extra latency cycles since wbck is 6 cycles
1270                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1271                                InstrStage<1, [A9_NPipe]>],
1272                               [4, 2, 2]>,
1273   //
1274   // Quad-register Integer Binary (4 cycle)
1275   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1276                                InstrStage<1, [A9_MUX0], 0>,
1277                                InstrStage<1, [A9_DRegsN],   0, Required>,
1278                                // Extra latency cycles since wbck is 6 cycles
1279                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1280                                InstrStage<1, [A9_NPipe]>],
1281                               [4, 2, 2]>,
1282   //
1283   // Double-register Integer Subtract (4 cycle)
1284   InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1285                                InstrStage<1, [A9_MUX0], 0>,
1286                                InstrStage<1, [A9_DRegsN],   0, Required>,
1287                                // Extra latency cycles since wbck is 6 cycles
1288                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1289                                InstrStage<1, [A9_NPipe]>],
1290                               [4, 2, 1]>,
1291   //
1292   // Quad-register Integer Subtract (4 cycle)
1293   InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1294                                InstrStage<1, [A9_MUX0], 0>,
1295                                InstrStage<1, [A9_DRegsN],   0, Required>,
1296                                // Extra latency cycles since wbck is 6 cycles
1297                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1298                                InstrStage<1, [A9_NPipe]>],
1299                               [4, 2, 1]>,
1300
1301   //
1302   // Double-register Integer Count
1303   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1304                                InstrStage<1, [A9_MUX0], 0>,
1305                                InstrStage<1, [A9_DRegsN],   0, Required>,
1306                                // Extra latency cycles since wbck is 6 cycles
1307                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1308                                InstrStage<1, [A9_NPipe]>],
1309                               [3, 2, 2]>,
1310   //
1311   // Quad-register Integer Count
1312   // Result written in N3, but that is relative to the last cycle of multicycle,
1313   // so we use 4 for those cases
1314   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1315                                InstrStage<1, [A9_MUX0], 0>,
1316                                InstrStage<1, [A9_DRegsN],   0, Required>,
1317                                // Extra latency cycles since wbck is 7 cycles
1318                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1319                                InstrStage<2, [A9_NPipe]>],
1320                               [4, 2, 2]>,
1321   //
1322   // Double-register Absolute Difference and Accumulate
1323   InstrItinData<IIC_VABAD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1324                                InstrStage<1, [A9_MUX0], 0>,
1325                                InstrStage<1, [A9_DRegsN],   0, Required>,
1326                                // Extra latency cycles since wbck is 6 cycles
1327                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1328                                InstrStage<1, [A9_NPipe]>],
1329                               [6, 3, 2, 1]>,
1330   //
1331   // Quad-register Absolute Difference and Accumulate
1332   InstrItinData<IIC_VABAQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1333                                InstrStage<1, [A9_MUX0], 0>,
1334                                InstrStage<1, [A9_DRegsN],   0, Required>,
1335                                // Extra latency cycles since wbck is 6 cycles
1336                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1337                                InstrStage<2, [A9_NPipe]>],
1338                               [6, 3, 2, 1]>,
1339   //
1340   // Double-register Integer Pair Add Long
1341   InstrItinData<IIC_VPALiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1342                                InstrStage<1, [A9_MUX0], 0>,
1343                                InstrStage<1, [A9_DRegsN],   0, Required>,
1344                                // Extra latency cycles since wbck is 6 cycles
1345                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1346                                InstrStage<1, [A9_NPipe]>],
1347                               [6, 3, 1]>,
1348   //
1349   // Quad-register Integer Pair Add Long
1350   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1351                                InstrStage<1, [A9_MUX0], 0>,
1352                                InstrStage<1, [A9_DRegsN],   0, Required>,
1353                                // Extra latency cycles since wbck is 6 cycles
1354                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1355                                InstrStage<2, [A9_NPipe]>],
1356                               [6, 3, 1]>,
1357
1358   //
1359   // Double-register Integer Multiply (.8, .16)
1360   InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1361                                InstrStage<1, [A9_MUX0], 0>,
1362                                InstrStage<1, [A9_DRegsN],   0, Required>,
1363                                // Extra latency cycles since wbck is 6 cycles
1364                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1365                                InstrStage<1, [A9_NPipe]>],
1366                               [6, 2, 2]>,
1367   //
1368   // Quad-register Integer Multiply (.8, .16)
1369   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1370                                InstrStage<1, [A9_MUX0], 0>,
1371                                InstrStage<1, [A9_DRegsN],   0, Required>,
1372                                // Extra latency cycles since wbck is 7 cycles
1373                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1374                                InstrStage<2, [A9_NPipe]>],
1375                               [7, 2, 2]>,
1376
1377   //
1378   // Double-register Integer Multiply (.32)
1379   InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1380                                InstrStage<1, [A9_MUX0], 0>,
1381                                InstrStage<1, [A9_DRegsN],   0, Required>,
1382                                // Extra latency cycles since wbck is 7 cycles
1383                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1384                                InstrStage<2, [A9_NPipe]>],
1385                               [7, 2, 1]>,
1386   //
1387   // Quad-register Integer Multiply (.32)
1388   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1389                                InstrStage<1, [A9_MUX0], 0>,
1390                                InstrStage<1, [A9_DRegsN],   0, Required>,
1391                                // Extra latency cycles since wbck is 9 cycles
1392                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1393                                InstrStage<4, [A9_NPipe]>],
1394                               [9, 2, 1]>,
1395   //
1396   // Double-register Integer Multiply-Accumulate (.8, .16)
1397   InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1398                                InstrStage<1, [A9_MUX0], 0>,
1399                                InstrStage<1, [A9_DRegsN],   0, Required>,
1400                                // Extra latency cycles since wbck is 6 cycles
1401                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1402                                InstrStage<1, [A9_NPipe]>],
1403                               [6, 3, 2, 2]>,
1404   //
1405   // Double-register Integer Multiply-Accumulate (.32)
1406   InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1407                                InstrStage<1, [A9_MUX0], 0>,
1408                                InstrStage<1, [A9_DRegsN],   0, Required>,
1409                                // Extra latency cycles since wbck is 7 cycles
1410                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1411                                InstrStage<2, [A9_NPipe]>],
1412                               [7, 3, 2, 1]>,
1413   //
1414   // Quad-register Integer Multiply-Accumulate (.8, .16)
1415   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1416                                InstrStage<1, [A9_MUX0], 0>,
1417                                InstrStage<1, [A9_DRegsN],   0, Required>,
1418                                // Extra latency cycles since wbck is 7 cycles
1419                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1420                                InstrStage<2, [A9_NPipe]>],
1421                               [7, 3, 2, 2]>,
1422   //
1423   // Quad-register Integer Multiply-Accumulate (.32)
1424   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1425                                InstrStage<1, [A9_MUX0], 0>,
1426                                InstrStage<1, [A9_DRegsN],   0, Required>,
1427                                // Extra latency cycles since wbck is 9 cycles
1428                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1429                                InstrStage<4, [A9_NPipe]>],
1430                               [9, 3, 2, 1]>,
1431
1432   //
1433   // Move
1434   InstrItinData<IIC_VMOV,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1435                                InstrStage<1, [A9_MUX0], 0>,
1436                                InstrStage<1, [A9_DRegsN],   0, Required>,
1437                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1438                                InstrStage<1, [A9_NPipe]>],
1439                               [1,1]>,
1440   //
1441   // Move Immediate
1442   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1443                                InstrStage<1, [A9_MUX0], 0>,
1444                                InstrStage<1, [A9_DRegsN],   0, Required>,
1445                                // Extra latency cycles since wbck is 6 cycles
1446                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1447                                InstrStage<1, [A9_NPipe]>],
1448                               [3]>,
1449   //
1450   // Double-register Permute Move
1451   InstrItinData<IIC_VMOVD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1452                                InstrStage<1, [A9_MUX0], 0>,
1453                                InstrStage<1, [A9_DRegsN],   0, Required>,
1454                                // Extra latency cycles since wbck is 6 cycles
1455                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1456                                InstrStage<1, [A9_NPipe]>],
1457                               [2, 1]>,
1458   //
1459   // Quad-register Permute Move
1460   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1461                                InstrStage<1, [A9_MUX0], 0>,
1462                                InstrStage<1, [A9_DRegsN],   0, Required>,
1463                                // Extra latency cycles since wbck is 6 cycles
1464                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1465                                InstrStage<1, [A9_NPipe]>],
1466                               [2, 1]>,
1467   //
1468   // Integer to Single-precision Move
1469   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1470                                InstrStage<1, [A9_MUX0], 0>,
1471                                InstrStage<1, [A9_DRegsN],   0, Required>,
1472                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1473                                InstrStage<1, [A9_NPipe]>],
1474                               [1, 1]>,
1475   //
1476   // Integer to Double-precision Move
1477   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1478                                InstrStage<1, [A9_MUX0], 0>,
1479                                InstrStage<1, [A9_DRegsN],   0, Required>,
1480                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1481                                InstrStage<1, [A9_NPipe]>],
1482                               [1, 1, 1]>,
1483   //
1484   // Single-precision to Integer Move
1485   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1486                                InstrStage<1, [A9_MUX0], 0>,
1487                                InstrStage<1, [A9_DRegsN],   0, Required>,
1488                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1489                                InstrStage<1, [A9_NPipe]>],
1490                               [2, 1]>,
1491   //
1492   // Double-precision to Integer Move
1493   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1494                                InstrStage<1, [A9_MUX0], 0>,
1495                                InstrStage<1, [A9_DRegsN],   0, Required>,
1496                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1497                                InstrStage<1, [A9_NPipe]>],
1498                               [2, 2, 1]>,
1499   //
1500   // Integer to Lane Move
1501   InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1502                                InstrStage<1, [A9_MUX0], 0>,
1503                                InstrStage<1, [A9_DRegsN],   0, Required>,
1504                                InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
1505                                InstrStage<2, [A9_NPipe]>],
1506                               [3, 1, 1]>,
1507
1508   //
1509   // Vector narrow move
1510   InstrItinData<IIC_VMOVN,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1511                                InstrStage<1, [A9_MUX0], 0>,
1512                                InstrStage<1, [A9_DRegsN],   0, Required>,
1513                                // Extra latency cycles since wbck is 6 cycles
1514                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1515                                InstrStage<1, [A9_NPipe]>],
1516                               [3, 1]>,
1517   //
1518   // Double-register FP Unary
1519   InstrItinData<IIC_VUNAD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1520                                InstrStage<1, [A9_MUX0], 0>,
1521                                InstrStage<1, [A9_DRegsN],   0, Required>,
1522                                // Extra latency cycles since wbck is 6 cycles
1523                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1524                                InstrStage<1, [A9_NPipe]>],
1525                               [5, 2]>,
1526   //
1527   // Quad-register FP Unary
1528   // Result written in N5, but that is relative to the last cycle of multicycle,
1529   // so we use 6 for those cases
1530   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1531                                InstrStage<1, [A9_MUX0], 0>,
1532                                InstrStage<1, [A9_DRegsN],   0, Required>,
1533                                // Extra latency cycles since wbck is 7 cycles
1534                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1535                                InstrStage<2, [A9_NPipe]>],
1536                               [6, 2]>,
1537   //
1538   // Double-register FP Binary
1539   // FIXME: We're using this itin for many instructions and [2, 2] here is too
1540   // optimistic.
1541   InstrItinData<IIC_VBIND,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1542                                InstrStage<1, [A9_MUX0], 0>,
1543                                InstrStage<1, [A9_DRegsN],   0, Required>,
1544                                // Extra latency cycles since wbck is 6 cycles
1545                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1546                                InstrStage<1, [A9_NPipe]>],
1547                               [5, 2, 2]>,
1548
1549   //
1550   // VPADD, etc.
1551   InstrItinData<IIC_VPBIND,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1552                                InstrStage<1, [A9_MUX0], 0>,
1553                                InstrStage<1, [A9_DRegsN],   0, Required>,
1554                                // Extra latency cycles since wbck is 6 cycles
1555                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1556                                InstrStage<1, [A9_NPipe]>],
1557                               [5, 1, 1]>,
1558   //
1559   // Double-register FP VMUL
1560   InstrItinData<IIC_VFMULD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1561                                InstrStage<1, [A9_MUX0], 0>,
1562                                InstrStage<1, [A9_DRegsN],   0, Required>,
1563                                // Extra latency cycles since wbck is 6 cycles
1564                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1565                                InstrStage<1, [A9_NPipe]>],
1566                               [5, 2, 1]>,
1567   //
1568   // Quad-register FP Binary
1569   // Result written in N5, but that is relative to the last cycle of multicycle,
1570   // so we use 6 for those cases
1571   // FIXME: We're using this itin for many instructions and [2, 2] here is too
1572   // optimistic.
1573   InstrItinData<IIC_VBINQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1574                                InstrStage<1, [A9_MUX0], 0>,
1575                                InstrStage<1, [A9_DRegsN],   0, Required>,
1576                                // Extra latency cycles since wbck is 7 cycles
1577                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1578                                InstrStage<2, [A9_NPipe]>],
1579                               [6, 2, 2]>,
1580   //
1581   // Quad-register FP VMUL
1582   InstrItinData<IIC_VFMULQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1583                                InstrStage<1, [A9_MUX0], 0>,
1584                                InstrStage<1, [A9_DRegsN],   0, Required>,
1585                                // Extra latency cycles since wbck is 7 cycles
1586                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1587                                InstrStage<1, [A9_NPipe]>],
1588                               [6, 2, 1]>,
1589   //
1590   // Double-register FP Multiply-Accumulate
1591   InstrItinData<IIC_VMACD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1592                                InstrStage<1, [A9_MUX0], 0>,
1593                                InstrStage<1, [A9_DRegsN],   0, Required>,
1594                                // Extra latency cycles since wbck is 7 cycles
1595                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1596                                InstrStage<2, [A9_NPipe]>],
1597                               [6, 3, 2, 1]>,
1598   //
1599   // Quad-register FP Multiply-Accumulate
1600   // Result written in N9, but that is relative to the last cycle of multicycle,
1601   // so we use 10 for those cases
1602   InstrItinData<IIC_VMACQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1603                                InstrStage<1, [A9_MUX0], 0>,
1604                                InstrStage<1, [A9_DRegsN],   0, Required>,
1605                                // Extra latency cycles since wbck is 9 cycles
1606                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1607                                InstrStage<4, [A9_NPipe]>],
1608                               [8, 4, 2, 1]>,
1609   //
1610   // Double-register Reciprocal Step
1611   InstrItinData<IIC_VRECSD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1612                                InstrStage<1, [A9_MUX0], 0>,
1613                                InstrStage<1, [A9_DRegsN],   0, Required>,
1614                                // Extra latency cycles since wbck is 10 cycles
1615                                InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
1616                                InstrStage<1, [A9_NPipe]>],
1617                               [9, 2, 2]>,
1618   //
1619   // Quad-register Reciprocal Step
1620   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1621                                InstrStage<1, [A9_MUX0], 0>,
1622                                InstrStage<1, [A9_DRegsN],   0, Required>,
1623                                // Extra latency cycles since wbck is 11 cycles
1624                                InstrStage<12, [A9_DRegsVFP], 0, Reserved>,
1625                                InstrStage<2, [A9_NPipe]>],
1626                               [10, 2, 2]>,
1627   //
1628   // Double-register Permute
1629   InstrItinData<IIC_VPERMD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1630                                InstrStage<1, [A9_MUX0], 0>,
1631                                InstrStage<1, [A9_DRegsN],   0, Required>,
1632                                // Extra latency cycles since wbck is 6 cycles
1633                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1634                                InstrStage<1, [A9_NPipe]>],
1635                               [2, 2, 1, 1]>,
1636   //
1637   // Quad-register Permute
1638   // Result written in N2, but that is relative to the last cycle of multicycle,
1639   // so we use 3 for those cases
1640   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1641                                InstrStage<1, [A9_MUX0], 0>,
1642                                InstrStage<1, [A9_DRegsN],   0, Required>,
1643                                // Extra latency cycles since wbck is 7 cycles
1644                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1645                                InstrStage<2, [A9_NPipe]>],
1646                               [3, 3, 1, 1]>,
1647   //
1648   // Quad-register Permute (3 cycle issue)
1649   // The result is written in N2, but that is relative to the last cycle of a
1650   // multi-cycle instruction, so we use 4 for those cases.
       // Holds A9_NPipe for 3 cycles. The VFP-side FP register set (A9_DRegsVFP) is
       // Reserved for 9 cycles, one more than the wbck figure quoted below.
1651   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1652                                InstrStage<1, [A9_MUX0], 0>,
1653                                InstrStage<1, [A9_DRegsN],   0, Required>,
1654                                // Extra latency cycles since wbck is 8 cycles
1655                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1656                                InstrStage<3, [A9_NPipe]>],
1657                               [4, 4, 1, 1]>,
1658
1659   //
1660   // Double-register VEXT
       // Holds A9_NPipe for 1 cycle. The VFP-side FP register set (A9_DRegsVFP) is
       // Reserved for 7 cycles, one more than the wbck figure quoted below.
1661   InstrItinData<IIC_VEXTD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1662                                InstrStage<1, [A9_MUX0], 0>,
1663                                InstrStage<1, [A9_DRegsN],   0, Required>,
1664                                // Extra latency cycles since wbck is 6 cycles
1665                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1666                                InstrStage<1, [A9_NPipe]>],
1667                               [2, 1, 1]>,
1668   //
1669   // Quad-register VEXT
       // Holds A9_NPipe for 2 cycles. The VFP-side FP register set (A9_DRegsVFP) is
       // Reserved for 8 cycles, one more than the wbck figure quoted below.
1670   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1671                                InstrStage<1, [A9_MUX0], 0>,
1672                                InstrStage<1, [A9_DRegsN],   0, Required>,
1673                                // Extra latency cycles since wbck is 7 cycles
1674                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1675                                InstrStage<2, [A9_NPipe]>],
1676                               [3, 1, 2]>,
1677   //
1678   // VTB
       // IIC_VTB1 holds A9_NPipe for 2 cycles. The VFP-side FP register set
       // (A9_DRegsVFP) is Reserved for 8 cycles, one more than the wbck figure below.
1679   InstrItinData<IIC_VTB1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1680                                InstrStage<1, [A9_MUX0], 0>,
1681                                InstrStage<1, [A9_DRegsN],   0, Required>,
1682                                // Extra latency cycles since wbck is 7 cycles
1683                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1684                                InstrStage<2, [A9_NPipe]>],
1685                               [3, 2, 1]>,
1686   InstrItinData<IIC_VTB2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1687                                InstrStage<1, [A9_MUX0], 0>,
                                    // NOTE(review): A9_DRegsN is Required for 2
                                    // cycles here and in IIC_VTB3, but for 1 in
                                    // IIC_VTB1, IIC_VTB4 and IIC_VTBX1-4; confirm.
1688                                InstrStage<2, [A9_DRegsN],   0, Required>,
1689                                // Extra latency cycles since wbck is 7 cycles
1690                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1691                                InstrStage<2, [A9_NPipe]>],
1692                               [3, 2, 2, 1]>,
1693   InstrItinData<IIC_VTB3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1694                                InstrStage<1, [A9_MUX0], 0>,
                                    // NOTE(review): A9_DRegsN is Required for 2
                                    // cycles here and in IIC_VTB2, but for 1 in
                                    // IIC_VTB1, IIC_VTB4 and IIC_VTBX1-4; confirm.
1695                                InstrStage<2, [A9_DRegsN],   0, Required>,
1696                                // Extra latency cycles since wbck is 8 cycles
1697                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1698                                InstrStage<3, [A9_NPipe]>],
1699                               [4, 2, 2, 3, 1]>,
1700   InstrItinData<IIC_VTB4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1701                                InstrStage<1, [A9_MUX0], 0>,
                                    // A9_DRegsN is Required for 1 cycle here,
                                    // versus 2 in IIC_VTB2 and IIC_VTB3
1702                                InstrStage<1, [A9_DRegsN],   0, Required>,
1703                                // Extra latency cycles since wbck is 8 cycles
1704                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1705                                InstrStage<3, [A9_NPipe]>],
1706                               [4, 2, 2, 3, 3, 1]>,
1707   //
1708   // VTBX
       // IIC_VTBX1 holds A9_NPipe for 2 cycles. The VFP-side FP register set
       // (A9_DRegsVFP) is Reserved for 8 cycles, one more than the wbck figure below.
1709   InstrItinData<IIC_VTBX1,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1710                                InstrStage<1, [A9_MUX0], 0>,
1711                                InstrStage<1, [A9_DRegsN],   0, Required>,
1712                                // Extra latency cycles since wbck is 7 cycles
1713                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1714                                InstrStage<2, [A9_NPipe]>],
1715                               [3, 1, 2, 1]>,
1716   InstrItinData<IIC_VTBX2,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1717                                InstrStage<1, [A9_MUX0], 0>,
1718                                InstrStage<1, [A9_DRegsN],   0, Required>,
1719                                // Extra latency cycles since wbck is 7 cycles
1720                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1721                                InstrStage<2, [A9_NPipe]>],
                                   // Stages match IIC_VTBX1; only this list differs,
                                   // carrying one more operand cycle.
1722                               [3, 1, 2, 2, 1]>,
1723   InstrItinData<IIC_VTBX3,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1724                                InstrStage<1, [A9_MUX0], 0>,
1725                                InstrStage<1, [A9_DRegsN],   0, Required>,
1726                                // Extra latency cycles since wbck is 8 cycles
1727                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
                                    // A9_NPipe is held for 3 cycles here;
                                    // IIC_VTBX1 and IIC_VTBX2 hold it for 2.
1728                                InstrStage<3, [A9_NPipe]>],
1729                               [4, 1, 2, 2, 3, 1]>,
1730   InstrItinData<IIC_VTBX4,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1731                                InstrStage<1, [A9_MUX0], 0>,
1732                                InstrStage<1, [A9_DRegsN],   0, Required>,
1733                                // Extra latency cycles since wbck is 8 cycles
1734                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
                                    // Held 3 cycles, like IIC_VTBX3 and IIC_VTB4,
                                    // which share this entry's wbck and result cycle
                                    // (was 2, the value used by the wbck-7 entries).
1735                                InstrStage<3, [A9_NPipe]>],
1736                               [4, 1, 2, 2, 3, 3, 1]>
1737 ]>;