Add VLD4 scheduling itineraries.
[oota-llvm.git] / lib / Target / ARM / ARMScheduleA9.td
1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
11 //
12 //===----------------------------------------------------------------------===//
13
14 //
15 // Ad-hoc scheduling information derived from the rather vague "Cortex-A9
16 // Technical Reference Manual".
17 //
18 // Functional units referenced by the CortexA9Itineraries stages below
19 def A9_Issue0  : FuncUnit; // Issue 0 (most itineraries take Issue0 or Issue1)
20 def A9_Issue1  : FuncUnit; // Issue 1 (IIC_Br takes both Issue0 and Issue1)
21 def A9_Branch  : FuncUnit; // Branch
22 def A9_ALU0    : FuncUnit; // ALU / MUL pipeline 0 (multiplies use ALU0 only)
23 def A9_ALU1    : FuncUnit; // ALU pipeline 1
24 def A9_AGU     : FuncUnit; // Address generation unit for ld / st
25 def A9_NPipe   : FuncUnit; // NEON pipeline (also used by the VFP itineraries)
26 def A9_MUX0    : FuncUnit; // AGU + NEON/FPU multiplexer (ld / st and VFP use it)
27 def A9_LS0     : FuncUnit; // L/S Units, 32-bit per unit. Fake FU to limit l/s.
28 def A9_LS1     : FuncUnit; // L/S Units, 32-bit per unit.
29 def A9_DRegsVFP: FuncUnit; // FP register set, VFP side (artificial FU)
30 def A9_DRegsN  : FuncUnit; // FP register set, NEON side (artificial FU)
31
32 // Bypasses (listed on load results and on ALU / MVN / compare source operands)
33 def A9_LdBypass : Bypass;
34
35 def CortexA9Itineraries : ProcessorItineraries<
36   [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
37    A9_LS0, A9_LS1, A9_DRegsVFP, A9_DRegsN],
38   [A9_LdBypass], [
39   // Two fully-pipelined integer ALU pipelines
40
41   //
42   // Move instructions, unconditional
43   InstrItinData<IIC_iMOVi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
44                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
45   InstrItinData<IIC_iMOVr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
46                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
47   InstrItinData<IIC_iMOVsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
48                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
49   InstrItinData<IIC_iMOVsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
50                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
51   InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
52                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
53                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
54   //
55   // MVN instructions
56   InstrItinData<IIC_iMVNi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
57                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
58                               [1]>,
59   InstrItinData<IIC_iMVNr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
60                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
61                               [1, 1], [NoBypass, A9_LdBypass]>,
62   InstrItinData<IIC_iMVNsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
63                                InstrStage<2, [A9_ALU0, A9_ALU1]>],
64                               [2, 1]>,
65   InstrItinData<IIC_iMVNsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
66                                InstrStage<3, [A9_ALU0, A9_ALU1]>],
67                               [3, 1, 1]>,
68   //
69   // No operand cycles
70   InstrItinData<IIC_iALUx   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
71                                InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
72   //
73   // Binary Instructions that produce a result
74   InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
75                              InstrStage<1, [A9_ALU0, A9_ALU1]>],
76                             [1, 1], [NoBypass, A9_LdBypass]>,
77   InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
78                              InstrStage<1, [A9_ALU0, A9_ALU1]>],
79                             [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
80   InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
81                              InstrStage<2, [A9_ALU0, A9_ALU1]>],
82                             [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
83   InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
84                              InstrStage<2, [A9_ALU0, A9_ALU1]>],
85                             [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
86   InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
87                              InstrStage<3, [A9_ALU0, A9_ALU1]>],
88                             [3, 1, 1, 1],
89                             [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
90   //
91   // Bitwise Instructions that produce a result
92   InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
93                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
94   InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
95                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
96   InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
97                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
98   InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
99                              InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
100   //
101   // Unary Instructions that produce a result
102
103   // CLZ, RBIT, etc.
104   InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
105                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
106
107   // BFC, BFI, UBFX, SBFX
108   InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
109                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
110
111   //
112   // Zero and sign extension instructions
113   InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
114                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
115   InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
116                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
117   InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
118                              InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
119   //
120   // Compare instructions
121   InstrItinData<IIC_iCMPi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
122                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
123                                [1], [A9_LdBypass]>,
124   InstrItinData<IIC_iCMPr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
125                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
126                                [1, 1], [A9_LdBypass, A9_LdBypass]>,
127   InstrItinData<IIC_iCMPsi  , [InstrStage<2, [A9_ALU0, A9_ALU1]>],
128                                 [1, 1], [A9_LdBypass, NoBypass]>,
129   InstrItinData<IIC_iCMPsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
130                                InstrStage<3, [A9_ALU0, A9_ALU1]>],
131                               [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
132   //
133   // Test instructions
134   InstrItinData<IIC_iTSTi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
135                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
136   InstrItinData<IIC_iTSTr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
137                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
138   InstrItinData<IIC_iTSTsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
139                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
140   InstrItinData<IIC_iTSTsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
141                                InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
142   //
143   // Move instructions, conditional
144   // FIXME: Correctly model the extra input dep on the destination.
145   InstrItinData<IIC_iCMOVi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
146                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
147   InstrItinData<IIC_iCMOVr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
148                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
149   InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
150                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
151   InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
152                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
153
154   // Integer multiply pipeline
155   //
156   InstrItinData<IIC_iMUL16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
157                                InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
158   InstrItinData<IIC_iMAC16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
159                                InstrStage<2, [A9_ALU0]>],
160                               [3, 1, 1, 1]>,
161   InstrItinData<IIC_iMUL32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
162                                InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
163   InstrItinData<IIC_iMAC32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
164                                InstrStage<2, [A9_ALU0]>],
165                               [4, 1, 1, 1]>,
166   InstrItinData<IIC_iMUL64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
167                                InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
168   InstrItinData<IIC_iMAC64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
169                                InstrStage<3, [A9_ALU0]>],
170                               [4, 5, 1, 1]>,
171   // Integer load pipeline
172   // FIXME: The timings are some rough approximations
173   //
174   // Immediate offset
175   InstrItinData<IIC_iLoad_i   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
176                                  InstrStage<1, [A9_MUX0], 0>,
177                                  InstrStage<1, [A9_AGU]>,
178                                  InstrStage<1, [A9_LS0, A9_LS1]>],
179                                 [3, 1], [A9_LdBypass]>,
180   InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
181                                  InstrStage<1, [A9_MUX0], 0>,
182                                  InstrStage<2, [A9_AGU]>,
183                                  InstrStage<1, [A9_LS0, A9_LS1]>],
184                                 [4, 1], [A9_LdBypass]>,
185   // FIXME: If address is 64-bit aligned, AGU cycles is 1.
186   InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
187                                  InstrStage<1, [A9_MUX0], 0>,
188                                  InstrStage<2, [A9_AGU]>,
189                                  InstrStage<1, [A9_LS0, A9_LS1]>],
190                                 [3, 3, 1], [A9_LdBypass]>,
191   //
192   // Register offset
193   InstrItinData<IIC_iLoad_r   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
194                                  InstrStage<1, [A9_MUX0], 0>,
195                                  InstrStage<1, [A9_AGU]>,
196                                  InstrStage<1, [A9_LS0, A9_LS1]>],
197                                 [3, 1, 1], [A9_LdBypass]>,
198   InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
199                                  InstrStage<1, [A9_MUX0], 0>,
200                                  InstrStage<2, [A9_AGU]>,
201                                  InstrStage<1, [A9_LS0, A9_LS1]>],
202                                 [4, 1, 1], [A9_LdBypass]>,
203   InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
204                                  InstrStage<1, [A9_MUX0], 0>,
205                                  InstrStage<2, [A9_AGU]>,
206                                  InstrStage<1, [A9_LS0, A9_LS1]>],
207                                 [3, 3, 1, 1], [A9_LdBypass]>,
208   //
209   // Scaled register offset
210   InstrItinData<IIC_iLoad_si  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
211                                  InstrStage<1, [A9_MUX0], 0>,
212                                  InstrStage<1, [A9_AGU]>,
213                                  InstrStage<1, [A9_LS0, A9_LS1]>],
214                                 [4, 1, 1], [A9_LdBypass]>,
215   InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
216                                  InstrStage<1, [A9_MUX0], 0>,
217                                  InstrStage<2, [A9_AGU]>,
218                                  InstrStage<1, [A9_LS0, A9_LS1]>],
219                                 [5, 1, 1], [A9_LdBypass]>,
220   //
221   // Immediate offset with update
222   InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
223                                  InstrStage<1, [A9_MUX0], 0>,
224                                  InstrStage<1, [A9_AGU]>,
225                                  InstrStage<1, [A9_LS0, A9_LS1]>],
226                                 [3, 2, 1], [A9_LdBypass]>,
227   InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
228                                  InstrStage<1, [A9_MUX0], 0>,
229                                  InstrStage<2, [A9_AGU]>,
230                                  InstrStage<1, [A9_LS0, A9_LS1]>],
231                                 [4, 3, 1], [A9_LdBypass]>,
232   //
233   // Register offset with update
234   InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
235                                  InstrStage<1, [A9_MUX0], 0>,
236                                  InstrStage<1, [A9_AGU]>,
237                                  InstrStage<1, [A9_LS0, A9_LS1]>],
238                                 [3, 2, 1, 1], [A9_LdBypass]>,
239   InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
240                                  InstrStage<1, [A9_MUX0], 0>,
241                                  InstrStage<2, [A9_AGU]>,
242                                  InstrStage<1, [A9_LS0, A9_LS1]>],
243                                 [4, 3, 1, 1], [A9_LdBypass]>,
244   InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
245                                  InstrStage<1, [A9_MUX0], 0>,
246                                  InstrStage<2, [A9_AGU]>,
247                                  InstrStage<1, [A9_LS0, A9_LS1]>],
248                                 [3, 3, 1, 1], [A9_LdBypass]>,
249   //
250   // Scaled register offset with update
251   InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
252                                  InstrStage<1, [A9_MUX0], 0>,
253                                  InstrStage<1, [A9_AGU]>,
254                                  InstrStage<1, [A9_LS0, A9_LS1]>],
255                                 [4, 3, 1, 1], [A9_LdBypass]>,
256   InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
257                                   InstrStage<1, [A9_MUX0], 0>,
258                                   InstrStage<2, [A9_AGU]>,
259                                   InstrStage<1, [A9_LS0, A9_LS1]>],
260                                  [5, 4, 1, 1], [A9_LdBypass]>,
261   //
262   // Load multiple, def is the 5th operand.
263   // FIXME: This assumes 3 to 4 registers.
264   InstrItinData<IIC_iLoad_m  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
265                                 InstrStage<1, [A9_MUX0], 0>,
266                                 InstrStage<2, [A9_AGU]>,
267                                 InstrStage<2, [A9_LS0, A9_LS1]>],
268                                [1, 1, 1, 1, 3],
269                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
270   //
271   // Load multiple + update, defs are the 1st and 5th operands.
272   InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
273                                 InstrStage<1, [A9_MUX0], 0>,
274                                 InstrStage<2, [A9_AGU]>,
275                                 InstrStage<2, [A9_LS0, A9_LS1]>],
276                                [2, 1, 1, 1, 3],
277                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
278   //
279   // Load multiple plus branch
280   InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
281                                 InstrStage<1, [A9_MUX0], 0>,
282                                 InstrStage<1, [A9_AGU]>,
283                                 InstrStage<2, [A9_LS0, A9_LS1]>,
284                                 InstrStage<1, [A9_Branch]>],
285                                [1, 2, 1, 1, 3],
286                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
287   //
288   // Pop, def is the 3rd operand.
289   InstrItinData<IIC_iPop  ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
290                                 InstrStage<1, [A9_MUX0], 0>,
291                                 InstrStage<2, [A9_AGU]>,
292                                 InstrStage<2, [A9_LS0, A9_LS1]>],
293                                [1, 1, 3],
294                                [NoBypass, NoBypass, A9_LdBypass]>,
295   //
296   // Pop + branch, def is the 3rd operand.
297   InstrItinData<IIC_iPop_Br,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
298                                 InstrStage<1, [A9_MUX0], 0>,
299                                 InstrStage<2, [A9_AGU]>,
300                                 InstrStage<2, [A9_LS0, A9_LS1]>,
301                                 InstrStage<1, [A9_Branch]>],
302                                [1, 1, 3],
303                                [NoBypass, NoBypass, A9_LdBypass]>,
304
305   //
306   // iLoadi + iALUr for t2LDRpci_pic.
307   InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
308                                 InstrStage<1, [A9_MUX0], 0>,
309                                 InstrStage<1, [A9_AGU]>,
310                                 InstrStage<1, [A9_LS0, A9_LS1]>,
311                                 InstrStage<1, [A9_ALU0, A9_ALU1]>],
312                                [2, 1]>,
313
314   // Integer store pipeline
315   //
316   // Immediate offset
317   InstrItinData<IIC_iStore_i  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
318                                  InstrStage<1, [A9_MUX0], 0>,
319                                  InstrStage<1, [A9_AGU]>,
320                                  InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1]>,
321   InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
322                                  InstrStage<1, [A9_MUX0], 0>,
323                                  InstrStage<2, [A9_AGU]>,
324                                  InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1]>,
325   // FIXME: If address is 64-bit aligned, AGU cycles is 1.
326   InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
327                                  InstrStage<1, [A9_MUX0], 0>,
328                                  InstrStage<2, [A9_AGU]>,
329                                  InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1]>,
330   //
331   // Register offset
332   InstrItinData<IIC_iStore_r  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
333                                  InstrStage<1, [A9_MUX0], 0>,
334                                  InstrStage<1, [A9_AGU]>,
335                                  InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>,
336   InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
337                                  InstrStage<1, [A9_MUX0], 0>,
338                                  InstrStage<2, [A9_AGU]>,
339                                  InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>,
340   InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
341                                  InstrStage<1, [A9_MUX0], 0>,
342                                  InstrStage<2, [A9_AGU]>,
343                                  InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>,
344   //
345   // Scaled register offset
346   InstrItinData<IIC_iStore_si ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
347                                   InstrStage<1, [A9_MUX0], 0>,
348                                   InstrStage<1, [A9_AGU]>,
349                                   InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>,
350   InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
351                                   InstrStage<1, [A9_MUX0], 0>,
352                                   InstrStage<2, [A9_AGU]>,
353                                   InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>,
354   //
355   // Immediate offset with update
356   InstrItinData<IIC_iStore_iu ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
357                                   InstrStage<1, [A9_MUX0], 0>,
358                                   InstrStage<1, [A9_AGU]>,
359                                   InstrStage<1, [A9_LS0, A9_LS1]>], [2, 1, 1]>,
360   InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
361                                   InstrStage<1, [A9_MUX0], 0>,
362                                   InstrStage<2, [A9_AGU]>,
363                                   InstrStage<1, [A9_LS0, A9_LS1]>], [3, 1, 1]>,
364   //
365   // Register offset with update
366   InstrItinData<IIC_iStore_ru ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
367                                   InstrStage<1, [A9_MUX0], 0>,
368                                   InstrStage<1, [A9_AGU]>,
369                                   InstrStage<1, [A9_LS0, A9_LS1]>],
370                                  [2, 1, 1, 1]>,
371   InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
372                                   InstrStage<1, [A9_MUX0], 0>,
373                                   InstrStage<2, [A9_AGU]>,
374                                   InstrStage<1, [A9_LS0, A9_LS1]>],
375                                  [3, 1, 1, 1]>,
376   InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
377                                   InstrStage<1, [A9_MUX0], 0>,
378                                   InstrStage<2, [A9_AGU]>,
379                                   InstrStage<1, [A9_LS0, A9_LS1]>],
380                                  [3, 1, 1, 1]>,
381   //
382   // Scaled register offset with update
383   InstrItinData<IIC_iStore_siu,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
384                                     InstrStage<1, [A9_MUX0], 0>,
385                                     InstrStage<1, [A9_AGU]>,
386                                     InstrStage<1, [A9_LS0, A9_LS1]>],
387                                    [2, 1, 1, 1]>,
388   InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
389                                     InstrStage<1, [A9_MUX0], 0>,
390                                     InstrStage<2, [A9_AGU]>,
391                                     InstrStage<1, [A9_LS0, A9_LS1]>],
392                                    [3, 1, 1, 1]>,
393   //
394   // Store multiple
395   InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
396                                 InstrStage<1, [A9_MUX0], 0>,
397                                 InstrStage<1, [A9_AGU]>,
398                                 InstrStage<2, [A9_LS0, A9_LS1]>]>,
399   //
400   // Store multiple + update
401   InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
402                                 InstrStage<1, [A9_MUX0], 0>,
403                                 InstrStage<1, [A9_AGU]>,
404                                 InstrStage<2, [A9_LS0, A9_LS1]>], [2]>,
405
406   // Branch
407   //
408   // no delay slots, so the latency of a branch is unimportant
409   InstrItinData<IIC_Br       , [InstrStage<1, [A9_Issue0], 0>,
410                                 InstrStage<1, [A9_Issue1], 0>,
411                                 InstrStage<1, [A9_Branch]>]>,
412
413   // VFP and NEON share the same register file. This means that every VFP
414   // instruction should wait for full completion of the preceding NEON
415   // instruction and vice-versa. We model this behavior with two artificial FUs:
416   // DRegsVFP and DRegsN.
417   //
418   // Every VFP instruction:
419   //  - Acquires DRegsVFP resource for 1 cycle
420   //  - Reserves DRegsN resource for the whole duration (including time to
421   //    register file writeback!).
422   // Every NEON instruction does the same but with FUs swapped.
423   //
424   // Since the reserved FU cannot be acquired, this models precisely
425   // "cross-domain" stalls.
426
427   // VFP
428   // Issue through integer pipeline, and execute in NEON unit.
429
430   // FP Special Register to Integer Register File Move
431   InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
432                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
433                               InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
434                               InstrStage<1, [A9_MUX0], 0>,
435                               InstrStage<1, [A9_NPipe]>]>,
436   //
437   // Single-precision FP Unary
438   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
439                                // Extra latency cycles since wbck is 2 cycles
440                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
441                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
442                                InstrStage<1, [A9_MUX0], 0>,
443                                InstrStage<1, [A9_NPipe]>],
444                               [1, 1]>,
445   //
446   // Double-precision FP Unary
447   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
448                                // Extra latency cycles since wbck is 2 cycles
449                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
450                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
451                                InstrStage<1, [A9_MUX0], 0>,
452                                InstrStage<1, [A9_NPipe]>],
453                               [1, 1]>,
454
455   //
456   // Single-precision FP Compare
457   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
458                                // Extra latency cycles since wbck is 4 cycles
459                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
460                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
461                                InstrStage<1, [A9_MUX0], 0>,
462                                InstrStage<1, [A9_NPipe]>],
463                               [1, 1]>,
464   //
465   // Double-precision FP Compare
466   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
467                                // Extra latency cycles since wbck is 4 cycles
468                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
469                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
470                                InstrStage<1, [A9_MUX0], 0>,
471                                InstrStage<1, [A9_NPipe]>],
472                               [1, 1]>,
473   //
474   // Single to Double FP Convert
475   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
476                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
477                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
478                                InstrStage<1, [A9_MUX0], 0>,
479                                InstrStage<1, [A9_NPipe]>],
480                               [4, 1]>,
481   //
482   // Double to Single FP Convert
483   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
484                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
485                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
486                                InstrStage<1, [A9_MUX0], 0>,
487                                InstrStage<1, [A9_NPipe]>],
488                               [4, 1]>,
489
490   //
491   // Single to Half FP Convert
492   InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
493                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
494                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
495                                InstrStage<1, [A9_MUX0], 0>,
496                                InstrStage<1, [A9_NPipe]>],
497                               [4, 1]>,
498   //
499   // Half to Single FP Convert
500   InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
501                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
502                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
503                                InstrStage<1, [A9_MUX0], 0>,
504                                InstrStage<1, [A9_NPipe]>],
505                               [2, 1]>,
506
507   //
508   // Single-Precision FP to Integer Convert
509   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
510                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
511                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
512                                InstrStage<1, [A9_MUX0], 0>,
513                                InstrStage<1, [A9_NPipe]>],
514                               [4, 1]>,
515   //
516   // Double-Precision FP to Integer Convert
517   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
518                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
519                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
520                                InstrStage<1, [A9_MUX0], 0>,
521                                InstrStage<1, [A9_NPipe]>],
522                               [4, 1]>,
523   //
524   // Integer to Single-Precision FP Convert
525   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
526                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
527                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
528                                InstrStage<1, [A9_MUX0], 0>,
529                                InstrStage<1, [A9_NPipe]>],
530                               [4, 1]>,
531   //
532   // Integer to Double-Precision FP Convert
533   InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
534                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
535                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
536                                InstrStage<1, [A9_MUX0], 0>,
537                                InstrStage<1, [A9_NPipe]>],
538                               [4, 1]>,
539   //
540   // Single-precision FP ALU
541   InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
542                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
543                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
544                                InstrStage<1, [A9_MUX0], 0>,
545                                InstrStage<1, [A9_NPipe]>],
546                               [4, 1, 1]>,
547   //
548   // Double-precision FP ALU
549   InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
550                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
551                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
552                                InstrStage<1, [A9_MUX0], 0>,
553                                InstrStage<1, [A9_NPipe]>],
554                               [4, 1, 1]>,
555   //
556   // Single-precision FP Multiply
557   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
558                                InstrStage<6, [A9_DRegsN],   0, Reserved>,
559                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
560                                InstrStage<1, [A9_MUX0], 0>,
561                                InstrStage<1, [A9_NPipe]>],
562                               [5, 1, 1]>,
563   //
564   // Double-precision FP Multiply
565   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
566                                InstrStage<7, [A9_DRegsN],   0, Reserved>,
567                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
568                                InstrStage<1, [A9_MUX0], 0>,
569                                InstrStage<2, [A9_NPipe]>],
570                               [6, 1, 1]>,
571   //
572   // Single-precision FP MAC
573   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
574                                InstrStage<9, [A9_DRegsN],   0, Reserved>,
575                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
576                                InstrStage<1, [A9_MUX0], 0>,
577                                InstrStage<1, [A9_NPipe]>],
578                               [8, 0, 1, 1]>,
579   //
580   // Double-precision FP MAC
581   InstrItinData<IIC_fpMAC64 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
582                                InstrStage<10, [A9_DRegsN],  0, Reserved>,
583                                InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
584                                InstrStage<1,  [A9_MUX0], 0>,
585                                InstrStage<2,  [A9_NPipe]>],
586                               [9, 0, 1, 1]>,
587   //
588   // Single-precision FP DIV
589   InstrItinData<IIC_fpDIV32 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
590                                InstrStage<16, [A9_DRegsN],  0, Reserved>,
591                                InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
592                                InstrStage<1,  [A9_MUX0], 0>,
593                                InstrStage<10, [A9_NPipe]>],
594                               [15, 1, 1]>,
595   //
596   // Double-precision FP DIV
597   InstrItinData<IIC_fpDIV64 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
598                                InstrStage<26, [A9_DRegsN],  0, Reserved>,
599                                InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
600                                InstrStage<1,  [A9_MUX0], 0>,
601                                InstrStage<20, [A9_NPipe]>],
602                               [25, 1, 1]>,
603   //
604   // Single-precision FP SQRT
605   InstrItinData<IIC_fpSQRT32, [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
606                                InstrStage<18, [A9_DRegsN],   0, Reserved>,
607                                InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
608                                InstrStage<1,  [A9_MUX0], 0>,
609                                InstrStage<13, [A9_NPipe]>],
610                               [17, 1]>,
611   //
612   // Double-precision FP SQRT
613   InstrItinData<IIC_fpSQRT64, [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
614                                InstrStage<33, [A9_DRegsN],   0, Reserved>,
615                                InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
616                                InstrStage<1,  [A9_MUX0], 0>,
617                                InstrStage<28, [A9_NPipe]>],
618                               [32, 1]>,
619
620   //
621   // Integer to Single-precision Move
622   InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
623                                // Extra 1 latency cycle since wbck is 2 cycles
624                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
625                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
626                                InstrStage<1, [A9_MUX0], 0>,
627                                InstrStage<1, [A9_NPipe]>],
628                               [1, 1]>,
629   //
630   // Integer to Double-precision Move
631   InstrItinData<IIC_fpMOVID,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
632                                // Extra 1 latency cycle since wbck is 2 cycles
633                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
634                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
635                                InstrStage<1, [A9_MUX0], 0>,
636                                InstrStage<1, [A9_NPipe]>],
637                               [1, 1, 1]>,
638   //
639   // Single-precision to Integer Move
640   InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
641                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
642                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
643                                InstrStage<1, [A9_MUX0], 0>,
644                                InstrStage<1, [A9_NPipe]>],
645                               [1, 1]>,
646   //
647   // Double-precision to Integer Move
648   InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
649                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
650                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
651                                InstrStage<1, [A9_MUX0], 0>,
652                                InstrStage<1, [A9_NPipe]>],
653                               [1, 1, 1]>,
654   //
655   // Single-precision FP Load
656   InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
657                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
658                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
659                                InstrStage<1, [A9_MUX0], 0>,
660                                InstrStage<1, [A9_NPipe]>],
661                               [1, 1]>,
662   //
663   // Double-precision FP Load
664   // FIXME: Result latency is 1 if address is 64-bit aligned.
665   InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
666                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
667                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
668                                InstrStage<1, [A9_MUX0], 0>,
669                                InstrStage<1, [A9_NPipe]>],
670                               [2, 1]>,
671   //
672   // FP Load Multiple
673   InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
674                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
675                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
676                                InstrStage<1, [A9_MUX0], 0>,
677                                InstrStage<1, [A9_NPipe]>], [1, 1, 1, 1]>,
678   //
679   // FP Load Multiple + update
680   InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
681                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
682                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
683                                InstrStage<1, [A9_MUX0], 0>,
684                                InstrStage<1, [A9_NPipe]>], [2, 1, 1, 1]>,
685   //
686   // Single-precision FP Store
687   InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
688                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
689                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
690                                InstrStage<1, [A9_MUX0], 0>,
691                                InstrStage<1, [A9_NPipe]>],
692                               [1, 1]>,
693   //
694   // Double-precision FP Store
695   InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
696                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
697                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
698                                InstrStage<1, [A9_MUX0], 0>,
699                                InstrStage<1, [A9_NPipe]>],
700                               [1, 1]>,
701   //
702   // FP Store Multiple
703   InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
704                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
705                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
706                                InstrStage<1, [A9_MUX0], 0>,
707                                InstrStage<1, [A9_NPipe]>], [1, 1, 1, 1]>,
708   //
709   // FP Store Multiple + update
710   InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
711                                 InstrStage<2, [A9_DRegsN],   0, Reserved>,
712                                 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
713                                 InstrStage<1, [A9_MUX0], 0>,
714                                 InstrStage<1, [A9_NPipe]>], [2, 1, 1, 1]>,
715   // NEON
716   // VLD1
717   // FIXME: Conservatively assume insufficient alignment.
718   InstrItinData<IIC_VLD1,     [InstrStage<1, [A9_DRegsN],   0, Required>,
719                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
720                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
721                                InstrStage<1, [A9_MUX0], 0>,
722                                InstrStage<2, [A9_NPipe]>],
723                               [2, 1]>,
724   // VLD1x2
725   InstrItinData<IIC_VLD1x2,   [InstrStage<1, [A9_DRegsN],   0, Required>,
726                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
727                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
728                                InstrStage<1, [A9_MUX0], 0>,
729                                InstrStage<2, [A9_NPipe]>],
730                               [2, 2, 1]>,
731   // VLD1x3
732   InstrItinData<IIC_VLD1x3,   [InstrStage<1, [A9_DRegsN],   0, Required>,
733                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
734                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
735                                InstrStage<1, [A9_MUX0], 0>,
736                                InstrStage<3, [A9_NPipe]>],
737                               [2, 2, 3, 1]>,
738   // VLD1x4
739   InstrItinData<IIC_VLD1x4,   [InstrStage<1, [A9_DRegsN],   0, Required>,
740                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
741                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
742                                InstrStage<1, [A9_MUX0], 0>,
743                                InstrStage<3, [A9_NPipe]>],
744                               [2, 2, 3, 3, 1]>,
745   // VLD1u
746   InstrItinData<IIC_VLD1u,    [InstrStage<1, [A9_DRegsN],   0, Required>,
747                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
748                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
749                                InstrStage<1, [A9_MUX0], 0>,
750                                InstrStage<2, [A9_NPipe]>],
751                               [2, 2, 1]>,
752   // VLD1x2u
753   InstrItinData<IIC_VLD1x2u,  [InstrStage<1, [A9_DRegsN],   0, Required>,
754                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
755                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
756                                InstrStage<1, [A9_MUX0], 0>,
757                                InstrStage<2, [A9_NPipe]>],
758                               [2, 2, 2, 1]>,
759   // VLD1x3u
760   InstrItinData<IIC_VLD1x3u,  [InstrStage<1, [A9_DRegsN],   0, Required>,
761                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
762                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
763                                InstrStage<1, [A9_MUX0], 0>,
764                                InstrStage<3, [A9_NPipe]>],
765                               [2, 2, 3, 2, 1]>,
766   // VLD1x4u
767   InstrItinData<IIC_VLD1x4u,  [InstrStage<1, [A9_DRegsN],   0, Required>,
768                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
769                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
770                                InstrStage<1, [A9_MUX0], 0>,
771                                InstrStage<3, [A9_NPipe]>],
772                               [2, 2, 3, 3, 2, 1]>,
773   //
774   // VLD2
775   InstrItinData<IIC_VLD2,     [InstrStage<1, [A9_DRegsN],   0, Required>,
776                                // Extra latency cycles since wbck is 7 cycles
777                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
778                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
779                                InstrStage<1, [A9_MUX0], 0>,
780                                InstrStage<2, [A9_NPipe]>],
781                               [3, 3, 1]>,
782   //
783   // VLD2x2
784   InstrItinData<IIC_VLD2x2,   [InstrStage<1, [A9_DRegsN],   0, Required>,
785                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
786                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
787                                InstrStage<1, [A9_MUX0], 0>,
788                                InstrStage<3, [A9_NPipe]>],
789                               [3, 4, 3, 4, 1]>,
790   //
791   // VLD2ln
792   InstrItinData<IIC_VLD2ln,   [InstrStage<1, [A9_DRegsN],   0, Required>,
793                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
794                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
795                                InstrStage<1, [A9_MUX0], 0>,
796                                InstrStage<3, [A9_NPipe]>],
797                               [4, 4, 1, 1, 1, 1]>,
798   //
799   // VLD2u
800   InstrItinData<IIC_VLD2u,    [InstrStage<1, [A9_DRegsN],   0, Required>,
801                                // Extra latency cycles since wbck is 7 cycles
802                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
803                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
804                                InstrStage<1, [A9_MUX0], 0>,
805                                InstrStage<2, [A9_NPipe]>],
806                               [3, 3, 2, 1, 1, 1]>,
807   //
808   // VLD2x2u
809   InstrItinData<IIC_VLD2x2u,  [InstrStage<1, [A9_DRegsN],   0, Required>,
810                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
811                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
812                                InstrStage<1, [A9_MUX0], 0>,
813                                InstrStage<3, [A9_NPipe]>],
814                               [3, 4, 3, 4, 2, 1]>,
815   //
816   // VLD2lnu
817   InstrItinData<IIC_VLD2lnu,  [InstrStage<1, [A9_DRegsN],   0, Required>,
818                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
819                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
820                                InstrStage<1, [A9_MUX0], 0>,
821                                InstrStage<3, [A9_NPipe]>],
822                               [4, 4, 2, 1, 1, 1, 1, 1]>,
823   //
824   // VLD3
825   InstrItinData<IIC_VLD3,     [InstrStage<1, [A9_DRegsN],   0, Required>,
826                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
827                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
828                                InstrStage<1, [A9_MUX0], 0>,
829                                InstrStage<4, [A9_NPipe]>],
830                               [4, 4, 5, 1]>,
831   //
832   // VLD3ln
833   InstrItinData<IIC_VLD3ln,   [InstrStage<1, [A9_DRegsN],   0, Required>,
834                                InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
835                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
836                                InstrStage<1, [A9_MUX0], 0>,
837                                InstrStage<5, [A9_NPipe]>],
838                               [5, 5, 6, 1, 1, 1, 1, 2]>,
839   //
840   // VLD3u
841   InstrItinData<IIC_VLD3u,    [InstrStage<1, [A9_DRegsN],   0, Required>,
842                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
843                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
844                                InstrStage<1, [A9_MUX0], 0>,
845                                InstrStage<4, [A9_NPipe]>],
846                               [4, 4, 5, 2, 1]>,
847   //
848   // VLD3lnu
849   InstrItinData<IIC_VLD3lnu,  [InstrStage<1, [A9_DRegsN],   0, Required>,
850                                InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
851                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
852                                InstrStage<1, [A9_MUX0], 0>,
853                                InstrStage<5, [A9_NPipe]>],
854                               [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
855   //
856   // VLD4
857   InstrItinData<IIC_VLD4,     [InstrStage<1, [A9_DRegsN],   0, Required>,
858                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
859                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
860                                InstrStage<1, [A9_MUX0], 0>,
861                                InstrStage<4, [A9_NPipe]>],
862                               [4, 4, 5, 5, 1]>,
863   //
864   // VLD4ln
865   InstrItinData<IIC_VLD4ln,   [InstrStage<1, [A9_DRegsN],   0, Required>,
866                                InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
867                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
868                                InstrStage<1, [A9_MUX0], 0>,
869                                InstrStage<5, [A9_NPipe]>],
870                               [5, 5, 6, 6, 1, 1, 1, 1, 2, 2]>,
871   //
872   // VLD4u
873   InstrItinData<IIC_VLD4u,    [InstrStage<1, [A9_DRegsN],   0, Required>,
874                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
875                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
876                                InstrStage<1, [A9_MUX0], 0>,
877                                InstrStage<4, [A9_NPipe]>],
878                               [4, 4, 5, 5, 2, 1]>,
879   //
880   // VLD4lnu
881   InstrItinData<IIC_VLD4lnu,  [InstrStage<1, [A9_DRegsN],   0, Required>,
882                                InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
883                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
884                                InstrStage<1, [A9_MUX0], 0>,
885                                InstrStage<5, [A9_NPipe]>],
886                               [5, 5, 6, 6, 2, 1, 1, 1, 1, 1, 2, 2]>,
887   //
888   // VST
889   // FIXME: We don't model this instruction properly
890   InstrItinData<IIC_VST,      [InstrStage<1, [A9_DRegsN],   0, Required>,
891                                // Extra latency cycles since wbck is 6 cycles
892                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
893                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
894                                InstrStage<1, [A9_MUX0], 0>,
895                                InstrStage<1, [A9_NPipe]>]>,
896   //
897   // Double-register Integer Unary
898   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
899                                // Extra latency cycles since wbck is 6 cycles
900                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
901                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
902                                InstrStage<1, [A9_MUX0], 0>,
903                                InstrStage<1, [A9_NPipe]>],
904                               [4, 2]>,
905   //
906   // Quad-register Integer Unary
907   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
908                                // Extra latency cycles since wbck is 6 cycles
909                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
910                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
911                                InstrStage<1, [A9_MUX0], 0>,
912                                InstrStage<1, [A9_NPipe]>],
913                               [4, 2]>,
914   //
915   // Double-register Integer Q-Unary
916   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A9_DRegsN],   0, Required>,
917                                // Extra latency cycles since wbck is 6 cycles
918                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
919                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
920                                InstrStage<1, [A9_MUX0], 0>,
921                                InstrStage<1, [A9_NPipe]>],
922                               [4, 1]>,
923   //
924   // Quad-register Integer Q-Unary
925   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
926                                // Extra latency cycles since wbck is 6 cycles
927                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
928                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
929                                InstrStage<1, [A9_MUX0], 0>,
930                                InstrStage<1, [A9_NPipe]>],
931                               [4, 1]>,
932   //
933   // Double-register Integer Binary
934   InstrItinData<IIC_VBINiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
935                                // Extra latency cycles since wbck is 6 cycles
936                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
937                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
938                                InstrStage<1, [A9_MUX0], 0>,
939                                InstrStage<1, [A9_NPipe]>],
940                               [3, 2, 2]>,
941   //
942   // Quad-register Integer Binary
943   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
944                                // Extra latency cycles since wbck is 6 cycles
945                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
946                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
947                                InstrStage<1, [A9_MUX0], 0>,
948                                InstrStage<1, [A9_NPipe]>],
949                               [3, 2, 2]>,
950   //
951   // Double-register Integer Subtract
952   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
953                                // Extra latency cycles since wbck is 6 cycles
954                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
955                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
956                                InstrStage<1, [A9_MUX0], 0>,
957                                InstrStage<1, [A9_NPipe]>],
958                               [3, 2, 1]>,
959   //
960   // Quad-register Integer Subtract
961   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
962                                // Extra latency cycles since wbck is 6 cycles
963                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
964                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
965                                InstrStage<1, [A9_MUX0], 0>,
966                                InstrStage<1, [A9_NPipe]>],
967                               [3, 2, 1]>,
968   //
969   // Double-register Integer Shift
970   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
971                                // Extra latency cycles since wbck is 6 cycles
972                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
973                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
974                                InstrStage<1, [A9_MUX0], 0>,
975                                InstrStage<1, [A9_NPipe]>],
976                               [3, 1, 1]>,
977   //
978   // Quad-register Integer Shift
979   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
980                                // Extra latency cycles since wbck is 6 cycles
981                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
982                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
983                                InstrStage<1, [A9_MUX0], 0>,
984                                InstrStage<1, [A9_NPipe]>],
985                               [3, 1, 1]>,
986   //
987   // Double-register Integer Shift (4 cycle)
988   InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
989                                // Extra latency cycles since wbck is 6 cycles
990                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
991                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
992                                InstrStage<1, [A9_MUX0], 0>,
993                                InstrStage<1, [A9_NPipe]>],
994                               [4, 1, 1]>,
995   //
996   // Quad-register Integer Shift (4 cycle)
997   InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
998                                // Extra latency cycles since wbck is 6 cycles
999                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1000                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1001                                InstrStage<1, [A9_MUX0], 0>,
1002                                InstrStage<1, [A9_NPipe]>],
1003                               [4, 1, 1]>,
1004   //
1005   // Double-register Integer Binary (4 cycle)
1006   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
1007                                // Extra latency cycles since wbck is 6 cycles
1008                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1009                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1010                                InstrStage<1, [A9_MUX0], 0>,
1011                                InstrStage<1, [A9_NPipe]>],
1012                               [4, 2, 2]>,
1013   //
1014   // Quad-register Integer Binary (4 cycle)
1015   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
1016                                // Extra latency cycles since wbck is 6 cycles
1017                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1018                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1019                                InstrStage<1, [A9_MUX0], 0>,
1020                                InstrStage<1, [A9_NPipe]>],
1021                               [4, 2, 2]>,
1022   //
1023   // Double-register Integer Subtract (4 cycle)
1024   InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
1025                                // Extra latency cycles since wbck is 6 cycles
1026                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1027                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1028                                InstrStage<1, [A9_MUX0], 0>,
1029                                InstrStage<1, [A9_NPipe]>],
1030                               [4, 2, 1]>,
1031   //
1032   // Quad-register Integer Subtract (4 cycle)
1033   InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
1034                                // Extra latency cycles since wbck is 6 cycles
1035                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1036                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1037                                InstrStage<1, [A9_MUX0], 0>,
1038                                InstrStage<1, [A9_NPipe]>],
1039                               [4, 2, 1]>,
1040
1041   //
1042   // Double-register Integer Count
1043   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
1044                                // Extra latency cycles since wbck is 6 cycles
1045                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1046                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1047                                InstrStage<1, [A9_MUX0], 0>,
1048                                InstrStage<1, [A9_NPipe]>],
1049                               [3, 2, 2]>,
1050   //
1051   // Quad-register Integer Count
1052   // Result written in N3, but that is relative to the last cycle of multicycle,
1053   // so we use 4 for those cases
1054   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
1055                                // Extra latency cycles since wbck is 7 cycles
1056                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1057                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1058                                InstrStage<1, [A9_MUX0], 0>,
1059                                InstrStage<2, [A9_NPipe]>],
1060                               [4, 2, 2]>,
1061   //
1062   // Double-register Absolute Difference and Accumulate
1063   InstrItinData<IIC_VABAD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1064                                // Extra latency cycles since wbck is 6 cycles
1065                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1066                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1067                                InstrStage<1, [A9_MUX0], 0>,
1068                                InstrStage<1, [A9_NPipe]>],
1069                               [6, 3, 2, 1]>,
1070   //
1071   // Quad-register Absolute Difference and Accumulate
1072   InstrItinData<IIC_VABAQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1073                                // Extra latency cycles since wbck is 6 cycles
1074                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1075                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1076                                InstrStage<1, [A9_MUX0], 0>,
1077                                InstrStage<2, [A9_NPipe]>],
1078                               [6, 3, 2, 1]>,
1079   //
1080   // Double-register Integer Pair Add Long
1081   InstrItinData<IIC_VPALiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
1082                                // Extra latency cycles since wbck is 6 cycles
1083                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1084                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1085                                InstrStage<1, [A9_MUX0], 0>,
1086                                InstrStage<1, [A9_NPipe]>],
1087                               [6, 3, 1]>,
1088   //
1089   // Quad-register Integer Pair Add Long
1090   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
1091                                // Extra latency cycles since wbck is 6 cycles
1092                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1093                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1094                                InstrStage<1, [A9_MUX0], 0>,
1095                                InstrStage<2, [A9_NPipe]>],
1096                               [6, 3, 1]>,
1097
1098   //
1099   // Double-register Integer Multiply (.8, .16)
1100   InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN],   0, Required>,
1101                                // Extra latency cycles since wbck is 6 cycles
1102                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1103                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1104                                InstrStage<1, [A9_MUX0], 0>,
1105                                InstrStage<1, [A9_NPipe]>],
1106                               [6, 2, 2]>,
1107   //
1108   // Quad-register Integer Multiply (.8, .16)
1109   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
1110                                // Extra latency cycles since wbck is 7 cycles
1111                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1112                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1113                                InstrStage<1, [A9_MUX0], 0>,
1114                                InstrStage<2, [A9_NPipe]>],
1115                               [7, 2, 2]>,
1116
1117   //
1118   // Double-register Integer Multiply (.32)
1119   InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN],   0, Required>,
1120                                // Extra latency cycles since wbck is 7 cycles
1121                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1122                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1123                                InstrStage<1, [A9_MUX0], 0>,
1124                                InstrStage<2, [A9_NPipe]>],
1125                               [7, 2, 1]>,
1126   //
1127   // Quad-register Integer Multiply (.32)
1128   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
1129                                // Extra latency cycles since wbck is 9 cycles
1130                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1131                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1132                                InstrStage<1, [A9_MUX0], 0>,
1133                                InstrStage<4, [A9_NPipe]>],
1134                               [9, 2, 1]>,
1135   //
1136   // Double-register Integer Multiply-Accumulate (.8, .16)
1137   InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN],   0, Required>,
1138                                // Extra latency cycles since wbck is 6 cycles
1139                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1140                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1141                                InstrStage<1, [A9_MUX0], 0>,
1142                                InstrStage<1, [A9_NPipe]>],
1143                               [6, 3, 2, 2]>,
1144   //
1145   // Double-register Integer Multiply-Accumulate (.32)
1146   InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN],   0, Required>,
1147                                // Extra latency cycles since wbck is 7 cycles
1148                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1149                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1150                                InstrStage<1, [A9_MUX0], 0>,
1151                                InstrStage<2, [A9_NPipe]>],
1152                               [7, 3, 2, 1]>,
1153   //
1154   // Quad-register Integer Multiply-Accumulate (.8, .16)
1155   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
1156                                // Extra latency cycles since wbck is 7 cycles
1157                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1158                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1159                                InstrStage<1, [A9_MUX0], 0>,
1160                                InstrStage<2, [A9_NPipe]>],
1161                               [7, 3, 2, 2]>,
1162   //
1163   // Quad-register Integer Multiply-Accumulate (.32)
1164   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
1165                                // Extra latency cycles since wbck is 9 cycles
1166                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1167                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1168                                InstrStage<1, [A9_MUX0], 0>,
1169                                InstrStage<4, [A9_NPipe]>],
1170                               [9, 3, 2, 1]>,
1171
1172   //
1173   // Move
1174   InstrItinData<IIC_VMOV,     [InstrStage<1, [A9_DRegsN],   0, Required>,
1175                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1176                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1177                                InstrStage<1, [A9_MUX0], 0>,
1178                                InstrStage<1, [A9_NPipe]>],
1179                               [1,1]>,
1180   //
1181   // Move Immediate
1182   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A9_DRegsN],   0, Required>,
1183                                // Extra latency cycles since wbck is 6 cycles
1184                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1185                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1186                                InstrStage<1, [A9_MUX0], 0>,
1187                                InstrStage<1, [A9_NPipe]>],
1188                               [3]>,
1189   //
1190   // Double-register Permute Move
1191   InstrItinData<IIC_VMOVD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1192   // FIXME: all latencies are arbitrary, no information is available
1193                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1194                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1195                                InstrStage<1, [A9_MUX0], 0>,
1196                                InstrStage<1, [A9_NPipe]>],
1197                               [2, 1]>,
1198   //
1199   // Quad-register Permute Move
1200   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1201   // FIXME: all latencies are arbitrary, no information is available
1202                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1203                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1204                                InstrStage<1, [A9_MUX0], 0>,
1205                                InstrStage<1, [A9_NPipe]>],
1206                               [2, 1]>,
1207   //
1208   // Integer to Single-precision Move
1209   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
1210   // FIXME: all latencies are arbitrary, no information is available
1211                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1212                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1213                                InstrStage<1, [A9_MUX0], 0>,
1214                                InstrStage<1, [A9_NPipe]>],
1215                               [2, 1]>,
1216   //
1217   // Integer to Double-precision Move
1218   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
1219   // FIXME: all latencies are arbitrary, no information is available
1220                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1221                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1222                                InstrStage<1, [A9_MUX0], 0>,
1223                                InstrStage<1, [A9_NPipe]>],
1224                               [2, 1, 1]>,
1225   //
1226   // Single-precision to Integer Move
1227   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
1228   // FIXME: all latencies are arbitrary, no information is available
1229                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1230                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1231                                InstrStage<1, [A9_MUX0], 0>,
1232                                InstrStage<1, [A9_NPipe]>],
1233                               [2, 1]>,
1234   //
1235   // Double-precision to Integer Move
1236   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
1237   // FIXME: all latencies are arbitrary, no information is available
1238                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1239                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1240                                InstrStage<1, [A9_MUX0], 0>,
1241                                InstrStage<1, [A9_NPipe]>],
1242                               [2, 2, 1]>,
1243   //
1244   // Integer to Lane Move
1245   InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN],   0, Required>,
1246   // FIXME: all latencies are arbitrary, no information is available
1247                                InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
1248                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1249                                InstrStage<1, [A9_MUX0], 0>,
1250                                InstrStage<2, [A9_NPipe]>],
1251                               [3, 1, 1]>,
1252
1253   //
1254   // Vector narrow move
1255   InstrItinData<IIC_VMOVN,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1256                                // Extra latency cycles since wbck is 6 cycles
1257                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1258                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1259                                InstrStage<1, [A9_MUX0], 0>,
1260                                InstrStage<1, [A9_NPipe]>],
1261                               [3, 1]>,
1262   //
1263   // Double-register FP Unary
1264   InstrItinData<IIC_VUNAD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1265                                // Extra latency cycles since wbck is 6 cycles
1266                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1267                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1268                                InstrStage<1, [A9_MUX0], 0>,
1269                                InstrStage<1, [A9_NPipe]>],
1270                               [5, 2]>,
1271   //
1272   // Quad-register FP Unary
1273   // Result written in N5, but that is relative to the last cycle of multicycle,
1274   // so we use 6 for those cases
1275   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1276                                // Extra latency cycles since wbck is 7 cycles
1277                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1278                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1279                                InstrStage<1, [A9_MUX0], 0>,
1280                                InstrStage<2, [A9_NPipe]>],
1281                               [6, 2]>,
1282   //
1283   // Double-register FP Binary
1284   // FIXME: We're using this itin for many instructions and [2, 2] here is too
1285   // optimistic.
1286   InstrItinData<IIC_VBIND,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1287                                // Extra latency cycles since wbck is 6 cycles
1288                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1289                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1290                                InstrStage<1, [A9_MUX0], 0>,
1291                                InstrStage<1, [A9_NPipe]>],
1292                               [5, 2, 2]>,
1293   //
1294   // Quad-register FP Binary
1295   // Result written in N5, but that is relative to the last cycle of multicycle,
1296   // so we use 6 for those cases
1297   // FIXME: We're using this itin for many instructions and [2, 2] here is too
1298   // optimistic.
1299   InstrItinData<IIC_VBINQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1300                                // Extra latency cycles since wbck is 7 cycles
1301                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1302                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1303                                InstrStage<1, [A9_MUX0], 0>,
1304                                InstrStage<2, [A9_NPipe]>],
1305                               [6, 2, 2]>,
1306   //
1307   // Double-register FP Multiply-Accumulate
1308   InstrItinData<IIC_VMACD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1309                                // Extra latency cycles since wbck is 7 cycles
1310                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1311                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1312                                InstrStage<1, [A9_MUX0], 0>,
1313                                InstrStage<2, [A9_NPipe]>],
1314                               [6, 3, 2, 1]>,
1315   //
1316   // Quad-register FP Multiply-Accumulate
1317   // Result written in N9, but that is relative to the last cycle of multicycle,
1318   // so we use 10 for those cases
1319   InstrItinData<IIC_VMACQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1320                                // Extra latency cycles since wbck is 9 cycles
1321                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1322                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1323                                InstrStage<1, [A9_MUX0], 0>,
1324                                InstrStage<4, [A9_NPipe]>],
1325                               [8, 4, 2, 1]>,
1326   //
1327   // Double-register Reciprocal Step
1328   InstrItinData<IIC_VRECSD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
1329                                // Extra latency cycles since wbck is 7 cycles
1330                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1331                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1332                                InstrStage<1, [A9_MUX0], 0>,
1333                                InstrStage<2, [A9_NPipe]>],
1334                               [6, 2, 2]>,
1335   //
1336   // Quad-register Reciprocal Step
1337   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
1338                                // Extra latency cycles since wbck is 9 cycles
1339                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1340                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1341                                InstrStage<1, [A9_MUX0], 0>,
1342                                InstrStage<4, [A9_NPipe]>],
1343                               [8, 2, 2]>,
1344   //
1345   // Double-register Permute
1346   InstrItinData<IIC_VPERMD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
1347                                // Extra latency cycles since wbck is 6 cycles
1348                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1349                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1350                                InstrStage<1, [A9_MUX0], 0>,
1351                                InstrStage<1, [A9_NPipe]>],
1352                               [2, 2, 1, 1]>,
1353   //
1354   // Quad-register Permute
1355   // Result written in N2, but that is relative to the last cycle of multicycle,
1356   // so we use 3 for those cases
1357   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
1358                                // Extra latency cycles since wbck is 7 cycles
1359                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1360                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1361                                InstrStage<1, [A9_MUX0], 0>,
1362                                InstrStage<2, [A9_NPipe]>],
1363                               [3, 3, 1, 1]>,
1364   //
1365   // Quad-register Permute (3 cycle issue)
1366   // Result written in N2, but that is relative to the last cycle of multicycle,
1367   // so we use 4 for those cases
1368   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A9_DRegsN],   0, Required>,
1369                                // Extra latency cycles since wbck is 8 cycles
1370                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1371                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1372                                InstrStage<1, [A9_MUX0], 0>,
1373                                InstrStage<3, [A9_NPipe]>],
1374                               [4, 4, 1, 1]>,
1375
1376   //
1377   // Double-register VEXT
1378   InstrItinData<IIC_VEXTD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1379                                // Extra latency cycles since wbck is 6 cycles
1380                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1381                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1382                                InstrStage<1, [A9_MUX0], 0>,
1383                                InstrStage<1, [A9_NPipe]>],
1384                               [2, 1, 1]>,
1385   //
1386   // Quad-register VEXT
1387   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1388                                // Extra latency cycles since wbck is 7 cycles
1389                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1390                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1391                                InstrStage<1, [A9_MUX0], 0>,
1392                                InstrStage<2, [A9_NPipe]>],
1393                               [3, 1, 1]>,
1394   //
1395   // VTB
1396   InstrItinData<IIC_VTB1,     [InstrStage<1, [A9_DRegsN],   0, Required>,
1397                                // Extra latency cycles since wbck is 7 cycles
1398                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1399                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1400                                InstrStage<1, [A9_MUX0], 0>,
1401                                InstrStage<2, [A9_NPipe]>],
1402                               [3, 2, 1]>,
1403   InstrItinData<IIC_VTB2,     [InstrStage<2, [A9_DRegsN],   0, Required>,
1404                                // Extra latency cycles since wbck is 7 cycles
1405                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1406                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1407                                InstrStage<1, [A9_MUX0], 0>,
1408                                InstrStage<2, [A9_NPipe]>],
1409                               [3, 2, 2, 1]>,
1410   InstrItinData<IIC_VTB3,     [InstrStage<2, [A9_DRegsN],   0, Required>,
1411                                // Extra latency cycles since wbck is 8 cycles
1412                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1413                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1414                                InstrStage<1, [A9_MUX0], 0>,
1415                                InstrStage<3, [A9_NPipe]>],
1416                               [4, 2, 2, 3, 1]>,
1417   InstrItinData<IIC_VTB4,     [InstrStage<1, [A9_DRegsN],   0, Required>,
1418                                // Extra latency cycles since wbck is 8 cycles
1419                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1420                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1421                                InstrStage<1, [A9_MUX0], 0>,
1422                                InstrStage<3, [A9_NPipe]>],
1423                               [4, 2, 2, 3, 3, 1]>,
1424   //
1425   // VTBX
1426   InstrItinData<IIC_VTBX1,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1427                                // Extra latency cycles since wbck is 7 cycles
1428                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1429                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1430                                InstrStage<1, [A9_MUX0], 0>,
1431                                InstrStage<2, [A9_NPipe]>],
1432                               [3, 1, 2, 1]>,
1433   InstrItinData<IIC_VTBX2,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1434                                // Extra latency cycles since wbck is 7 cycles
1435                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1436                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1437                                InstrStage<1, [A9_MUX0], 0>,
1438                                InstrStage<2, [A9_NPipe]>],
1439                               [3, 1, 2, 2, 1]>,
1440   InstrItinData<IIC_VTBX3,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1441                                // Extra latency cycles since wbck is 8 cycles
1442                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1443                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1444                                InstrStage<1, [A9_MUX0], 0>,
1445                                InstrStage<3, [A9_NPipe]>],
1446                               [4, 1, 2, 2, 3, 1]>,
1447   InstrItinData<IIC_VTBX4,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1448                                // Extra latency cycles since wbck is 8 cycles
1449                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1450                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1451                                InstrStage<1, [A9_MUX0], 0>,
1452                                InstrStage<2, [A9_NPipe]>],
1453                               [4, 1, 2, 2, 3, 3, 1]>
1454 ]>;