c199ef7f2b2a40561b8dd9ad1fb0ed6604e1e90a
[oota-llvm.git] / lib / Target / ARM / ARMScheduleA9.td
1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
11 //
12 //===----------------------------------------------------------------------===//
13
14 //
15 // Ad-hoc scheduling information derived from the rather vague "Cortex-A9
16 // Technical Reference Manual".
17 //
18 // Functional units
19 def A9_Issue0  : FuncUnit; // Issue unit 0
20 def A9_Issue1  : FuncUnit; // Issue unit 1
21 def A9_Branch  : FuncUnit; // Branch unit
22 def A9_ALU0    : FuncUnit; // ALU pipeline 0; the integer multiplies use only this one
23 def A9_ALU1    : FuncUnit; // ALU pipeline 1
24 def A9_AGU     : FuncUnit; // Address generation unit for loads / stores
25 def A9_NPipe   : FuncUnit; // NEON pipeline; the VFP itineraries execute here as well
26 def A9_MUX0    : FuncUnit; // AGU + NEON/FPU multiplexer
27 def A9_LS0     : FuncUnit; // L/S Units, 32-bit per unit. Fake FU to limit l/s.
28 def A9_LS1     : FuncUnit; // L/S Units, 32-bit per unit.
29 def A9_DRegsVFP: FuncUnit; // Artificial FU: shared FP register set, VFP side
30 def A9_DRegsN  : FuncUnit; // Artificial FU: shared FP register set, NEON side
31
32 // Bypasses
33 def A9_LdBypass : Bypass; // Listed on load results and on ALU / compare operands
34
35 def CortexA9Itineraries : ProcessorItineraries<
36   [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
37    A9_LS0, A9_LS1, A9_DRegsVFP, A9_DRegsN],
38   [A9_LdBypass], [
39   // Two fully-pipelined integer ALU pipelines
40
41   //
42   // Move instructions, unconditional
43   InstrItinData<IIC_iMOVi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
44                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
45   InstrItinData<IIC_iMOVr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
46                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
47   InstrItinData<IIC_iMOVsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
48                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
49   InstrItinData<IIC_iMOVsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
50                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
51   InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
52                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
53                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
54   //
55   // MVN instructions
56   InstrItinData<IIC_iMVNi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
57                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
58                               [1]>,
59   InstrItinData<IIC_iMVNr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
60                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
61                               [1, 1], [NoBypass, A9_LdBypass]>,
62   InstrItinData<IIC_iMVNsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
63                                InstrStage<2, [A9_ALU0, A9_ALU1]>],
64                               [2, 1]>,
65   InstrItinData<IIC_iMVNsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
66                                InstrStage<3, [A9_ALU0, A9_ALU1]>],
67                               [3, 1, 1]>,
68   //
69   // No operand cycles
70   InstrItinData<IIC_iALUx   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
71                                InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
72   //
73   // Binary Instructions that produce a result
74   InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
75                              InstrStage<1, [A9_ALU0, A9_ALU1]>],
76                             [1, 1], [NoBypass, A9_LdBypass]>,
77   InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
78                              InstrStage<1, [A9_ALU0, A9_ALU1]>],
79                             [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
80   InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
81                              InstrStage<2, [A9_ALU0, A9_ALU1]>],
82                             [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
83   InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
84                              InstrStage<2, [A9_ALU0, A9_ALU1]>],
85                             [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
86   InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
87                              InstrStage<3, [A9_ALU0, A9_ALU1]>],
88                             [3, 1, 1, 1],
89                             [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
90   //
91   // Bitwise Instructions that produce a result
92   InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
93                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
94   InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
95                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
96   InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
97                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
98   InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
99                              InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
100   //
101   // Unary Instructions that produce a result
102
103   // CLZ, RBIT, etc.
104   InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
105                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
106
107   // BFC, BFI, UBFX, SBFX
108   InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
109                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
110
111   //
112   // Zero and sign extension instructions
113   InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
114                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
115   InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
116                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
117   InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
118                              InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
119   //
120   // Compare instructions
121   InstrItinData<IIC_iCMPi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
122                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
123                                [1], [A9_LdBypass]>,
124   InstrItinData<IIC_iCMPr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
125                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
126                                [1, 1], [A9_LdBypass, A9_LdBypass]>,
127   InstrItinData<IIC_iCMPsi  , [InstrStage<2, [A9_ALU0, A9_ALU1]>],
128                                 [1, 1], [A9_LdBypass, NoBypass]>,
129   InstrItinData<IIC_iCMPsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
130                                InstrStage<3, [A9_ALU0, A9_ALU1]>],
131                               [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
132   //
133   // Test instructions
134   InstrItinData<IIC_iTSTi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
135                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
136   InstrItinData<IIC_iTSTr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
137                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
138   InstrItinData<IIC_iTSTsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
139                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
140   InstrItinData<IIC_iTSTsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
141                                InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
142   //
143   // Move instructions, conditional
144   // FIXME: Correctly model the extra input dep on the destination.
145   InstrItinData<IIC_iCMOVi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
146                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
147   InstrItinData<IIC_iCMOVr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
148                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
149   InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
150                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
151   InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
152                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
153
154   // Integer multiply pipeline
155   //
156   InstrItinData<IIC_iMUL16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
157                                InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
158   InstrItinData<IIC_iMAC16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
159                                InstrStage<2, [A9_ALU0]>],
160                               [3, 1, 1, 1]>,
161   InstrItinData<IIC_iMUL32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
162                                InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
163   InstrItinData<IIC_iMAC32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
164                                InstrStage<2, [A9_ALU0]>],
165                               [4, 1, 1, 1]>,
166   InstrItinData<IIC_iMUL64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
167                                InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
168   InstrItinData<IIC_iMAC64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
169                                InstrStage<3, [A9_ALU0]>],
170                               [4, 5, 1, 1]>,
171   // Integer load pipeline
172   // FIXME: The timings are some rough approximations
173   //
174   // Immediate offset
175   InstrItinData<IIC_iLoad_i   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
176                                  InstrStage<1, [A9_MUX0], 0>,
177                                  InstrStage<1, [A9_AGU]>,
178                                  InstrStage<1, [A9_LS0, A9_LS1]>],
179                                 [3, 1], [A9_LdBypass]>,
180   InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
181                                  InstrStage<1, [A9_MUX0], 0>,
182                                  InstrStage<2, [A9_AGU]>,
183                                  InstrStage<1, [A9_LS0, A9_LS1]>],
184                                 [4, 1], [A9_LdBypass]>,
185   // FIXME: If address is 64-bit aligned, AGU cycles is 1.
186   InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
187                                  InstrStage<1, [A9_MUX0], 0>,
188                                  InstrStage<2, [A9_AGU]>,
189                                  InstrStage<1, [A9_LS0, A9_LS1]>],
190                                 [3, 3, 1], [A9_LdBypass]>,
191   //
192   // Register offset
193   InstrItinData<IIC_iLoad_r   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
194                                  InstrStage<1, [A9_MUX0], 0>,
195                                  InstrStage<1, [A9_AGU]>,
196                                  InstrStage<1, [A9_LS0, A9_LS1]>],
197                                 [3, 1, 1], [A9_LdBypass]>,
198   InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
199                                  InstrStage<1, [A9_MUX0], 0>,
200                                  InstrStage<2, [A9_AGU]>,
201                                  InstrStage<1, [A9_LS0, A9_LS1]>],
202                                 [4, 1, 1], [A9_LdBypass]>,
203   InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
204                                  InstrStage<1, [A9_MUX0], 0>,
205                                  InstrStage<2, [A9_AGU]>,
206                                  InstrStage<1, [A9_LS0, A9_LS1]>],
207                                 [3, 3, 1, 1], [A9_LdBypass]>,
208   //
209   // Scaled register offset
210   InstrItinData<IIC_iLoad_si  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
211                                  InstrStage<1, [A9_MUX0], 0>,
212                                  InstrStage<1, [A9_AGU]>,
213                                  InstrStage<1, [A9_LS0, A9_LS1]>],
214                                 [4, 1, 1], [A9_LdBypass]>,
215   InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
216                                  InstrStage<1, [A9_MUX0], 0>,
217                                  InstrStage<2, [A9_AGU]>,
218                                  InstrStage<1, [A9_LS0, A9_LS1]>],
219                                 [5, 1, 1], [A9_LdBypass]>,
220   //
221   // Immediate offset with update
222   InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
223                                  InstrStage<1, [A9_MUX0], 0>,
224                                  InstrStage<1, [A9_AGU]>,
225                                  InstrStage<1, [A9_LS0, A9_LS1]>],
226                                 [3, 2, 1], [A9_LdBypass]>,
227   InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
228                                  InstrStage<1, [A9_MUX0], 0>,
229                                  InstrStage<2, [A9_AGU]>,
230                                  InstrStage<1, [A9_LS0, A9_LS1]>],
231                                 [4, 3, 1], [A9_LdBypass]>,
232   //
233   // Register offset with update
234   InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
235                                  InstrStage<1, [A9_MUX0], 0>,
236                                  InstrStage<1, [A9_AGU]>,
237                                  InstrStage<1, [A9_LS0, A9_LS1]>],
238                                 [3, 2, 1, 1], [A9_LdBypass]>,
239   InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
240                                  InstrStage<1, [A9_MUX0], 0>,
241                                  InstrStage<2, [A9_AGU]>,
242                                  InstrStage<1, [A9_LS0, A9_LS1]>],
243                                 [4, 3, 1, 1], [A9_LdBypass]>,
244   InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
245                                  InstrStage<1, [A9_MUX0], 0>,
246                                  InstrStage<2, [A9_AGU]>,
247                                  InstrStage<1, [A9_LS0, A9_LS1]>],
248                                 [3, 3, 1, 1], [A9_LdBypass]>,
249   //
250   // Scaled register offset with update
251   InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
252                                  InstrStage<1, [A9_MUX0], 0>,
253                                  InstrStage<1, [A9_AGU]>,
254                                  InstrStage<1, [A9_LS0, A9_LS1]>],
255                                 [4, 3, 1, 1], [A9_LdBypass]>,
256   InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
257                                   InstrStage<1, [A9_MUX0], 0>,
258                                   InstrStage<2, [A9_AGU]>,
259                                   InstrStage<1, [A9_LS0, A9_LS1]>],
260                                  [5, 4, 1, 1], [A9_LdBypass]>,
261   //
262   // Load multiple, def is the 5th operand.
263   // FIXME: This assumes 3 to 4 registers.
264   InstrItinData<IIC_iLoad_m  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
265                                 InstrStage<1, [A9_MUX0], 0>,
266                                 InstrStage<2, [A9_AGU]>,
267                                 InstrStage<2, [A9_LS0, A9_LS1]>],
268                                [1, 1, 1, 1, 3],
269                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
270   //
271   // Load multiple + update, defs are the 1st and 5th operands.
272   InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
273                                 InstrStage<1, [A9_MUX0], 0>,
274                                 InstrStage<2, [A9_AGU]>,
275                                 InstrStage<2, [A9_LS0, A9_LS1]>],
276                                [2, 1, 1, 1, 3],
277                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
278   //
279   // Load multiple plus branch
280   InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
281                                 InstrStage<1, [A9_MUX0], 0>,
282                                 InstrStage<1, [A9_AGU]>,
283                                 InstrStage<2, [A9_LS0, A9_LS1]>,
284                                 InstrStage<1, [A9_Branch]>],
285                                [1, 2, 1, 1, 3],
286                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
287   //
288   // Pop, def is the 3rd operand.
289   InstrItinData<IIC_iPop  ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
290                                 InstrStage<1, [A9_MUX0], 0>,
291                                 InstrStage<2, [A9_AGU]>,
292                                 InstrStage<2, [A9_LS0, A9_LS1]>],
293                                [1, 1, 3],
294                                [NoBypass, NoBypass, A9_LdBypass]>,
295   //
296   // Pop + branch, def is the 3rd operand.
297   InstrItinData<IIC_iPop_Br,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
298                                 InstrStage<1, [A9_MUX0], 0>,
299                                 InstrStage<2, [A9_AGU]>,
300                                 InstrStage<2, [A9_LS0, A9_LS1]>,
301                                 InstrStage<1, [A9_Branch]>],
302                                [1, 1, 3],
303                                [NoBypass, NoBypass, A9_LdBypass]>,
304
305   //
306   // iLoadi + iALUr for t2LDRpci_pic.
307   InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
308                                 InstrStage<1, [A9_MUX0], 0>,
309                                 InstrStage<1, [A9_AGU]>,
310                                 InstrStage<1, [A9_LS0, A9_LS1]>,
311                                 InstrStage<1, [A9_ALU0, A9_ALU1]>],
312                                [2, 1]>,
313
314   // Integer store pipeline
315   //
316   // Immediate offset
317   InstrItinData<IIC_iStore_i  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
318                                  InstrStage<1, [A9_MUX0], 0>,
319                                  InstrStage<1, [A9_AGU]>,
320                                  InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1]>,
321   InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
322                                  InstrStage<1, [A9_MUX0], 0>,
323                                  InstrStage<2, [A9_AGU]>,
324                                  InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1]>,
325   // FIXME: If address is 64-bit aligned, AGU cycles is 1.
326   InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
327                                  InstrStage<1, [A9_MUX0], 0>,
328                                  InstrStage<2, [A9_AGU]>,
329                                  InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1]>,
330   //
331   // Register offset
332   InstrItinData<IIC_iStore_r  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
333                                  InstrStage<1, [A9_MUX0], 0>,
334                                  InstrStage<1, [A9_AGU]>,
335                                  InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>,
336   InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
337                                  InstrStage<1, [A9_MUX0], 0>,
338                                  InstrStage<2, [A9_AGU]>,
339                                  InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>,
340   InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
341                                  InstrStage<1, [A9_MUX0], 0>,
342                                  InstrStage<2, [A9_AGU]>,
343                                  InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>,
344   //
345   // Scaled register offset
346   InstrItinData<IIC_iStore_si ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
347                                   InstrStage<1, [A9_MUX0], 0>,
348                                   InstrStage<1, [A9_AGU]>,
349                                   InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>,
350   InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
351                                   InstrStage<1, [A9_MUX0], 0>,
352                                   InstrStage<2, [A9_AGU]>,
353                                   InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>,
354   //
355   // Immediate offset with update
356   InstrItinData<IIC_iStore_iu ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
357                                   InstrStage<1, [A9_MUX0], 0>,
358                                   InstrStage<1, [A9_AGU]>,
359                                   InstrStage<1, [A9_LS0, A9_LS1]>], [2, 1, 1]>,
360   InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
361                                   InstrStage<1, [A9_MUX0], 0>,
362                                   InstrStage<2, [A9_AGU]>,
363                                   InstrStage<1, [A9_LS0, A9_LS1]>], [3, 1, 1]>,
364   //
365   // Register offset with update
366   InstrItinData<IIC_iStore_ru ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
367                                   InstrStage<1, [A9_MUX0], 0>,
368                                   InstrStage<1, [A9_AGU]>,
369                                   InstrStage<1, [A9_LS0, A9_LS1]>],
370                                  [2, 1, 1, 1]>,
371   InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
372                                   InstrStage<1, [A9_MUX0], 0>,
373                                   InstrStage<2, [A9_AGU]>,
374                                   InstrStage<1, [A9_LS0, A9_LS1]>],
375                                  [3, 1, 1, 1]>,
376   InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
377                                   InstrStage<1, [A9_MUX0], 0>,
378                                   InstrStage<2, [A9_AGU]>,
379                                   InstrStage<1, [A9_LS0, A9_LS1]>],
380                                  [3, 1, 1, 1]>,
381   //
382   // Scaled register offset with update
383   InstrItinData<IIC_iStore_siu,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
384                                     InstrStage<1, [A9_MUX0], 0>,
385                                     InstrStage<1, [A9_AGU]>,
386                                     InstrStage<1, [A9_LS0, A9_LS1]>],
387                                    [2, 1, 1, 1]>,
388   InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
389                                     InstrStage<1, [A9_MUX0], 0>,
390                                     InstrStage<2, [A9_AGU]>,
391                                     InstrStage<1, [A9_LS0, A9_LS1]>],
392                                    [3, 1, 1, 1]>,
393   //
394   // Store multiple
395   InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
396                                 InstrStage<1, [A9_MUX0], 0>,
397                                 InstrStage<1, [A9_AGU]>,
398                                 InstrStage<2, [A9_LS0, A9_LS1]>]>,
399   //
400   // Store multiple + update
401   InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
402                                 InstrStage<1, [A9_MUX0], 0>,
403                                 InstrStage<1, [A9_AGU]>,
404                                 InstrStage<2, [A9_LS0, A9_LS1]>], [2]>,
405
406   // Branch
407   //
408   // no delay slots, so the latency of a branch is unimportant
409   InstrItinData<IIC_Br       , [InstrStage<1, [A9_Issue0], 0>,
410                                 InstrStage<1, [A9_Issue1], 0>,
411                                 InstrStage<1, [A9_Branch]>]>,
412
413   // VFP and NEON share the same register file. This means that every VFP
414   // instruction should wait for full completion of the consecutive NEON
415   // instruction and vice-versa. We model this behavior with two artificial FUs:
416   // DRegsVFP and DRegsN.
417   //
418   // Every VFP instruction:
419   //  - Acquires DRegsVFP resource for 1 cycle
420   //  - Reserves DRegsN resource for the whole duration (including time to
421   //    register file writeback!).
422   // Every NEON instruction does the same but with FUs swapped.
423   //
424   // Since the reserved FU cannot be acquired, this models precisely
425   // "cross-domain" stalls.
426
427   // VFP
428   // Issue through integer pipeline, and execute in NEON unit.
429
430   // FP Special Register to Integer Register File Move
431   InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
432                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
433                               InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
434                               InstrStage<1, [A9_MUX0], 0>,
435                               InstrStage<1, [A9_NPipe]>]>,
436   //
437   // Single-precision FP Unary
438   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
439                                // Extra latency cycles since wbck is 2 cycles
440                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
441                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
442                                InstrStage<1, [A9_MUX0], 0>,
443                                InstrStage<1, [A9_NPipe]>],
444                               [1, 1]>,
445   //
446   // Double-precision FP Unary
447   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
448                                // Extra latency cycles since wbck is 2 cycles
449                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
450                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
451                                InstrStage<1, [A9_MUX0], 0>,
452                                InstrStage<1, [A9_NPipe]>],
453                               [1, 1]>,
454
455   //
456   // Single-precision FP Compare
457   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
458                                // Extra latency cycles since wbck is 4 cycles
459                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
460                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
461                                InstrStage<1, [A9_MUX0], 0>,
462                                InstrStage<1, [A9_NPipe]>],
463                               [1, 1]>,
464   //
465   // Double-precision FP Compare
466   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
467                                // Extra latency cycles since wbck is 4 cycles
468                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
469                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
470                                InstrStage<1, [A9_MUX0], 0>,
471                                InstrStage<1, [A9_NPipe]>],
472                               [1, 1]>,
473   //
474   // Single to Double FP Convert
475   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
476                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
477                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
478                                InstrStage<1, [A9_MUX0], 0>,
479                                InstrStage<1, [A9_NPipe]>],
480                               [4, 1]>,
481   //
482   // Double to Single FP Convert
483   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
484                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
485                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
486                                InstrStage<1, [A9_MUX0], 0>,
487                                InstrStage<1, [A9_NPipe]>],
488                               [4, 1]>,
489
490   //
491   // Single to Half FP Convert
492   InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
493                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
494                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
495                                InstrStage<1, [A9_MUX0], 0>,
496                                InstrStage<1, [A9_NPipe]>],
497                               [4, 1]>,
498   //
499   // Half to Single FP Convert
500   InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
501                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
502                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
503                                InstrStage<1, [A9_MUX0], 0>,
504                                InstrStage<1, [A9_NPipe]>],
505                               [2, 1]>,
506
507   //
508   // Single-Precision FP to Integer Convert
509   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
510                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
511                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
512                                InstrStage<1, [A9_MUX0], 0>,
513                                InstrStage<1, [A9_NPipe]>],
514                               [4, 1]>,
515   //
516   // Double-Precision FP to Integer Convert
517   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
518                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
519                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
520                                InstrStage<1, [A9_MUX0], 0>,
521                                InstrStage<1, [A9_NPipe]>],
522                               [4, 1]>,
523   //
524   // Integer to Single-Precision FP Convert
525   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
526                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
527                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
528                                InstrStage<1, [A9_MUX0], 0>,
529                                InstrStage<1, [A9_NPipe]>],
530                               [4, 1]>,
531   //
532   // Integer to Double-Precision FP Convert
533   InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
534                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
535                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
536                                InstrStage<1, [A9_MUX0], 0>,
537                                InstrStage<1, [A9_NPipe]>],
538                               [4, 1]>,
539   //
540   // Single-precision FP ALU
541   InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
542                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
543                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
544                                InstrStage<1, [A9_MUX0], 0>,
545                                InstrStage<1, [A9_NPipe]>],
546                               [4, 1, 1]>,
547   //
548   // Double-precision FP ALU
549   InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
550                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
551                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
552                                InstrStage<1, [A9_MUX0], 0>,
553                                InstrStage<1, [A9_NPipe]>],
554                               [4, 1, 1]>,
555   //
556   // Single-precision FP Multiply
557   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
558                                InstrStage<6, [A9_DRegsN],   0, Reserved>,
559                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
560                                InstrStage<1, [A9_MUX0], 0>,
561                                InstrStage<1, [A9_NPipe]>],
562                               [5, 1, 1]>,
563   //
564   // Double-precision FP Multiply
565   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
566                                InstrStage<7, [A9_DRegsN],   0, Reserved>,
567                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
568                                InstrStage<1, [A9_MUX0], 0>,
569                                InstrStage<2, [A9_NPipe]>],
570                               [6, 1, 1]>,
571   //
572   // Single-precision FP MAC
573   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
574                                InstrStage<9, [A9_DRegsN],   0, Reserved>,
575                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
576                                InstrStage<1, [A9_MUX0], 0>,
577                                InstrStage<1, [A9_NPipe]>],
578                               [8, 0, 1, 1]>,
579   //
580   // Double-precision FP MAC
581   InstrItinData<IIC_fpMAC64 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
582                                InstrStage<10, [A9_DRegsN],  0, Reserved>,
583                                InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
584                                InstrStage<1,  [A9_MUX0], 0>,
585                                InstrStage<2,  [A9_NPipe]>],
586                               [9, 0, 1, 1]>,
587   //
588   // Single-precision FP DIV
589   InstrItinData<IIC_fpDIV32 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
590                                InstrStage<16, [A9_DRegsN],  0, Reserved>,
591                                InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
592                                InstrStage<1,  [A9_MUX0], 0>,
593                                InstrStage<10, [A9_NPipe]>],
594                               [15, 1, 1]>,
595   //
596   // Double-precision FP DIV
597   InstrItinData<IIC_fpDIV64 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
598                                InstrStage<26, [A9_DRegsN],  0, Reserved>,
599                                InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
600                                InstrStage<1,  [A9_MUX0], 0>,
601                                InstrStage<20, [A9_NPipe]>],
602                               [25, 1, 1]>,
603   //
604   // Single-precision FP SQRT
605   InstrItinData<IIC_fpSQRT32, [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
606                                InstrStage<18, [A9_DRegsN],   0, Reserved>,
607                                InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
608                                InstrStage<1,  [A9_MUX0], 0>,
609                                InstrStage<13, [A9_NPipe]>],
610                               [17, 1]>,
611   //
612   // Double-precision FP SQRT
613   InstrItinData<IIC_fpSQRT64, [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
614                                InstrStage<33, [A9_DRegsN],   0, Reserved>,
615                                InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
616                                InstrStage<1,  [A9_MUX0], 0>,
617                                InstrStage<28, [A9_NPipe]>],
618                               [32, 1]>,
619
620   //
621   // Integer to Single-precision Move
622   InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
623                                // Extra 1 latency cycle since wbck is 2 cycles
624                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
625                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
626                                InstrStage<1, [A9_MUX0], 0>,
627                                InstrStage<1, [A9_NPipe]>],
628                               [1, 1]>,
629   //
630   // Integer to Double-precision Move
631   InstrItinData<IIC_fpMOVID,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
632                                // Extra 1 latency cycle since wbck is 2 cycles
633                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
634                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
635                                InstrStage<1, [A9_MUX0], 0>,
636                                InstrStage<1, [A9_NPipe]>],
637                               [1, 1, 1]>,
638   //
639   // Single-precision to Integer Move
640   InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
641                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
642                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
643                                InstrStage<1, [A9_MUX0], 0>,
644                                InstrStage<1, [A9_NPipe]>],
645                               [1, 1]>,
646   //
647   // Double-precision to Integer Move
648   InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
649                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
650                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
651                                InstrStage<1, [A9_MUX0], 0>,
652                                InstrStage<1, [A9_NPipe]>],
653                               [1, 1, 1]>,
654   //
655   // Single-precision FP Load
656   InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
657                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
658                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
659                                InstrStage<1, [A9_MUX0], 0>,
660                                InstrStage<1, [A9_NPipe]>],
661                               [1, 1]>,
662   //
663   // Double-precision FP Load
664   // FIXME: Result latency is 1 if address is 64-bit aligned.
665   InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
666                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
667                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
668                                InstrStage<1, [A9_MUX0], 0>,
669                                InstrStage<1, [A9_NPipe]>],
670                               [2, 1]>,
671   //
672   // FP Load Multiple
673   InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
674                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
675                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
676                                InstrStage<1, [A9_MUX0], 0>,
677                                InstrStage<1, [A9_NPipe]>], [1, 1, 1, 1]>,
678   //
679   // FP Load Multiple + update
680   InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
681                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
682                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
683                                InstrStage<1, [A9_MUX0], 0>,
684                                InstrStage<1, [A9_NPipe]>], [2, 1, 1, 1]>,
685   //
686   // Single-precision FP Store
687   InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
688                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
689                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
690                                InstrStage<1, [A9_MUX0], 0>,
691                                InstrStage<1, [A9_NPipe]>],
692                               [1, 1]>,
693   //
694   // Double-precision FP Store
695   InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
696                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
697                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
698                                InstrStage<1, [A9_MUX0], 0>,
699                                InstrStage<1, [A9_NPipe]>],
700                               [1, 1]>,
701   //
702   // FP Store Multiple
703   InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
704                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
705                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
706                                InstrStage<1, [A9_MUX0], 0>,
707                                InstrStage<1, [A9_NPipe]>], [1, 1, 1, 1]>,
708   //
709   // FP Store Multiple + update
710   InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
711                                 InstrStage<2, [A9_DRegsN],   0, Reserved>,
712                                 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
713                                 InstrStage<1, [A9_MUX0], 0>,
714                                 InstrStage<1, [A9_NPipe]>], [2, 1, 1, 1]>,
715   // NEON
716   // VLD1
717   // FIXME: Conservatively assume insufficient alignment.
718   InstrItinData<IIC_VLD1,     [InstrStage<1, [A9_DRegsN],   0, Required>,
719                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
720                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
721                                InstrStage<1, [A9_MUX0], 0>,
722                                InstrStage<2, [A9_NPipe]>],
723                               [2, 1]>,
724   // VLD1x2
725   InstrItinData<IIC_VLD1x2,   [InstrStage<1, [A9_DRegsN],   0, Required>,
726                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
727                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
728                                InstrStage<1, [A9_MUX0], 0>,
729                                InstrStage<2, [A9_NPipe]>],
730                               [2, 2, 1]>,
731   // VLD1x3
732   InstrItinData<IIC_VLD1x3,   [InstrStage<1, [A9_DRegsN],   0, Required>,
733                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
734                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
735                                InstrStage<1, [A9_MUX0], 0>,
736                                InstrStage<3, [A9_NPipe]>],
737                               [2, 2, 3, 1]>,
738   // VLD1x4
739   InstrItinData<IIC_VLD1x4,   [InstrStage<1, [A9_DRegsN],   0, Required>,
740                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
741                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
742                                InstrStage<1, [A9_MUX0], 0>,
743                                InstrStage<3, [A9_NPipe]>],
744                               [2, 2, 3, 3, 1]>,
745   // VLD1u
746   InstrItinData<IIC_VLD1u,    [InstrStage<1, [A9_DRegsN],   0, Required>,
747                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
748                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
749                                InstrStage<1, [A9_MUX0], 0>,
750                                InstrStage<2, [A9_NPipe]>],
751                               [2, 2, 1]>,
752   // VLD1x2u
753   InstrItinData<IIC_VLD1x2u,  [InstrStage<1, [A9_DRegsN],   0, Required>,
754                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
755                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
756                                InstrStage<1, [A9_MUX0], 0>,
757                                InstrStage<2, [A9_NPipe]>],
758                               [2, 2, 2, 1]>,
759   // VLD1x3u
760   InstrItinData<IIC_VLD1x3u,  [InstrStage<1, [A9_DRegsN],   0, Required>,
761                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
762                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
763                                InstrStage<1, [A9_MUX0], 0>,
764                                InstrStage<3, [A9_NPipe]>],
765                               [2, 2, 3, 2, 1]>,
766   // VLD1x4u
767   InstrItinData<IIC_VLD1x4u,  [InstrStage<1, [A9_DRegsN],   0, Required>,
768                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
769                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
770                                InstrStage<1, [A9_MUX0], 0>,
771                                InstrStage<3, [A9_NPipe]>],
772                               [2, 2, 3, 3, 2, 1]>,
773   //
774   // VLD2
775   InstrItinData<IIC_VLD2,     [InstrStage<1, [A9_DRegsN],   0, Required>,
776                                // Extra latency cycles since wbck is 7 cycles
777                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
778                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
779                                InstrStage<1, [A9_MUX0], 0>,
780                                InstrStage<2, [A9_NPipe]>],
781                               [3, 3, 1]>,
782   //
783   // VLD2x2
784   InstrItinData<IIC_VLD2x2,   [InstrStage<1, [A9_DRegsN],   0, Required>,
785                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
786                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
787                                InstrStage<1, [A9_MUX0], 0>,
788                                InstrStage<3, [A9_NPipe]>],
789                               [3, 4, 3, 4, 1]>,
790   //
791   // VLD2ln
792   InstrItinData<IIC_VLD2ln,   [InstrStage<1, [A9_DRegsN],   0, Required>,
793                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
794                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
795                                InstrStage<1, [A9_MUX0], 0>,
796                                InstrStage<3, [A9_NPipe]>],
797                               [4, 4, 1, 1, 1, 1]>,
798   //
799   // VLD2u
800   InstrItinData<IIC_VLD2u,    [InstrStage<1, [A9_DRegsN],   0, Required>,
801                                // Extra latency cycles since wbck is 7 cycles
802                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
803                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
804                                InstrStage<1, [A9_MUX0], 0>,
805                                InstrStage<2, [A9_NPipe]>],
806                               [3, 3, 2, 1, 1, 1]>,
807   //
808   // VLD2x2u
809   InstrItinData<IIC_VLD2x2u,  [InstrStage<1, [A9_DRegsN],   0, Required>,
810                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
811                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
812                                InstrStage<1, [A9_MUX0], 0>,
813                                InstrStage<3, [A9_NPipe]>],
814                               [3, 4, 3, 4, 2, 1]>,
815   //
816   // VLD2lnu
817   InstrItinData<IIC_VLD2lnu,  [InstrStage<1, [A9_DRegsN],   0, Required>,
818                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
819                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
820                                InstrStage<1, [A9_MUX0], 0>,
821                                InstrStage<3, [A9_NPipe]>],
822                               [4, 4, 2, 1, 1, 1, 1, 1]>,
823   //
824   // VLD3
825   // FIXME: We don't model this instruction properly
826   InstrItinData<IIC_VLD3,     [InstrStage<1, [A9_DRegsN],   0, Required>,
827                                // Extra latency cycles since wbck is 6 cycles
828                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
829                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
830                                InstrStage<1, [A9_MUX0], 0>,
831                                InstrStage<1, [A9_NPipe]>],
832                               [2, 2, 2, 1]>,
833   //
834   // VLD4
835   // FIXME: We don't model this instruction properly
836   InstrItinData<IIC_VLD4,     [InstrStage<1, [A9_DRegsN],   0, Required>,
837                                // Extra latency cycles since wbck is 6 cycles
838                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
839                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
840                                InstrStage<1, [A9_MUX0], 0>,
841                                InstrStage<1, [A9_NPipe]>],
842                               [2, 2, 2, 2, 1]>,
843   //
844   // VST
845   // FIXME: We don't model this instruction properly
846   InstrItinData<IIC_VST,      [InstrStage<1, [A9_DRegsN],   0, Required>,
847                                // Extra latency cycles since wbck is 6 cycles
848                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
849                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
850                                InstrStage<1, [A9_MUX0], 0>,
851                                InstrStage<1, [A9_NPipe]>]>,
852   //
853   // Double-register Integer Unary
854   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
855                                // Extra latency cycles since wbck is 6 cycles
856                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
857                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
858                                InstrStage<1, [A9_MUX0], 0>,
859                                InstrStage<1, [A9_NPipe]>],
860                               [4, 2]>,
861   //
862   // Quad-register Integer Unary
863   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
864                                // Extra latency cycles since wbck is 6 cycles
865                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
866                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
867                                InstrStage<1, [A9_MUX0], 0>,
868                                InstrStage<1, [A9_NPipe]>],
869                               [4, 2]>,
870   //
871   // Double-register Integer Q-Unary
872   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A9_DRegsN],   0, Required>,
873                                // Extra latency cycles since wbck is 6 cycles
874                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
875                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
876                                InstrStage<1, [A9_MUX0], 0>,
877                                InstrStage<1, [A9_NPipe]>],
878                               [4, 1]>,
879   //
880   // Quad-register Integer Q-Unary
881   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
882                                // Extra latency cycles since wbck is 6 cycles
883                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
884                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
885                                InstrStage<1, [A9_MUX0], 0>,
886                                InstrStage<1, [A9_NPipe]>],
887                               [4, 1]>,
888   //
889   // Double-register Integer Binary
890   InstrItinData<IIC_VBINiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
891                                // Extra latency cycles since wbck is 6 cycles
892                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
893                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
894                                InstrStage<1, [A9_MUX0], 0>,
895                                InstrStage<1, [A9_NPipe]>],
896                               [3, 2, 2]>,
897   //
898   // Quad-register Integer Binary
899   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
900                                // Extra latency cycles since wbck is 6 cycles
901                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
902                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
903                                InstrStage<1, [A9_MUX0], 0>,
904                                InstrStage<1, [A9_NPipe]>],
905                               [3, 2, 2]>,
906   //
907   // Double-register Integer Subtract
908   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
909                                // Extra latency cycles since wbck is 6 cycles
910                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
911                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
912                                InstrStage<1, [A9_MUX0], 0>,
913                                InstrStage<1, [A9_NPipe]>],
914                               [3, 2, 1]>,
915   //
916   // Quad-register Integer Subtract
917   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
918                                // Extra latency cycles since wbck is 6 cycles
919                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
920                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
921                                InstrStage<1, [A9_MUX0], 0>,
922                                InstrStage<1, [A9_NPipe]>],
923                               [3, 2, 1]>,
924   //
925   // Double-register Integer Shift
926   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
927                                // Extra latency cycles since wbck is 6 cycles
928                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
929                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
930                                InstrStage<1, [A9_MUX0], 0>,
931                                InstrStage<1, [A9_NPipe]>],
932                               [3, 1, 1]>,
933   //
934   // Quad-register Integer Shift
935   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
936                                // Extra latency cycles since wbck is 6 cycles
937                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
938                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
939                                InstrStage<1, [A9_MUX0], 0>,
940                                InstrStage<1, [A9_NPipe]>],
941                               [3, 1, 1]>,
942   //
943   // Double-register Integer Shift (4 cycle)
944   InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
945                                // Extra latency cycles since wbck is 6 cycles
946                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
947                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
948                                InstrStage<1, [A9_MUX0], 0>,
949                                InstrStage<1, [A9_NPipe]>],
950                               [4, 1, 1]>,
951   //
952   // Quad-register Integer Shift (4 cycle)
953   InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
954                                // Extra latency cycles since wbck is 6 cycles
955                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
956                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
957                                InstrStage<1, [A9_MUX0], 0>,
958                                InstrStage<1, [A9_NPipe]>],
959                               [4, 1, 1]>,
960   //
961   // Double-register Integer Binary (4 cycle)
962   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
963                                // Extra latency cycles since wbck is 6 cycles
964                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
965                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
966                                InstrStage<1, [A9_MUX0], 0>,
967                                InstrStage<1, [A9_NPipe]>],
968                               [4, 2, 2]>,
969   //
970   // Quad-register Integer Binary (4 cycle)
971   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
972                                // Extra latency cycles since wbck is 6 cycles
973                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
974                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
975                                InstrStage<1, [A9_MUX0], 0>,
976                                InstrStage<1, [A9_NPipe]>],
977                               [4, 2, 2]>,
978   //
979   // Double-register Integer Subtract (4 cycle)
980   InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
981                                // Extra latency cycles since wbck is 6 cycles
982                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
983                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
984                                InstrStage<1, [A9_MUX0], 0>,
985                                InstrStage<1, [A9_NPipe]>],
986                               [4, 2, 1]>,
987   //
988   // Quad-register Integer Subtract (4 cycle)
989   InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
990                                // Extra latency cycles since wbck is 6 cycles
991                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
992                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
993                                InstrStage<1, [A9_MUX0], 0>,
994                                InstrStage<1, [A9_NPipe]>],
995                               [4, 2, 1]>,
996
997   //
998   // Double-register Integer Count
999   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
1000                                // Extra latency cycles since wbck is 6 cycles
1001                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1002                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1003                                InstrStage<1, [A9_MUX0], 0>,
1004                                InstrStage<1, [A9_NPipe]>],
1005                               [3, 2, 2]>,
1006   //
1007   // Quad-register Integer Count
1008   // Result written in N3, but that is relative to the last cycle of multicycle,
1009   // so we use 4 for those cases
1010   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
1011                                // Extra latency cycles since wbck is 7 cycles
1012                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1013                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1014                                InstrStage<1, [A9_MUX0], 0>,
1015                                InstrStage<2, [A9_NPipe]>],
1016                               [4, 2, 2]>,
1017   //
1018   // Double-register Absolute Difference and Accumulate
1019   InstrItinData<IIC_VABAD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1020                                // Extra latency cycles since wbck is 6 cycles
1021                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1022                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1023                                InstrStage<1, [A9_MUX0], 0>,
1024                                InstrStage<1, [A9_NPipe]>],
1025                               [6, 3, 2, 1]>,
1026   //
1027   // Quad-register Absolute Difference and Accumulate
1028   InstrItinData<IIC_VABAQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1029                                // Extra latency cycles since wbck is 6 cycles
1030                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1031                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1032                                InstrStage<1, [A9_MUX0], 0>,
1033                                InstrStage<2, [A9_NPipe]>],
1034                               [6, 3, 2, 1]>,
1035   //
1036   // Double-register Integer Pair Add Long
1037   InstrItinData<IIC_VPALiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
1038                                // Extra latency cycles since wbck is 6 cycles
1039                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1040                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1041                                InstrStage<1, [A9_MUX0], 0>,
1042                                InstrStage<1, [A9_NPipe]>],
1043                               [6, 3, 1]>,
1044   //
1045   // Quad-register Integer Pair Add Long
1046   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
1047                                // Extra latency cycles since wbck is 6 cycles
1048                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1049                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1050                                InstrStage<1, [A9_MUX0], 0>,
1051                                InstrStage<2, [A9_NPipe]>],
1052                               [6, 3, 1]>,
1053
1054   //
1055   // Double-register Integer Multiply (.8, .16)
1056   InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN],   0, Required>,
1057                                // Extra latency cycles since wbck is 6 cycles
1058                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1059                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1060                                InstrStage<1, [A9_MUX0], 0>,
1061                                InstrStage<1, [A9_NPipe]>],
1062                               [6, 2, 2]>,
1063   //
1064   // Quad-register Integer Multiply (.8, .16)
1065   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
1066                                // Extra latency cycles since wbck is 7 cycles
1067                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1068                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1069                                InstrStage<1, [A9_MUX0], 0>,
1070                                InstrStage<2, [A9_NPipe]>],
1071                               [7, 2, 2]>,
1072
1073   //
1074   // Double-register Integer Multiply (.32)
1075   InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN],   0, Required>,
1076                                // Extra latency cycles since wbck is 7 cycles
1077                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1078                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1079                                InstrStage<1, [A9_MUX0], 0>,
1080                                InstrStage<2, [A9_NPipe]>],
1081                               [7, 2, 1]>,
1082   //
1083   // Quad-register Integer Multiply (.32)
1084   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
1085                                // Extra latency cycles since wbck is 9 cycles
1086                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1087                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1088                                InstrStage<1, [A9_MUX0], 0>,
1089                                InstrStage<4, [A9_NPipe]>],
1090                               [9, 2, 1]>,
1091   //
1092   // Double-register Integer Multiply-Accumulate (.8, .16)
1093   InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN],   0, Required>,
1094                                // Extra latency cycles since wbck is 6 cycles
1095                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1096                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1097                                InstrStage<1, [A9_MUX0], 0>,
1098                                InstrStage<1, [A9_NPipe]>],
1099                               [6, 3, 2, 2]>,
1100   //
1101   // Double-register Integer Multiply-Accumulate (.32)
1102   InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN],   0, Required>,
1103                                // Extra latency cycles since wbck is 7 cycles
1104                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1105                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1106                                InstrStage<1, [A9_MUX0], 0>,
1107                                InstrStage<2, [A9_NPipe]>],
1108                               [7, 3, 2, 1]>,
1109   //
1110   // Quad-register Integer Multiply-Accumulate (.8, .16)
1111   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
1112                                // Extra latency cycles since wbck is 7 cycles
1113                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1114                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1115                                InstrStage<1, [A9_MUX0], 0>,
1116                                InstrStage<2, [A9_NPipe]>],
1117                               [7, 3, 2, 2]>,
1118   //
1119   // Quad-register Integer Multiply-Accumulate (.32)
1120   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
1121                                // Extra latency cycles since wbck is 9 cycles
1122                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1123                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1124                                InstrStage<1, [A9_MUX0], 0>,
1125                                InstrStage<4, [A9_NPipe]>],
1126                               [9, 3, 2, 1]>,
1127
1128   //
1129   // Move
1130   InstrItinData<IIC_VMOV,     [InstrStage<1, [A9_DRegsN],   0, Required>,
1131                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1132                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1133                                InstrStage<1, [A9_MUX0], 0>,
1134                                InstrStage<1, [A9_NPipe]>],
1135                               [1,1]>,
1136   //
1137   // Move Immediate
1138   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A9_DRegsN],   0, Required>,
1139                                // Extra latency cycles since wbck is 6 cycles
1140                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1141                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1142                                InstrStage<1, [A9_MUX0], 0>,
1143                                InstrStage<1, [A9_NPipe]>],
1144                               [3]>,
1145   //
1146   // Double-register Permute Move
1147   InstrItinData<IIC_VMOVD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1148   // FIXME: all latencies are arbitrary, no information is available
1149                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1150                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1151                                InstrStage<1, [A9_MUX0], 0>,
1152                                InstrStage<1, [A9_NPipe]>],
1153                               [2, 1]>,
1154   //
1155   // Quad-register Permute Move
1156   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1157   // FIXME: all latencies are arbitrary, no information is available
1158                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1159                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1160                                InstrStage<1, [A9_MUX0], 0>,
1161                                InstrStage<1, [A9_NPipe]>],
1162                               [2, 1]>,
1163   //
1164   // Integer to Single-precision Move
1165   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
1166   // FIXME: all latencies are arbitrary, no information is available
1167                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1168                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1169                                InstrStage<1, [A9_MUX0], 0>,
1170                                InstrStage<1, [A9_NPipe]>],
1171                               [2, 1]>,
1172   //
1173   // Integer to Double-precision Move
1174   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
1175   // FIXME: all latencies are arbitrary, no information is available
1176                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1177                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1178                                InstrStage<1, [A9_MUX0], 0>,
1179                                InstrStage<1, [A9_NPipe]>],
1180                               [2, 1, 1]>,
1181   //
1182   // Single-precision to Integer Move
1183   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
1184   // FIXME: all latencies are arbitrary, no information is available
1185                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1186                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1187                                InstrStage<1, [A9_MUX0], 0>,
1188                                InstrStage<1, [A9_NPipe]>],
1189                               [2, 1]>,
1190   //
1191   // Double-precision to Integer Move
1192   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
1193   // FIXME: all latencies are arbitrary, no information is available
1194                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1195                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1196                                InstrStage<1, [A9_MUX0], 0>,
1197                                InstrStage<1, [A9_NPipe]>],
1198                               [2, 2, 1]>,
1199   //
1200   // Integer to Lane Move
1201   InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN],   0, Required>,
1202   // FIXME: all latencies are arbitrary, no information is available
1203                                InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
1204                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1205                                InstrStage<1, [A9_MUX0], 0>,
1206                                InstrStage<2, [A9_NPipe]>],
1207                               [3, 1, 1]>,
1208
1209   //
1210   // Vector narrow move
1211   InstrItinData<IIC_VMOVN,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1212                                // Extra latency cycles since wbck is 6 cycles
1213                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1214                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1215                                InstrStage<1, [A9_MUX0], 0>,
1216                                InstrStage<1, [A9_NPipe]>],
1217                               [3, 1]>,
1218   //
1219   // Double-register FP Unary
1220   InstrItinData<IIC_VUNAD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1221                                // Extra latency cycles since wbck is 6 cycles
1222                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1223                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1224                                InstrStage<1, [A9_MUX0], 0>,
1225                                InstrStage<1, [A9_NPipe]>],
1226                               [5, 2]>,
1227   //
1228   // Quad-register FP Unary
1229   // Result written in N5, but that is relative to the last cycle of multicycle,
1230   // so we use 6 for those cases
1231   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1232                                // Extra latency cycles since wbck is 7 cycles
1233                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1234                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1235                                InstrStage<1, [A9_MUX0], 0>,
1236                                InstrStage<2, [A9_NPipe]>],
1237                               [6, 2]>,
1238   //
1239   // Double-register FP Binary
1240   // FIXME: We're using this itin for many instructions and [2, 2] here is too
1241   // optimistic.
1242   InstrItinData<IIC_VBIND,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1243                                // Extra latency cycles since wbck is 6 cycles
1244                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1245                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1246                                InstrStage<1, [A9_MUX0], 0>,
1247                                InstrStage<1, [A9_NPipe]>],
1248                               [5, 2, 2]>,
1249   //
1250   // Quad-register FP Binary
1251   // Result written in N5, but that is relative to the last cycle of multicycle,
1252   // so we use 6 for those cases
1253   // FIXME: We're using this itin for many instructions and [2, 2] here is too
1254   // optimistic.
1255   InstrItinData<IIC_VBINQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1256                                // Extra latency cycles since wbck is 7 cycles
1257                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1258                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1259                                InstrStage<1, [A9_MUX0], 0>,
1260                                InstrStage<2, [A9_NPipe]>],
1261                               [6, 2, 2]>,
1262   //
1263   // Double-register FP Multiply-Accumulate
1264   InstrItinData<IIC_VMACD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1265                                // Extra latency cycles since wbck is 7 cycles
1266                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1267                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1268                                InstrStage<1, [A9_MUX0], 0>,
1269                                InstrStage<2, [A9_NPipe]>],
1270                               [6, 3, 2, 1]>,
1271   //
1272   // Quad-register FP Multiply-Accumulate
1273   // Result written in N9, but that is relative to the last cycle of multicycle,
1274   // so we use 10 for those cases (NOTE(review): cycle list below has 8; confirm)
1275   InstrItinData<IIC_VMACQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1276                                // Extra latency cycles since wbck is 9 cycles
1277                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1278                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1279                                InstrStage<1, [A9_MUX0], 0>,
1280                                InstrStage<4, [A9_NPipe]>],
1281                               [8, 4, 2, 1]>,
1282   //
1283   // Double-register Reciprocal Step
1284   InstrItinData<IIC_VRECSD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
1285                                // Extra latency cycles since wbck is 7 cycles
1286                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1287                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1288                                InstrStage<1, [A9_MUX0], 0>,
1289                                InstrStage<2, [A9_NPipe]>],
1290                               [6, 2, 2]>,
1291   //
1292   // Quad-register Reciprocal Step
1293   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
1294                                // Extra latency cycles since wbck is 9 cycles
1295                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1296                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1297                                InstrStage<1, [A9_MUX0], 0>,
1298                                InstrStage<4, [A9_NPipe]>],
1299                               [8, 2, 2]>,
1300   //
1301   // Double-register Permute
1302   InstrItinData<IIC_VPERMD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
1303                                // Extra latency cycles since wbck is 6 cycles
1304                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1305                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1306                                InstrStage<1, [A9_MUX0], 0>,
1307                                InstrStage<1, [A9_NPipe]>],
1308                               [2, 2, 1, 1]>,
1309   //
1310   // Quad-register Permute
1311   // Result written in N2, but that is relative to the last cycle of multicycle,
1312   // so we use 3 for those cases
1313   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
1314                                // Extra latency cycles since wbck is 7 cycles
1315                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1316                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1317                                InstrStage<1, [A9_MUX0], 0>,
1318                                InstrStage<2, [A9_NPipe]>],
1319                               [3, 3, 1, 1]>,
1320   //
1321   // Quad-register Permute (3 cycle issue)
1322   // Result written in N2, but that is relative to the last cycle of multicycle,
1323   // so we use 4 for those cases
1324   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A9_DRegsN],   0, Required>,
1325                                // Extra latency cycles since wbck is 8 cycles
1326                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1327                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1328                                InstrStage<1, [A9_MUX0], 0>,
1329                                InstrStage<3, [A9_NPipe]>],
1330                               [4, 4, 1, 1]>,
1331
1332   //
1333   // Double-register VEXT
1334   InstrItinData<IIC_VEXTD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1335                                // Extra latency cycles since wbck is 6 cycles
1336                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1337                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1338                                InstrStage<1, [A9_MUX0], 0>,
1339                                InstrStage<1, [A9_NPipe]>],
1340                               [2, 1, 1]>,
1341   //
1342   // Quad-register VEXT
1343   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1344                                // Extra latency cycles since wbck is 7 cycles
1345                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1346                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1347                                InstrStage<1, [A9_MUX0], 0>,
1348                                InstrStage<2, [A9_NPipe]>],
1349                               [3, 1, 1]>,
1350   //
1351   // VTB
1352   InstrItinData<IIC_VTB1,     [InstrStage<1, [A9_DRegsN],   0, Required>,
1353                                // Extra latency cycles since wbck is 7 cycles
1354                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1355                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1356                                InstrStage<1, [A9_MUX0], 0>,
1357                                InstrStage<2, [A9_NPipe]>],
1358                               [3, 2, 1]>,
1359   InstrItinData<IIC_VTB2,     [InstrStage<2, [A9_DRegsN],   0, Required>,
1360                                // Extra latency cycles since wbck is 7 cycles
1361                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1362                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1363                                InstrStage<1, [A9_MUX0], 0>,
1364                                InstrStage<2, [A9_NPipe]>],
1365                               [3, 2, 2, 1]>,
1366   InstrItinData<IIC_VTB3,     [InstrStage<2, [A9_DRegsN],   0, Required>,
1367                                // Extra latency cycles since wbck is 8 cycles
1368                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1369                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1370                                InstrStage<1, [A9_MUX0], 0>,
1371                                InstrStage<3, [A9_NPipe]>],
1372                               [4, 2, 2, 3, 1]>,
1373   InstrItinData<IIC_VTB4,     [InstrStage<1, [A9_DRegsN],   0, Required>,
1374                                // Extra latency cycles since wbck is 8 cycles
1375                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1376                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1377                                InstrStage<1, [A9_MUX0], 0>,
1378                                InstrStage<3, [A9_NPipe]>],
1379                               [4, 2, 2, 3, 3, 1]>,
1380   //
1381   // VTBX
1382   InstrItinData<IIC_VTBX1,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1383                                // Extra latency cycles since wbck is 7 cycles
1384                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1385                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1386                                InstrStage<1, [A9_MUX0], 0>,
1387                                InstrStage<2, [A9_NPipe]>],
1388                               [3, 1, 2, 1]>,
1389   InstrItinData<IIC_VTBX2,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1390                                // Extra latency cycles since wbck is 7 cycles
1391                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1392                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1393                                InstrStage<1, [A9_MUX0], 0>,
1394                                InstrStage<2, [A9_NPipe]>],
1395                               [3, 1, 2, 2, 1]>,
1396   InstrItinData<IIC_VTBX3,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1397                                // Extra latency cycles since wbck is 8 cycles
1398                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1399                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1400                                InstrStage<1, [A9_MUX0], 0>,
1401                                InstrStage<3, [A9_NPipe]>],
1402                               [4, 1, 2, 2, 3, 1]>,
1403   InstrItinData<IIC_VTBX4,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1404                                // Extra latency cycles since wbck is 8 cycles
1405                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1406                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1407                                InstrStage<1, [A9_MUX0], 0>,
1408                                InstrStage<2, [A9_NPipe]>],
1409                               [4, 1, 2, 2, 3, 3, 1]>
1410 ]>;