02058618ad0800eeb6ba2f28bdaba0d2ff170348
[oota-llvm.git] / lib / Target / ARM / ARMScheduleA9.td
1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
11 //
12 //===----------------------------------------------------------------------===//
13
14 //
15 // Ad-hoc scheduling information derived from the rather vague "Cortex-A9
16 // Technical Reference Manual".
17 //
18 // Functional units referenced by the Cortex-A9 itinerary stages
19 def A9_Pipe0   : FuncUnit; // integer pipeline 0
20 def A9_Pipe1   : FuncUnit; // integer pipeline 1; ld/st, VFP and NEON also issue through it
21 def A9_AGU     : FuncUnit; // Address generation unit for ld / st
22 def A9_NPipe   : FuncUnit; // NEON ALU/MUL pipeline (also used by the VFP itineraries)
23 def A9_DRegsVFP: FuncUnit; // artificial FU: shared FP register set, VFP side
24 def A9_DRegsN  : FuncUnit; // artificial FU: shared FP register set, NEON side
25 def A9_MUX0    : FuncUnit; // AGU + NEON/FPU multiplexer
26
27 // Bypasses referenced from the itinerary operand lists
28 def A9_LdBypass : Bypass; // tagged on load results and on ALU/compare source operands
29
30 // Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1
31 //
32 def CortexA9Itineraries : ProcessorItineraries<
33   [A9_Pipe0, A9_Pipe1, A9_AGU, A9_NPipe, A9_DRegsVFP, A9_DRegsN, A9_MUX0],
34   [A9_LdBypass], [
35   // Two fully-pipelined integer ALU pipelines
36
37   //
38   // Move instructions, unconditional
39   InstrItinData<IIC_iMOVi   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>,
40   InstrItinData<IIC_iMOVr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
41   InstrItinData<IIC_iMOVsi  , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
42   InstrItinData<IIC_iMOVsr  , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
43   InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
44                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
45   //
46   // MVN instructions
47   InstrItinData<IIC_iMVNi   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
48                               [1]>,
49   InstrItinData<IIC_iMVNr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
50                               [1, 1], [NoBypass, A9_LdBypass]>,
51   InstrItinData<IIC_iMVNsi  , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>],
52                               [2, 1]>,
53   InstrItinData<IIC_iMVNsr  , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>],
54                               [3, 1, 1]>,
55   //
56   // No operand cycles
57   InstrItinData<IIC_iALUx   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
58   //
59   // Binary Instructions that produce a result
60   InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
61                             [1, 1], [NoBypass, A9_LdBypass]>,
62   InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
63                             [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
64   InstrItinData<IIC_iALUsi, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>],
65                             [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
66   InstrItinData<IIC_iALUsir,[InstrStage<2, [A9_Pipe0, A9_Pipe1]>],
67                             [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
68   InstrItinData<IIC_iALUsr,[InstrStage<3, [A9_Pipe0, A9_Pipe1]>],
69                             [3, 1, 1, 1],
70                             [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
71   //
72   // Bitwise Instructions that produce a result
73   InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
74   InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1, 1]>,
75   InstrItinData<IIC_iBITsi, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
76   InstrItinData<IIC_iBITsr,[InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [3, 1, 1, 1]>,
77   //
78   // Unary Instructions that produce a result
79
80   // CLZ, RBIT, etc.
81   InstrItinData<IIC_iUNAr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
82
83   // BFC, BFI, UBFX, SBFX
84   InstrItinData<IIC_iUNAsi  , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
85
86   //
87   // Zero and sign extension instructions
88   InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
89   InstrItinData<IIC_iEXTAr, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [3, 1, 1]>,
90   InstrItinData<IIC_iEXTAsr,[InstrStage<3, [A9_Pipe0, A9_Pipe1]>],[3, 1, 1, 1]>,
91   //
92   // Compare instructions
93   InstrItinData<IIC_iCMPi   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
94                               [1], [A9_LdBypass]>,
95   InstrItinData<IIC_iCMPr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
96                               [1, 1], [A9_LdBypass, A9_LdBypass]>,
97   InstrItinData<IIC_iCMPsi  , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>],
98                               [1, 1], [A9_LdBypass, NoBypass]>,
99   InstrItinData<IIC_iCMPsr  , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>],
100                               [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
101   //
102   // Test instructions
103   InstrItinData<IIC_iTSTi   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>,
104   InstrItinData<IIC_iTSTr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
105   InstrItinData<IIC_iTSTsi  , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
106   InstrItinData<IIC_iTSTsr  , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [1, 1, 1]>,
107   //
108   // Move instructions, conditional
109   // FIXME: Correctly model the extra input dep on the destination.
110   InstrItinData<IIC_iCMOVi  , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>,
111   InstrItinData<IIC_iCMOVr  , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
112   InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
113   InstrItinData<IIC_iCMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
114
115   // Integer multiply pipeline
116   //
117   InstrItinData<IIC_iMUL16   , [InstrStage<1, [A9_Pipe1], 0>,
118                                 InstrStage<2, [A9_Pipe0]>], [3, 1, 1]>,
119   InstrItinData<IIC_iMAC16   , [InstrStage<1, [A9_Pipe1], 0>,
120                                 InstrStage<2, [A9_Pipe0]>], [3, 1, 1, 1]>,
121   InstrItinData<IIC_iMUL32   , [InstrStage<1, [A9_Pipe1], 0>,
122                                 InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
123   InstrItinData<IIC_iMAC32   , [InstrStage<1, [A9_Pipe1], 0>,
124                                 InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 1]>,
125   InstrItinData<IIC_iMUL64   , [InstrStage<1, [A9_Pipe1], 0>,
126                                 InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
127   InstrItinData<IIC_iMAC64   , [InstrStage<1, [A9_Pipe1], 0>,
128                                 InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
129   // Integer load pipeline
130   // FIXME: The timings are some rough approximations
131   //
132   // Immediate offset
133   InstrItinData<IIC_iLoad_i   , [InstrStage<1, [A9_Pipe1]>,
134                                  InstrStage<1, [A9_MUX0, A9_AGU]>],
135                                 [3, 1], [A9_LdBypass]>,
136   InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Pipe1]>,
137                                  InstrStage<2, [A9_MUX0, A9_AGU]>],
138                                 [4, 1], [A9_LdBypass]>,
139   // FIXME: If address is 64-bit aligned, AGU cycles is 1.
140   InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Pipe1]>,
141                                  InstrStage<2, [A9_MUX0, A9_AGU]>],
142                                 [3, 3, 1], [A9_LdBypass]>,
143   //
144   // Register offset
145   InstrItinData<IIC_iLoad_r   , [InstrStage<1, [A9_Pipe1]>,
146                                  InstrStage<1, [A9_MUX0, A9_AGU]>],
147                                 [3, 1, 1], [A9_LdBypass]>,
148   InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Pipe1]>,
149                                  InstrStage<2, [A9_MUX0, A9_AGU]>],
150                                 [4, 1, 1], [A9_LdBypass]>,
151   InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Pipe1]>,
152                                  InstrStage<2, [A9_MUX0, A9_AGU]>],
153                                 [3, 3, 1, 1], [A9_LdBypass]>,
154   //
155   // Scaled register offset
156   InstrItinData<IIC_iLoad_si  , [InstrStage<1, [A9_Pipe1]>,
157                                  InstrStage<1, [A9_MUX0, A9_AGU]>],
158                                 [4, 1, 1], [A9_LdBypass]>,
159   InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Pipe1]>,
160                                  InstrStage<2, [A9_MUX0, A9_AGU]>],
161                                 [5, 1, 1], [A9_LdBypass]>,
162   //
163   // Immediate offset with update
164   InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [A9_Pipe1]>,
165                                  InstrStage<1, [A9_MUX0, A9_AGU]>],
166                                 [3, 2, 1], [A9_LdBypass]>,
167   InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Pipe1]>,
168                                  InstrStage<2, [A9_MUX0, A9_AGU]>],
169                                 [4, 3, 1], [A9_LdBypass]>,
170   //
171   // Register offset with update
172   InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [A9_Pipe1]>,
173                                  InstrStage<1, [A9_MUX0, A9_AGU]>],
174                                 [3, 2, 1, 1], [A9_LdBypass]>,
175   InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Pipe1]>,
176                                  InstrStage<2, [A9_MUX0, A9_AGU]>],
177                                 [4, 3, 1, 1], [A9_LdBypass]>,
178   InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Pipe1]>,
179                                  InstrStage<2, [A9_MUX0, A9_AGU]>],
180                                 [3, 3, 1, 1], [A9_LdBypass]>,
181   //
182   // Scaled register offset with update
183   InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Pipe1]>,
184                                  InstrStage<1, [A9_MUX0, A9_AGU]>],
185                                 [4, 3, 1, 1], [A9_LdBypass]>,
186   InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Pipe1]>,
187                                   InstrStage<2, [A9_MUX0, A9_AGU]>],
188                                  [5, 4, 1, 1], [A9_LdBypass]>,
189   //
190   // Load multiple
191   InstrItinData<IIC_iLoadm   , [InstrStage<1, [A9_Pipe1]>,
192                                 InstrStage<2, [A9_MUX0, A9_AGU]>],
193                                [3], [A9_LdBypass]>,
194
195   //
196   // Load multiple plus branch
197   InstrItinData<IIC_iLoadmBr , [InstrStage<1, [A9_Pipe1]>,
198                                 InstrStage<1, [A9_MUX0, A9_AGU]>,
199                                 InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
200
201   //
202   // iLoadi + iALUr for t2LDRpci_pic.
203   InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Pipe1]>,
204                                 InstrStage<1, [A9_MUX0, A9_AGU]>,
205                                 InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
206                                [2, 1]>,
207
208   // Integer store pipeline
209   //
210   // Immediate offset
211   InstrItinData<IIC_iStore_i  , [InstrStage<1, [A9_Pipe1]>,
212                                  InstrStage<1, [A9_MUX0, A9_AGU]>], [1, 1]>,
213   InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Pipe1]>,
214                                  InstrStage<2, [A9_MUX0, A9_AGU]>], [1, 1]>,
215   // FIXME: If address is 64-bit aligned, AGU cycles is 1.
216   InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Pipe1]>,
217                                  InstrStage<2, [A9_MUX0, A9_AGU]>], [1, 1]>,
218   //
219   // Register offset
220   InstrItinData<IIC_iStore_r  , [InstrStage<1, [A9_Pipe1]>,
221                                  InstrStage<1, [A9_MUX0, A9_AGU]>], [1, 1, 1]>,
222   InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Pipe1]>,
223                                  InstrStage<2, [A9_MUX0, A9_AGU]>], [1, 1, 1]>,
224   InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Pipe1]>,
225                                  InstrStage<2, [A9_MUX0, A9_AGU]>], [1, 1, 1]>,
226   //
227   // Scaled register offset
228   InstrItinData<IIC_iStore_si , [InstrStage<1, [A9_Pipe1]>,
229                                  InstrStage<1, [A9_MUX0, A9_AGU]>], [1, 1, 1]>,
230   InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Pipe1]>,
231                                  InstrStage<2, [A9_MUX0, A9_AGU]>], [1, 1, 1]>,
232   //
233   // Immediate offset with update
234   InstrItinData<IIC_iStore_iu , [InstrStage<1, [A9_Pipe1]>,
235                                  InstrStage<1, [A9_MUX0, A9_AGU]>], [2, 1, 1]>,
236   InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Pipe1]>,
237                                   InstrStage<2, [A9_MUX0, A9_AGU]>], [3, 1, 1]>,
238   //
239   // Register offset with update
240   InstrItinData<IIC_iStore_ru , [InstrStage<1, [A9_Pipe1]>,
241                                  InstrStage<1, [A9_MUX0, A9_AGU]>],
242                                 [2, 1, 1, 1]>,
243   InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Pipe1]>,
244                                   InstrStage<2, [A9_MUX0, A9_AGU]>],
245                                  [3, 1, 1, 1]>,
246   InstrItinData<IIC_iStore_d_ru,[InstrStage<1, [A9_Pipe1]>,
247                                  InstrStage<2, [A9_MUX0, A9_AGU]>],
248                                 [3, 1, 1, 1]>,
249   //
250   // Scaled register offset with update
251   InstrItinData<IIC_iStore_siu, [InstrStage<1, [A9_Pipe1]>,
252                                  InstrStage<1, [A9_MUX0, A9_AGU]>],
253                                 [2, 1, 1, 1]>,
254   InstrItinData<IIC_iStore_bh_siu,[InstrStage<1, [A9_Pipe1]>,
255                                    InstrStage<2, [A9_MUX0, A9_AGU]>],
256                                   [3, 1, 1, 1]>,
257   //
258   // Store multiple
259   InstrItinData<IIC_iStorem  , [InstrStage<1, [A9_Pipe1]>,
260                                 InstrStage<1, [A9_MUX0, A9_AGU]>]>,
261   // Branch
262   //
263   // no delay slots, so the latency of a branch is unimportant
264   InstrItinData<IIC_Br       , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
265
266   // VFP and NEON share the same register file. This means that every VFP
267   // instruction should wait for full completion of the preceding NEON
268   // instruction and vice-versa. We model this behavior with two artificial FUs:
269   // DRegsVFP and DRegsN.
270   //
271   // Every VFP instruction:
272   //  - Acquires DRegsVFP resource for 1 cycle
273   //  - Reserves DRegsN resource for the whole duration (including time to
274   //    register file writeback!).
275   // Every NEON instruction does the same but with FUs swapped.
276   //
277   // Since the reserved FU cannot be acquired, this models precisely
278   // "cross-domain" stalls.
279
280   // VFP
281   // Issue through integer pipeline, and execute in NEON unit.
282
283   // FP Special Register to Integer Register File Move
284   InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
285                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
286                               InstrStage<1, [A9_Pipe1]>,
287                               InstrStage<1, [A9_MUX0, A9_NPipe]>]>,
288   //
289   // Single-precision FP Unary
290   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
291                                // Extra latency cycles since wbck is 2 cycles
292                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
293                                InstrStage<1, [A9_Pipe1]>,
294                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
295                               [1, 1]>,
296   //
297   // Double-precision FP Unary
298   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
299                                // Extra latency cycles since wbck is 2 cycles
300                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
301                                InstrStage<1, [A9_Pipe1]>,
302                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
303                               [1, 1]>,
304
305   //
306   // Single-precision FP Compare
307   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
308                                // Extra latency cycles since wbck is 4 cycles
309                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
310                                InstrStage<1, [A9_Pipe1]>,
311                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
312                               [1, 1]>,
313   //
314   // Double-precision FP Compare
315   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
316                                // Extra latency cycles since wbck is 4 cycles
317                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
318                                InstrStage<1, [A9_Pipe1]>,
319                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
320                               [1, 1]>,
321   //
322   // Single to Double FP Convert
323   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
324                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
325                                InstrStage<1, [A9_Pipe1]>,
326                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
327                               [4, 1]>,
328   //
329   // Double to Single FP Convert
330   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
331                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
332                                InstrStage<1, [A9_Pipe1]>,
333                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
334                               [4, 1]>,
335
336   //
337   // Single to Half FP Convert
338   InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
339                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
340                                InstrStage<1, [A9_Pipe1]>,
341                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
342                               [4, 1]>,
343   //
344   // Half to Single FP Convert
345   InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
346                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
347                                InstrStage<1, [A9_Pipe1]>,
348                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
349                               [2, 1]>,
350
351   //
352   // Single-Precision FP to Integer Convert
353   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
354                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
355                                InstrStage<1, [A9_Pipe1]>,
356                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
357                               [4, 1]>,
358   //
359   // Double-Precision FP to Integer Convert
360   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
361                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
362                                InstrStage<1, [A9_Pipe1]>,
363                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
364                               [4, 1]>,
365   //
366   // Integer to Single-Precision FP Convert
367   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
368                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
369                                InstrStage<1, [A9_Pipe1]>,
370                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
371                               [4, 1]>,
372   //
373   // Integer to Double-Precision FP Convert
374   InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
375                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
376                                InstrStage<1, [A9_Pipe1]>,
377                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
378                               [4, 1]>,
379   //
380   // Single-precision FP ALU
381   InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
382                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
383                                InstrStage<1, [A9_Pipe1]>,
384                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
385                               [4, 1, 1]>,
386   //
387   // Double-precision FP ALU
388   InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
389                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
390                                InstrStage<1, [A9_Pipe1]>,
391                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
392                               [4, 1, 1]>,
393   //
394   // Single-precision FP Multiply
395   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
396                                InstrStage<6, [A9_DRegsN],   0, Reserved>,
397                                InstrStage<1, [A9_Pipe1]>,
398                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
399                               [5, 1, 1]>,
400   //
401   // Double-precision FP Multiply
402   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
403                                InstrStage<7, [A9_DRegsN],   0, Reserved>,
404                                InstrStage<1, [A9_Pipe1]>,
405                                InstrStage<2, [A9_MUX0, A9_NPipe]>],
406                               [6, 1, 1]>,
407   //
408   // Single-precision FP MAC
409   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
410                                InstrStage<9, [A9_DRegsN],   0, Reserved>,
411                                InstrStage<1, [A9_Pipe1]>,
412                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
413                               [8, 0, 1, 1]>,
414   //
415   // Double-precision FP MAC
416   InstrItinData<IIC_fpMAC64 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
417                                InstrStage<10, [A9_DRegsN],  0, Reserved>,
418                                InstrStage<1,  [A9_Pipe1]>,
419                                InstrStage<2,  [A9_MUX0, A9_NPipe]>],
420                               [9, 0, 1, 1]>,
421   //
422   // Single-precision FP DIV
423   InstrItinData<IIC_fpDIV32 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
424                                InstrStage<16, [A9_DRegsN],  0, Reserved>,
425                                InstrStage<1,  [A9_Pipe1]>,
426                                InstrStage<10, [A9_MUX0, A9_NPipe]>],
427                               [15, 1, 1]>,
428   //
429   // Double-precision FP DIV
430   InstrItinData<IIC_fpDIV64 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
431                                InstrStage<26, [A9_DRegsN],  0, Reserved>,
432                                InstrStage<1,  [A9_Pipe1]>,
433                                InstrStage<20, [A9_MUX0, A9_NPipe]>],
434                               [25, 1, 1]>,
435   //
436   // Single-precision FP SQRT
437   InstrItinData<IIC_fpSQRT32, [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
438                                InstrStage<18, [A9_DRegsN],   0, Reserved>,
439                                InstrStage<1,  [A9_Pipe1]>,
440                                InstrStage<13, [A9_MUX0, A9_NPipe]>],
441                               [17, 1]>,
442   //
443   // Double-precision FP SQRT
444   InstrItinData<IIC_fpSQRT64, [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
445                                InstrStage<33, [A9_DRegsN],   0, Reserved>,
446                                InstrStage<1,  [A9_Pipe1]>,
447                                InstrStage<28, [A9_MUX0, A9_NPipe]>],
448                               [32, 1]>,
449
450   //
451   // Integer to Single-precision Move
452   InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
453                                // Extra 1 latency cycle since wbck is 2 cycles
454                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
455                                InstrStage<1, [A9_Pipe1]>,
456                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
457                               [1, 1]>,
458   //
459   // Integer to Double-precision Move
460   InstrItinData<IIC_fpMOVID,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
461                                // Extra 1 latency cycle since wbck is 2 cycles
462                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
463                                InstrStage<1, [A9_Pipe1]>,
464                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
465                               [1, 1, 1]>,
466   //
467   // Single-precision to Integer Move
468   InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
469                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
470                                InstrStage<1, [A9_Pipe1]>,
471                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
472                               [1, 1]>,
473   //
474   // Double-precision to Integer Move
475   InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
476                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
477                                InstrStage<1, [A9_Pipe1]>,
478                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
479                               [1, 1, 1]>,
480   //
481   // Single-precision FP Load
482   InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
483                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
484                                InstrStage<1, [A9_Pipe1], 0>,
485                                InstrStage<1, [A9_MUX0, A9_NPipe]>]>,
486   //
487   // Double-precision FP Load
488   InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
489                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
490                                InstrStage<1, [A9_Pipe1], 0>,
491                                InstrStage<1, [A9_MUX0, A9_NPipe]>]>,
492   //
493   // FP Load Multiple
494   InstrItinData<IIC_fpLoadm,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
495                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
496                                InstrStage<1, [A9_Pipe1], 0>,
497                                InstrStage<1, [A9_MUX0, A9_NPipe]>]>,
498   //
499   // Single-precision FP Store
500   InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
501                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
502                                InstrStage<1, [A9_Pipe1], 0>,
503                                InstrStage<1, [A9_MUX0, A9_NPipe]>]>,
504   //
505   // Double-precision FP Store
506   InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
507                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
508                                InstrStage<1, [A9_Pipe1], 0>,
509                                InstrStage<1, [A9_MUX0, A9_NPipe]>]>,
510   //
511   // FP Store Multiple
512   InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
513                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
514                                InstrStage<1, [A9_Pipe1], 0>,
515                                InstrStage<1, [A9_MUX0, A9_NPipe]>]>,
516   // NEON
517   // Issue through integer pipeline, and execute in NEON unit.
518   // VLD1
519   // FIXME: We don't model this instruction properly
520   InstrItinData<IIC_VLD1,     [InstrStage<1, [A9_DRegsN],   0, Required>,
521                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
522                                InstrStage<1, [A9_Pipe1], 0>,
523                                InstrStage<1, [A9_MUX0, A9_NPipe]>]>,
524   //
525   // VLD2
526   // FIXME: We don't model this instruction properly
527   InstrItinData<IIC_VLD2,     [InstrStage<1, [A9_DRegsN],   0, Required>,
528                                // Extra latency cycles since wbck is 6 cycles
529                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
530                                InstrStage<1, [A9_Pipe1], 0>,
531                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
532                               [2, 2, 1]>,
533   //
534   // VLD3
535   // FIXME: We don't model this instruction properly
536   InstrItinData<IIC_VLD3,     [InstrStage<1, [A9_DRegsN],   0, Required>,
537                                // Extra latency cycles since wbck is 6 cycles
538                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
539                                InstrStage<1, [A9_Pipe1], 0>,
540                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
541                               [2, 2, 2, 1]>,
542   //
543   // VLD4
544   // FIXME: We don't model this instruction properly
545   InstrItinData<IIC_VLD4,     [InstrStage<1, [A9_DRegsN],   0, Required>,
546                                // Extra latency cycles since wbck is 6 cycles
547                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
548                                InstrStage<1, [A9_Pipe1], 0>,
549                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
550                               [2, 2, 2, 2, 1]>,
551   //
552   // VST
553   // FIXME: We don't model this instruction properly
554   InstrItinData<IIC_VST,      [InstrStage<1, [A9_DRegsN],   0, Required>,
555                                // Extra latency cycles since wbck is 6 cycles
556                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
557                                InstrStage<1, [A9_Pipe1], 0>,
558                                InstrStage<1, [A9_MUX0, A9_NPipe]>]>,
559   //
560   // Double-register Integer Unary
561   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
562                                // Extra latency cycles since wbck is 6 cycles
563                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
564                                InstrStage<1, [A9_Pipe1]>,
565                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
566                               [4, 2]>,
567   //
568   // Quad-register Integer Unary
569   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
570                                // Extra latency cycles since wbck is 6 cycles
571                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
572                                InstrStage<1, [A9_Pipe1]>,
573                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
574                               [4, 2]>,
575   //
576   // Double-register Integer Q-Unary
577   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A9_DRegsN],   0, Required>,
578                                // Extra latency cycles since wbck is 6 cycles
579                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
580                                InstrStage<1, [A9_Pipe1]>,
581                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
582                               [4, 1]>,
583   //
584   // Quad-register Integer Q-Unary
585   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
586                                // Extra latency cycles since wbck is 6 cycles
587                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
588                                InstrStage<1, [A9_Pipe1]>,
589                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
590                               [4, 1]>,
591   //
592   // Double-register Integer Binary
593   InstrItinData<IIC_VBINiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
594                                // Extra latency cycles since wbck is 6 cycles
595                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
596                                InstrStage<1, [A9_Pipe1]>,
597                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
598                               [3, 2, 2]>,
599   //
600   // Quad-register Integer Binary
601   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
602                                // Extra latency cycles since wbck is 6 cycles
603                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
604                                InstrStage<1, [A9_Pipe1]>,
605                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
606                               [3, 2, 2]>,
607   //
608   // Double-register Integer Subtract
609   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
610                                // Extra latency cycles since wbck is 6 cycles
611                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
612                                InstrStage<1, [A9_Pipe1]>,
613                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
614                               [3, 2, 1]>,
615   //
616   // Quad-register Integer Subtract
617   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
618                                // Extra latency cycles since wbck is 6 cycles
619                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
620                                InstrStage<1, [A9_Pipe1]>,
621                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
622                               [3, 2, 1]>,
623   //
624   // Double-register Integer Shift
625   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
626                                // Extra latency cycles since wbck is 6 cycles
627                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
628                                InstrStage<1, [A9_Pipe1]>,
629                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
630                               [3, 1, 1]>,
631   //
632   // Quad-register Integer Shift
633   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
634                                // Extra latency cycles since wbck is 6 cycles
635                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
636                                InstrStage<1, [A9_Pipe1]>,
637                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
638                               [3, 1, 1]>,
639   //
640   // Double-register Integer Shift (4 cycle)
641   InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
642                                // Extra latency cycles since wbck is 6 cycles
643                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
644                                InstrStage<1, [A9_Pipe1]>,
645                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
646                               [4, 1, 1]>,
647   //
648   // Quad-register Integer Shift (4 cycle)
649   InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
650                                // Extra latency cycles since wbck is 6 cycles
651                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
652                                InstrStage<1, [A9_Pipe1]>,
653                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
654                               [4, 1, 1]>,
655   //
656   // Double-register Integer Binary (4 cycle)
657   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
658                                // Extra latency cycles since wbck is 6 cycles
659                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
660                                InstrStage<1, [A9_Pipe1]>,
661                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
662                               [4, 2, 2]>,
663   //
664   // Quad-register Integer Binary (4 cycle)
665   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
666                                // Extra latency cycles since wbck is 6 cycles
667                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
668                                InstrStage<1, [A9_Pipe1]>,
669                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
670                               [4, 2, 2]>,
671   //
672   // Double-register Integer Subtract (4 cycle)
673   InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
674                                // Extra latency cycles since wbck is 6 cycles
675                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
676                                InstrStage<1, [A9_Pipe1]>,
677                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
678                               [4, 2, 1]>,
679   //
680   // Quad-register Integer Subtract (4 cycle)
681   InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
682                                // Extra latency cycles since wbck is 6 cycles
683                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
684                                InstrStage<1, [A9_Pipe1]>,
685                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
686                               [4, 2, 1]>,
687
688   //
689   // Double-register Integer Count
690   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
691                                // Extra latency cycles since wbck is 6 cycles
692                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
693                                InstrStage<1, [A9_Pipe1]>,
694                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
695                               [3, 2, 2]>,
696   //
697   // Quad-register Integer Count
698   // Result written in N3, but that is relative to the last cycle of multicycle,
699   // so we use 4 for those cases
700   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
701                                // Extra latency cycles since wbck is 7 cycles
702                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
703                                InstrStage<1, [A9_Pipe1]>,
704                                InstrStage<2, [A9_MUX0, A9_NPipe]>],
705                               [4, 2, 2]>,
706   //
707   // Double-register Absolute Difference and Accumulate
708   InstrItinData<IIC_VABAD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
709                                // Extra latency cycles since wbck is 6 cycles
710                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
711                                InstrStage<1, [A9_Pipe1]>,
712                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
713                               [6, 3, 2, 1]>,
714   //
715   // Quad-register Absolute Difference and Accumulate
716   InstrItinData<IIC_VABAQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
717                                // Extra latency cycles since wbck is 6 cycles
718                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
719                                InstrStage<1, [A9_Pipe1]>,
720                                InstrStage<2, [A9_MUX0, A9_NPipe]>],
721                               [6, 3, 2, 1]>,
722   //
723   // Double-register Integer Pair Add Long
724   InstrItinData<IIC_VPALiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
725                                // Extra latency cycles since wbck is 6 cycles
726                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
727                                InstrStage<1, [A9_Pipe1]>,
728                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
729                               [6, 3, 1]>,
730   //
731   // Quad-register Integer Pair Add Long
732   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
733                                // Extra latency cycles since wbck is 6 cycles
734                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
735                                InstrStage<1, [A9_Pipe1]>,
736                                InstrStage<2, [A9_MUX0, A9_NPipe]>],
737                               [6, 3, 1]>,
738
739   //
740   // Double-register Integer Multiply (.8, .16)
741   InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN],   0, Required>,
742                                // Extra latency cycles since wbck is 6 cycles
743                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
744                                InstrStage<1, [A9_Pipe1]>,
745                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
746                               [6, 2, 2]>,
747   //
748   // Quad-register Integer Multiply (.8, .16)
749   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
750                                // Extra latency cycles since wbck is 7 cycles
751                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
752                                InstrStage<1, [A9_Pipe1]>,
753                                InstrStage<2, [A9_MUX0, A9_NPipe]>],
754                               [7, 2, 2]>,
755
756   //
757   // Double-register Integer Multiply (.32)
758   InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN],   0, Required>,
759                                // Extra latency cycles since wbck is 7 cycles
760                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
761                                InstrStage<1, [A9_Pipe1]>,
762                                InstrStage<2, [A9_MUX0, A9_NPipe]>],
763                               [7, 2, 1]>,
764   //
765   // Quad-register Integer Multiply (.32)
766   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
767                                // Extra latency cycles since wbck is 9 cycles
768                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
769                                InstrStage<1, [A9_Pipe1]>,
770                                InstrStage<4, [A9_MUX0, A9_NPipe]>],
771                               [9, 2, 1]>,
772   //
773   // Double-register Integer Multiply-Accumulate (.8, .16)
774   InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN],   0, Required>,
775                                // Extra latency cycles since wbck is 6 cycles
776                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
777                                InstrStage<1, [A9_Pipe1]>,
778                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
779                               [6, 3, 2, 2]>,
780   //
781   // Double-register Integer Multiply-Accumulate (.32)
782   InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN],   0, Required>,
783                                // Extra latency cycles since wbck is 7 cycles
784                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
785                                InstrStage<1, [A9_Pipe1]>,
786                                InstrStage<2, [A9_MUX0, A9_NPipe]>],
787                               [7, 3, 2, 1]>,
788   //
789   // Quad-register Integer Multiply-Accumulate (.8, .16)
790   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
791                                // Extra latency cycles since wbck is 7 cycles
792                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
793                                InstrStage<1, [A9_Pipe1]>,
794                                InstrStage<2, [A9_MUX0, A9_NPipe]>],
795                               [7, 3, 2, 2]>,
796   //
797   // Quad-register Integer Multiply-Accumulate (.32)
798   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
799                                // Extra latency cycles since wbck is 9 cycles
800                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
801                                InstrStage<1, [A9_Pipe1]>,
802                                InstrStage<4, [A9_MUX0, A9_NPipe]>],
803                               [9, 3, 2, 1]>,
804   //
805   // Move Immediate
806   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A9_DRegsN],   0, Required>,
807                                // Extra latency cycles since wbck is 6 cycles
808                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
809                                InstrStage<1, [A9_Pipe1]>,
810                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
811                               [3]>,
812   //
813   // Double-register Permute Move
814   InstrItinData<IIC_VMOVD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
815   // FIXME: all latencies are arbitrary, no information is available
816                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
817                                InstrStage<1, [A9_Pipe1]>,
818                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
819                               [2, 1]>,
820   //
821   // Quad-register Permute Move
822   // Result written in N2, but that is relative to the last cycle of multicycle,
823   // so we use 3 for those cases
824   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
825   // FIXME: all latencies are arbitrary, no information is available
826                                InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
827                                InstrStage<1, [A9_Pipe1]>,
828                                InstrStage<2, [A9_MUX0, A9_NPipe]>],
829                               [3, 1]>,
830   //
831   // Integer to Single-precision Move
832   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
833   // FIXME: all latencies are arbitrary, no information is available
834                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
835                                InstrStage<1, [A9_Pipe1]>,
836                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
837                               [2, 1]>,
838   //
839   // Integer to Double-precision Move
840   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
841   // FIXME: all latencies are arbitrary, no information is available
842                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
843                                InstrStage<1, [A9_Pipe1]>,
844                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
845                               [2, 1, 1]>,
846   //
847   // Single-precision to Integer Move
848   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
849   // FIXME: all latencies are arbitrary, no information is available
850                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
851                                InstrStage<1, [A9_Pipe1]>,
852                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
853                               [2, 1]>,
854   //
855   // Double-precision to Integer Move
856   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
857   // FIXME: all latencies are arbitrary, no information is available
858                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
859                                InstrStage<1, [A9_Pipe1]>,
860                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
861                               [2, 2, 1]>,
862   //
863   // Integer to Lane Move
864   InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN],   0, Required>,
865   // FIXME: all latencies are arbitrary, no information is available
866                                InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
867                                InstrStage<1, [A9_Pipe1]>,
868                                InstrStage<2, [A9_MUX0, A9_NPipe]>],
869                               [3, 1, 1]>,
870
871   //
872   // Double-register FP Unary
873   InstrItinData<IIC_VUNAD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
874                                // Extra latency cycles since wbck is 6 cycles
875                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
876                                InstrStage<1, [A9_Pipe1]>,
877                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
878                               [5, 2]>,
879   //
880   // Quad-register FP Unary
881   // Result written in N5, but that is relative to the last cycle of multicycle,
882   // so we use 6 for those cases
883   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
884                                // Extra latency cycles since wbck is 7 cycles
885                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
886                                InstrStage<1, [A9_Pipe1]>,
887                                InstrStage<2, [A9_MUX0, A9_NPipe]>],
888                               [6, 2]>,
889   //
890   // Double-register FP Binary
891   // FIXME: We're using this itin for many instructions and [2, 2] here is too
892   // optimistic.
893   InstrItinData<IIC_VBIND,    [InstrStage<1, [A9_DRegsN],   0, Required>,
894                                // Extra latency cycles since wbck is 7 cycles. NOTE(review): sibling entries reserve wbck+1 cycles, this one reserves 7 -- confirm.
895                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
896                                InstrStage<1, [A9_Pipe1]>,
897                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
898                               [5, 2, 2]>,
899   //
900   // Quad-register FP Binary
901   // Result written in N5, but that is relative to the last cycle of multicycle,
902   // so we use 6 for those cases
903   // FIXME: We're using this itin for many instructions and [2, 2] here is too
904   // optimistic.
905   InstrItinData<IIC_VBINQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
906                                // Extra latency cycles since wbck is 8 cycles. NOTE(review): sibling entries reserve wbck+1 cycles, this one reserves 8 -- confirm.
907                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
908                                InstrStage<1, [A9_Pipe1]>,
909                                InstrStage<2, [A9_MUX0, A9_NPipe]>],
910                               [6, 2, 2]>,
911   //
912   // Double-register FP Multiply-Accumulate
913   InstrItinData<IIC_VMACD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
914                                // Extra latency cycles since wbck is 7 cycles
915                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
916                                InstrStage<1, [A9_Pipe1]>,
917                                InstrStage<2, [A9_MUX0, A9_NPipe]>],
918                               [6, 3, 2, 1]>,
919   //
920   // Quad-register FP Multiply-Accumulate
921   // Result written in N9, but that is relative to the last cycle of multicycle,
922   // so we use 10 for those cases. NOTE(review): result operand cycle below is 8 -- confirm.
923   InstrItinData<IIC_VMACQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
924                                // Extra latency cycles since wbck is 9 cycles
925                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
926                                InstrStage<1, [A9_Pipe1]>,
927                                InstrStage<4, [A9_MUX0, A9_NPipe]>],
928                               [8, 4, 2, 1]>,
929   //
930   // Double-register Reciprocal Step
931   InstrItinData<IIC_VRECSD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
932                                // Extra latency cycles since wbck is 7 cycles
933                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
934                                InstrStage<1, [A9_Pipe1]>,
935                                InstrStage<2, [A9_MUX0, A9_NPipe]>],
936                               [6, 2, 2]>,
937   //
938   // Quad-register Reciprocal Step
939   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
940                                // Extra latency cycles since wbck is 9 cycles
941                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
942                                InstrStage<1, [A9_Pipe1]>,
943                                InstrStage<4, [A9_MUX0, A9_NPipe]>],
944                               [8, 2, 2]>,
945   //
946   // Double-register Permute
947   InstrItinData<IIC_VPERMD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
948                                // Extra latency cycles since wbck is 6 cycles
949                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
950                                InstrStage<1, [A9_Pipe1]>,
951                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
952                               [2, 2, 1, 1]>,
953   //
954   // Quad-register Permute
955   // Result written in N2, but that is relative to the last cycle of multicycle,
956   // so we use 3 for those cases
957   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
958                                // Extra latency cycles since wbck is 7 cycles
959                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
960                                InstrStage<1, [A9_Pipe1]>,
961                                InstrStage<2, [A9_MUX0, A9_NPipe]>],
962                               [3, 3, 1, 1]>,
963   //
964   // Quad-register Permute (3 cycle issue)
965   // Result written in N2, but that is relative to the last cycle of multicycle,
966   // so we use 4 for those cases
967   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A9_DRegsN],   0, Required>,
968                                // Extra latency cycles since wbck is 8 cycles
969                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
970                                InstrStage<1, [A9_Pipe1]>,
971                                InstrStage<3, [A9_MUX0, A9_NPipe]>],
972                               [4, 4, 1, 1]>,
973
974   //
975   // Double-register VEXT
976   InstrItinData<IIC_VEXTD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
977                                // Extra latency cycles since wbck is 7 cycles. NOTE(review): sibling entries reserve wbck+1 cycles, this one reserves 7 -- confirm.
978                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
979                                InstrStage<1, [A9_Pipe1]>,
980                                InstrStage<1, [A9_MUX0, A9_NPipe]>],
981                               [2, 1, 1]>,
982   //
983   // Quad-register VEXT
984   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
985                                // Extra latency cycles since wbck is 9 cycles. NOTE(review): sibling entries reserve wbck+1 cycles, this one reserves 8 -- confirm.
986                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
987                                InstrStage<1, [A9_Pipe1]>,
988                                InstrStage<2, [A9_MUX0, A9_NPipe]>],
989                               [3, 1, 1]>,
990   //
991   // VTB
992   InstrItinData<IIC_VTB1,     [InstrStage<1, [A9_DRegsN],   0, Required>,
993                                // Extra latency cycles since wbck is 7 cycles
994                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
995                                InstrStage<1, [A9_Pipe1]>,
996                                InstrStage<2, [A9_MUX0, A9_NPipe]>],
997                               [3, 2, 1]>,
998   InstrItinData<IIC_VTB2,     [InstrStage<2, [A9_DRegsN],   0, Required>,
999                                // Extra latency cycles since wbck is 7 cycles
1000                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1001                                InstrStage<1, [A9_Pipe1]>,
1002                                InstrStage<2, [A9_MUX0, A9_NPipe]>],
1003                               [3, 2, 2, 1]>,
1004   InstrItinData<IIC_VTB3,     [InstrStage<2, [A9_DRegsN],   0, Required>,
1005                                // Extra latency cycles since wbck is 8 cycles
1006                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1007                                InstrStage<1, [A9_Pipe1]>,
1008                                InstrStage<3, [A9_MUX0, A9_NPipe]>],
1009                               [4, 2, 2, 3, 1]>,
1010   InstrItinData<IIC_VTB4,     [InstrStage<1, [A9_DRegsN],   0, Required>,
1011                                // Extra latency cycles since wbck is 8 cycles
1012                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1013                                InstrStage<1, [A9_Pipe1]>,
1014                                InstrStage<3, [A9_MUX0, A9_NPipe]>],
1015                               [4, 2, 2, 3, 3, 1]>,
1016   //
1017   // VTBX
1018   InstrItinData<IIC_VTBX1,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1019                                // Extra latency cycles since wbck is 7 cycles
1020                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1021                                InstrStage<1, [A9_Pipe1]>,
1022                                InstrStage<2, [A9_MUX0, A9_NPipe]>],
1023                               [3, 1, 2, 1]>,
1024   InstrItinData<IIC_VTBX2,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1025                                // Extra latency cycles since wbck is 7 cycles
1026                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1027                                InstrStage<1, [A9_Pipe1]>,
1028                                InstrStage<2, [A9_MUX0, A9_NPipe]>],
1029                               [3, 1, 2, 2, 1]>,
1030   InstrItinData<IIC_VTBX3,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1031                                // Extra latency cycles since wbck is 8 cycles
1032                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1033                                InstrStage<1, [A9_Pipe1]>,
1034                                InstrStage<3, [A9_MUX0, A9_NPipe]>],
1035                               [4, 1, 2, 2, 3, 1]>,
1036   InstrItinData<IIC_VTBX4,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1037                                // Extra latency cycles since wbck is 8 cycles. NOTE(review): NPipe stage below is 2 cycles, but VTB3/VTB4/VTBX3 (same wbck) use 3 -- confirm.
1038                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1039                                InstrStage<1, [A9_Pipe1]>,
1040                                InstrStage<2, [A9_MUX0, A9_NPipe]>],
1041                               [4, 1, 2, 2, 3, 3, 1]>
1042 ]>;