14197c824da874574e463ce275f5f177a039068f
[oota-llvm.git] / lib / Target / ARM / ARMScheduleA9.td
1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
11 //
12 //===----------------------------------------------------------------------===//
13
14 //
15 // Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical
16 // Reference Manual".
17 //
18 // Functional units
19 def A9_Pipe0   : FuncUnit; // integer pipeline 0
20 def A9_Pipe1   : FuncUnit; // integer pipeline 1 (loads/stores, VFP, NEON issue here)
21 def A9_LSPipe  : FuncUnit; // load/store pipe
22 def A9_NPipe   : FuncUnit; // NEON ALU/MUL pipe (VFP itineraries use it too)
23 def A9_DRegsVFP: FuncUnit; // artificial unit: FP register set, VFP side
24 def A9_DRegsN  : FuncUnit; // artificial unit: FP register set, NEON side
25
26 // Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1
27 //
28 def CortexA9Itineraries : ProcessorItineraries<
29   [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1], [], [
30   // Two fully-pipelined integer ALU pipelines
31
32   //
33   // Move instructions, unconditional
34   InstrItinData<IIC_iMOVi   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>,
35   InstrItinData<IIC_iMOVr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
36   InstrItinData<IIC_iMOVsi  , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
37   InstrItinData<IIC_iMOVsr  , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
38   InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
39                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
40   //
41   // MVN instructions
42   InstrItinData<IIC_iMVNi   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>,
43   InstrItinData<IIC_iMVNr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
44   InstrItinData<IIC_iMVNsi  , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
45   InstrItinData<IIC_iMVNsr  , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
46   //
47   // No operand cycles
48   InstrItinData<IIC_iALUx    , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
49   //
50   // Binary Instructions that produce a result
51   InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
52   InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2, 2]>,
53   InstrItinData<IIC_iALUsi, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
54   InstrItinData<IIC_iALUsr,[InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1, 1]>,
55   //
56   // Bitwise Instructions that produce a result
57   InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
58   InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2, 2]>,
59   InstrItinData<IIC_iBITsi, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
60   InstrItinData<IIC_iBITsr,[InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1, 1]>,
61   //
62   // Unary Instructions that produce a result
63   InstrItinData<IIC_iUNAr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
64   InstrItinData<IIC_iUNAsi  , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
65   //
66   // Zero and sign extension instructions
67   InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
68   InstrItinData<IIC_iEXTAr, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [3, 1, 1]>,
69   InstrItinData<IIC_iEXTAsr,[InstrStage<2, [A9_Pipe0, A9_Pipe1]>],[3, 1, 1, 1]>,
70   //
71   // Compare instructions
72   InstrItinData<IIC_iCMPi   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
73   InstrItinData<IIC_iCMPr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
74   InstrItinData<IIC_iCMPsi  , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
75   InstrItinData<IIC_iCMPsr  , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
76   //
77   // Test instructions
78   InstrItinData<IIC_iTSTi   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
79   InstrItinData<IIC_iTSTr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
80   InstrItinData<IIC_iTSTsi  , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
81   InstrItinData<IIC_iTSTsr  , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
82   //
83   // Move instructions, conditional
84   InstrItinData<IIC_iCMOVi  , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
85   InstrItinData<IIC_iCMOVr  , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
86   InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
87   InstrItinData<IIC_iCMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
88
89   // Integer multiply pipeline
90   //
91   InstrItinData<IIC_iMUL16   , [InstrStage<1, [A9_Pipe1], 0>,
92                                 InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
93   InstrItinData<IIC_iMAC16   , [InstrStage<1, [A9_Pipe1], 0>,
94                                 InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>,
95   InstrItinData<IIC_iMUL32   , [InstrStage<1, [A9_Pipe1], 0>,
96                                 InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
97   InstrItinData<IIC_iMAC32   , [InstrStage<1, [A9_Pipe1], 0>,
98                                 InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>,
99   InstrItinData<IIC_iMUL64   , [InstrStage<2, [A9_Pipe1], 0>,
100                                 InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
101   InstrItinData<IIC_iMAC64   , [InstrStage<2, [A9_Pipe1], 0>,
102                                 InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
103   // Integer load pipeline
104   // FIXME: The timings are some rough approximations
105   //
106   // Immediate offset
107   InstrItinData<IIC_iLoadi   , [InstrStage<1, [A9_Pipe1]>,
108                                 InstrStage<1, [A9_LSPipe]>], [3, 1]>,
109   //
110   // Register offset
111   InstrItinData<IIC_iLoadr   , [InstrStage<1, [A9_Pipe1]>,
112                                 InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
113   //
114   // Scaled register offset
115   InstrItinData<IIC_iLoadsi  , [InstrStage<1, [A9_Pipe1]>,
116                                 InstrStage<2, [A9_LSPipe]>], [4, 1, 1]>,
117   //
118   // Immediate offset with update
119   InstrItinData<IIC_iLoadiu  , [InstrStage<1, [A9_Pipe1]>,
120                                 InstrStage<2, [A9_LSPipe]>], [3, 2, 1]>,
121   //
122   // Register offset with update
123   InstrItinData<IIC_iLoadru  , [InstrStage<1, [A9_Pipe1]>,
124                                 InstrStage<2, [A9_LSPipe]>], [3, 2, 1, 1]>,
125   //
126   // Scaled register offset with update
127   InstrItinData<IIC_iLoadsiu , [InstrStage<1, [A9_Pipe1]>,
128                                 InstrStage<2, [A9_LSPipe]>], [4, 3, 1, 1]>,
129   //
130   // Load multiple
131   InstrItinData<IIC_iLoadm   , [InstrStage<1, [A9_Pipe1]>,
132                                 InstrStage<1, [A9_LSPipe]>]>,
133
134   //
135   // Load multiple plus branch
136   InstrItinData<IIC_iLoadmBr , [InstrStage<1, [A9_Pipe1]>,
137                                 InstrStage<1, [A9_LSPipe]>,
138                                 InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
139
140   //
141   // iLoadi + iALUr for t2LDRpci_pic.
142   InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Pipe1]>,
143                                 InstrStage<1, [A9_LSPipe]>,
144                                 InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [4, 1]>,
145
146   // Integer store pipeline
147   //
148   // Immediate offset
149   InstrItinData<IIC_iStorei  , [InstrStage<1, [A9_Pipe1]>,
150                                 InstrStage<1, [A9_LSPipe]>], [3, 1]>,
151   //
152   // Register offset
153   InstrItinData<IIC_iStorer  , [InstrStage<1, [ A9_Pipe1]>,
154                                 InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
155   //
156   // Scaled register offset
157   InstrItinData<IIC_iStoresi , [InstrStage<1, [A9_Pipe1]>,
158                                 InstrStage<2, [A9_LSPipe]>], [3, 1, 1]>,
159   //
160   // Immediate offset with update
161   InstrItinData<IIC_iStoreiu , [InstrStage<1, [A9_Pipe1]>,
162                                 InstrStage<1, [A9_LSPipe]>], [2, 3, 1]>,
163   //
164   // Register offset with update
165   InstrItinData<IIC_iStoreru , [InstrStage<1, [A9_Pipe1]>,
166                                 InstrStage<1, [A9_LSPipe]>], [2, 3, 1, 1]>,
167   //
168   // Scaled register offset with update
169   InstrItinData<IIC_iStoresiu, [InstrStage<1, [A9_Pipe1]>,
170                                 InstrStage<2, [A9_LSPipe]>], [3, 3, 1, 1]>,
171   //
172   // Store multiple
173   InstrItinData<IIC_iStorem  , [InstrStage<1, [A9_Pipe1]>,
174                                 InstrStage<1, [A9_LSPipe]>]>,
175   // Branch
176   //
177   // no delay slots, so the latency of a branch is unimportant
178   InstrItinData<IIC_Br       , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
179
180   // VFP and NEON share the same register file. This means that every VFP
181   // instruction should wait for full completion of the consecutive NEON
182   // instruction and vice-versa. We model this behavior with two artificial FUs:
183   // DRegsVFP and DRegsN.
184   //
185   // Every VFP instruction:
186   //  - Acquires DRegsVFP resource for 1 cycle
187   //  - Reserves DRegsN resource for the whole duration (including time to
188   //    register file writeback!).
189   // Every NEON instruction does the same but with FUs swapped.
190   //
191   // Since the reserved FU cannot be acquired, this models precisely
192   // "cross-domain" stalls.
193
194   // VFP
195   // Issue through integer pipeline, and execute in NEON unit.
196
197   // FP Special Register to Integer Register File Move
198   InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
199                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
200                               InstrStage<1, [A9_Pipe1]>,
201                               InstrStage<1, [A9_NPipe]>]>,
202   //
203   // Single-precision FP Unary
204   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
205                                // Extra latency cycles since wbck is 2 cycles
206                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
207                                InstrStage<1, [A9_Pipe1]>,
208                                InstrStage<1, [A9_NPipe]>], [1, 1]>,
209   //
210   // Double-precision FP Unary
211   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
212                                // Extra latency cycles since wbck is 2 cycles
213                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
214                                InstrStage<1, [A9_Pipe1]>,
215                                InstrStage<1, [A9_NPipe]>], [1, 1]>,
216
217   //
218   // Single-precision FP Compare
219   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
220                                // Extra latency cycles since wbck is 4 cycles
221                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
222                                InstrStage<1, [A9_Pipe1]>,
223                                InstrStage<1, [A9_NPipe]>], [1, 1]>,
224   //
225   // Double-precision FP Compare
226   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
227                                // Extra latency cycles since wbck is 4 cycles
228                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
229                                InstrStage<1, [A9_Pipe1]>,
230                                InstrStage<1, [A9_NPipe]>], [1, 1]>,
231   //
232   // Single to Double FP Convert
233   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
234                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
235                                InstrStage<1, [A9_Pipe1]>,
236                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
237   //
238   // Double to Single FP Convert
239   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
240                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
241                                InstrStage<1, [A9_Pipe1]>,
242                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
243
244   //
245   // Single to Half FP Convert
246   InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
247                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
248                                InstrStage<1, [A9_Pipe1]>,
249                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
250   //
251   // Half to Single FP Convert
252   InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
253                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
254                                InstrStage<1, [A9_Pipe1]>,
255                                InstrStage<1, [A9_NPipe]>], [2, 1]>,
256
257   //
258   // Single-Precision FP to Integer Convert
259   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
260                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
261                                InstrStage<1, [A9_Pipe1]>,
262                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
263   //
264   // Double-Precision FP to Integer Convert
265   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
266                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
267                                InstrStage<1, [A9_Pipe1]>,
268                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
269   //
270   // Integer to Single-Precision FP Convert
271   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
272                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
273                                InstrStage<1, [A9_Pipe1]>,
274                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
275   //
276   // Integer to Double-Precision FP Convert
277   InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
278                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
279                                InstrStage<1, [A9_Pipe1]>,
280                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
281   //
282   // Single-precision FP ALU
283   InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
284                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
285                                InstrStage<1, [A9_Pipe1]>,
286                                InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
287   //
288   // Double-precision FP ALU
289   InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
290                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
291                                InstrStage<1, [A9_Pipe1]>,
292                                InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
293   //
294   // Single-precision FP Multiply
295   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
296                                InstrStage<6, [A9_DRegsN],   0, Reserved>,
297                                InstrStage<1, [A9_Pipe1]>,
298                                InstrStage<1, [A9_NPipe]>], [5, 1, 1]>,
299   //
300   // Double-precision FP Multiply
301   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
302                                InstrStage<7, [A9_DRegsN],   0, Reserved>,
303                                InstrStage<1, [A9_Pipe1]>,
304                                InstrStage<2, [A9_NPipe]>], [6, 1, 1]>,
305   //
306   // Single-precision FP MAC
307   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
308                                InstrStage<9, [A9_DRegsN],   0, Reserved>,
309                                InstrStage<1, [A9_Pipe1]>,
310                                InstrStage<1, [A9_NPipe]>], [8, 0, 1, 1]>,
311   //
312   // Double-precision FP MAC
313   InstrItinData<IIC_fpMAC64 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
314                                InstrStage<10, [A9_DRegsN],  0, Reserved>,
315                                InstrStage<1,  [A9_Pipe1]>,
316                                InstrStage<2,  [A9_NPipe]>], [9, 0, 1, 1]>,
317   //
318   // Single-precision FP DIV
319   InstrItinData<IIC_fpDIV32 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
320                                InstrStage<16, [A9_DRegsN],  0, Reserved>,
321                                InstrStage<1,  [A9_Pipe1]>,
322                                InstrStage<10, [A9_NPipe]>], [15, 1, 1]>,
323   //
324   // Double-precision FP DIV
325   InstrItinData<IIC_fpDIV64 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
326                                InstrStage<26, [A9_DRegsN],  0, Reserved>,
327                                InstrStage<1,  [A9_Pipe1]>,
328                                InstrStage<20, [A9_NPipe]>], [25, 1, 1]>,
329   //
330   // Single-precision FP SQRT
331   InstrItinData<IIC_fpSQRT32, [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
332                                InstrStage<18, [A9_DRegsN],   0, Reserved>,
333                                InstrStage<1,  [A9_Pipe1]>,
334                                InstrStage<13, [A9_NPipe]>], [17, 1]>,
335   //
336   // Double-precision FP SQRT
337   InstrItinData<IIC_fpSQRT64, [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
338                                InstrStage<33, [A9_DRegsN],   0, Reserved>,
339                                InstrStage<1,  [A9_Pipe1]>,
340                                InstrStage<28, [A9_NPipe]>], [32, 1]>,
341
342   //
343   // Integer to Single-precision Move
344   InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
345                                // Extra 1 latency cycle since wbck is 2 cycles
346                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
347                                InstrStage<1, [A9_Pipe1]>,
348                                InstrStage<1, [A9_NPipe]>], [1, 1]>,
349   //
350   // Integer to Double-precision Move
351   InstrItinData<IIC_fpMOVID,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
352                                // Extra 1 latency cycle since wbck is 2 cycles
353                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
354                                InstrStage<1, [A9_Pipe1]>,
355                                InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
356   //
357   // Single-precision to Integer Move
358   InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
359                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
360                                InstrStage<1, [A9_Pipe1]>,
361                                InstrStage<1, [A9_NPipe]>], [1, 1]>,
362   //
363   // Double-precision to Integer Move
364   InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
365                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
366                                InstrStage<1, [A9_Pipe1]>,
367                                InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
368   //
369   // Single-precision FP Load
370   InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
371                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
372                                InstrStage<1, [A9_Pipe1], 0>,
373                                InstrStage<1, [A9_LSPipe]>,
374                                InstrStage<1, [A9_NPipe]>]>,
375   //
376   // Double-precision FP Load
377   InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
378                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
379                                InstrStage<1, [A9_Pipe1], 0>,
380                                InstrStage<1, [A9_LSPipe]>,
381                                InstrStage<1, [A9_NPipe]>]>,
382   //
383   // FP Load Multiple
384   InstrItinData<IIC_fpLoadm,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
385                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
386                                InstrStage<1, [A9_Pipe1], 0>,
387                                InstrStage<1, [A9_LSPipe]>,
388                                InstrStage<1, [A9_NPipe]>]>,
389   //
390   // Single-precision FP Store
391   InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
392                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
393                                InstrStage<1, [A9_Pipe1], 0>,
394                                InstrStage<1, [A9_LSPipe]>,
395                                InstrStage<1, [A9_NPipe]>]>,
396   //
397   // Double-precision FP Store
398   InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
399                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
400                                InstrStage<1, [A9_Pipe1], 0>,
401                                InstrStage<1, [A9_LSPipe]>,
402                                InstrStage<1, [A9_NPipe]>]>,
403   //
404   // FP Store Multiple
405   InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
406                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
407                                InstrStage<1, [A9_Pipe1], 0>,
408                                InstrStage<1, [A9_LSPipe]>,
409                                InstrStage<1, [A9_NPipe]>]>,
410   // NEON
411   // Issue through integer pipeline, and execute in NEON unit.
412   // FIXME: Neon pipeline and LdSt unit are multiplexed.
413   //        Add some syntactic sugar to model this!
414   // VLD1
415   // FIXME: We don't model this instruction properly
416   InstrItinData<IIC_VLD1,     [InstrStage<1, [A9_DRegsN],   0, Required>,
417                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
418                                InstrStage<1, [A9_Pipe1], 0>,
419                                InstrStage<1, [A9_LSPipe]>,
420                                InstrStage<1, [A9_NPipe]>]>,
421   //
422   // VLD2
423   // FIXME: We don't model this instruction properly
424   InstrItinData<IIC_VLD2,     [InstrStage<1, [A9_DRegsN],   0, Required>,
425                                // Extra latency cycles since wbck is 6 cycles
426                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
427                                InstrStage<1, [A9_Pipe1], 0>,
428                                InstrStage<1, [A9_LSPipe]>,
429                                InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
430   //
431   // VLD3
432   // FIXME: We don't model this instruction properly
433   InstrItinData<IIC_VLD3,     [InstrStage<1, [A9_DRegsN],   0, Required>,
434                                // Extra latency cycles since wbck is 6 cycles
435                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
436                                InstrStage<1, [A9_Pipe1], 0>,
437                                InstrStage<1, [A9_LSPipe]>,
438                                InstrStage<1, [A9_NPipe]>], [2, 2, 2, 1]>,
439   //
440   // VLD4
441   // FIXME: We don't model this instruction properly
442   InstrItinData<IIC_VLD4,     [InstrStage<1, [A9_DRegsN],   0, Required>,
443                                // Extra latency cycles since wbck is 6 cycles
444                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
445                                InstrStage<1, [A9_Pipe1], 0>,
446                                InstrStage<1, [A9_LSPipe]>,
447                                InstrStage<1, [A9_NPipe]>], [2, 2, 2, 2, 1]>,
448   //
449   // VST
450   // FIXME: We don't model this instruction properly
451   InstrItinData<IIC_VST,      [InstrStage<1, [A9_DRegsN],   0, Required>,
452                                // Extra latency cycles since wbck is 6 cycles
453                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
454                                InstrStage<1, [A9_Pipe1], 0>,
455                                InstrStage<1, [A9_LSPipe]>,
456                                InstrStage<1, [A9_NPipe]>]>,
457   //
458   // Double-register Integer Unary
459   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
460                                // Extra latency cycles since wbck is 6 cycles
461                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
462                                InstrStage<1, [A9_Pipe1]>,
463                                InstrStage<1, [A9_NPipe]>], [4, 2]>,
464   //
465   // Quad-register Integer Unary
466   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
467                                // Extra latency cycles since wbck is 6 cycles
468                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
469                                InstrStage<1, [A9_Pipe1]>,
470                                InstrStage<1, [A9_NPipe]>], [4, 2]>,
471   //
472   // Double-register Integer Q-Unary
473   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A9_DRegsN],   0, Required>,
474                                // Extra latency cycles since wbck is 6 cycles
475                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
476                                InstrStage<1, [A9_Pipe1]>,
477                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
478   //
479   // Quad-register Integer Q-Unary
480   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
481                                // Extra latency cycles since wbck is 6 cycles
482                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
483                                InstrStage<1, [A9_Pipe1]>,
484                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
485   //
486   // Double-register Integer Binary
487   InstrItinData<IIC_VBINiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
488                                // Extra latency cycles since wbck is 6 cycles
489                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
490                                InstrStage<1, [A9_Pipe1]>,
491                                InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
492   //
493   // Quad-register Integer Binary
494   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
495                                // Extra latency cycles since wbck is 6 cycles
496                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
497                                InstrStage<1, [A9_Pipe1]>,
498                                InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
499   //
500   // Double-register Integer Subtract
501   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
502                                // Extra latency cycles since wbck is 6 cycles
503                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
504                                InstrStage<1, [A9_Pipe1]>,
505                                InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
506   //
507   // Quad-register Integer Subtract
508   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
509                                // Extra latency cycles since wbck is 6 cycles
510                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
511                                InstrStage<1, [A9_Pipe1]>,
512                                InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
513   //
514   // Double-register Integer Shift
515   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
516                                // Extra latency cycles since wbck is 6 cycles
517                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
518                                InstrStage<1, [A9_Pipe1]>,
519                                InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
520   //
521   // Quad-register Integer Shift
522   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
523                                // Extra latency cycles since wbck is 6 cycles
524                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
525                                InstrStage<1, [A9_Pipe1]>,
526                                InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
527   //
528   // Double-register Integer Shift (4 cycle)
529   InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
530                                // Extra latency cycles since wbck is 6 cycles
531                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
532                                InstrStage<1, [A9_Pipe1]>,
533                                InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
534   //
535   // Quad-register Integer Shift (4 cycle)
536   InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
537                                // Extra latency cycles since wbck is 6 cycles
538                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
539                                InstrStage<1, [A9_Pipe1]>,
540                                InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
541   //
542   // Double-register Integer Binary (4 cycle)
543   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
544                                // Extra latency cycles since wbck is 6 cycles
545                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
546                                InstrStage<1, [A9_Pipe1]>,
547                                InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
548   //
549   // Quad-register Integer Binary (4 cycle)
550   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
551                                // Extra latency cycles since wbck is 6 cycles
552                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
553                                InstrStage<1, [A9_Pipe1]>,
554                                InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
555   //
556   // Double-register Integer Subtract (4 cycle)
557   InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
558                                // Extra latency cycles since wbck is 6 cycles
559                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
560                                InstrStage<1, [A9_Pipe1]>,
561                                InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
562   //
563   // Quad-register Integer Subtract (4 cycle)
564   InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
565                                // Extra latency cycles since wbck is 6 cycles
566                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
567                                InstrStage<1, [A9_Pipe1]>,
568                                InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
569
570   //
571   // Double-register Integer Count
572   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
573                                // Extra latency cycles since wbck is 6 cycles
574                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
575                                InstrStage<1, [A9_Pipe1]>,
576                                InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
577   //
578   // Quad-register Integer Count
579   // Result written in N3, but that is relative to the last cycle of multicycle,
580   // so we use 4 for those cases
581   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
582                                // Extra latency cycles since wbck is 7 cycles
583                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
584                                InstrStage<1, [A9_Pipe1]>,
585                                InstrStage<2, [A9_NPipe]>], [4, 2, 2]>,
586   //
587   // Double-register Absolute Difference and Accumulate
588   InstrItinData<IIC_VABAD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
589                                // Extra latency cycles since wbck is 6 cycles
590                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
591                                InstrStage<1, [A9_Pipe1]>,
592                                InstrStage<1, [A9_NPipe]>], [6, 3, 2, 1]>,
593   //
594   // Quad-register Absolute Difference and Accumulate
595   InstrItinData<IIC_VABAQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
596                                // Extra latency cycles since wbck is 6 cycles
597                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
598                                InstrStage<1, [A9_Pipe1]>,
599                                InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
600   //
601   // Double-register Integer Pair Add Long
602   InstrItinData<IIC_VPALiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
603                                // Extra latency cycles since wbck is 6 cycles
604                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
605                                InstrStage<1, [A9_Pipe1]>,
606                                InstrStage<1, [A9_NPipe]>], [6, 3, 1]>,
607   //
608   // Quad-register Integer Pair Add Long
609   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
610                                // Extra latency cycles since wbck is 6 cycles
611                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
612                                InstrStage<1, [A9_Pipe1]>,
613                                InstrStage<2, [A9_NPipe]>], [6, 3, 1]>,
614
615   //
616   // Double-register Integer Multiply (.8, .16)
617   InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN],   0, Required>,
618                                // Extra latency cycles since wbck is 6 cycles
619                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
620                                InstrStage<1, [A9_Pipe1]>,
621                                InstrStage<1, [A9_NPipe]>], [6, 2, 2]>,
622   //
623   // Quad-register Integer Multiply (.8, .16)
624   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
625                                // Extra latency cycles since wbck is 7 cycles
626                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
627                                InstrStage<1, [A9_Pipe1]>,
628                                InstrStage<2, [A9_NPipe]>], [7, 2, 2]>,
629
630   //
631   // Double-register Integer Multiply (.32)
632   InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN],   0, Required>,
633                                // Extra latency cycles since wbck is 7 cycles
634                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
635                                InstrStage<1, [A9_Pipe1]>,
636                                InstrStage<2, [A9_NPipe]>], [7, 2, 1]>,
637   //
638   // Quad-register Integer Multiply (.32)
639   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
640                                // Extra latency cycles since wbck is 9 cycles
641                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
642                                InstrStage<1, [A9_Pipe1]>,
643                                InstrStage<4, [A9_NPipe]>], [9, 2, 1]>,
644   //
645   // Double-register Integer Multiply-Accumulate (.8, .16)
646   InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN],   0, Required>,
647                                // Extra latency cycles since wbck is 6 cycles
648                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
649                                InstrStage<1, [A9_Pipe1]>,
650                                InstrStage<1, [A9_NPipe]>], [6, 3, 2, 2]>,
651   //
652   // Double-register Integer Multiply-Accumulate (.32)
653   InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN],   0, Required>,
654                                // Extra latency cycles since wbck is 7 cycles
655                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
656                                InstrStage<1, [A9_Pipe1]>,
657                                InstrStage<2, [A9_NPipe]>], [7, 3, 2, 1]>,
658   //
659   // Quad-register Integer Multiply-Accumulate (.8, .16)
660   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
661                                // Extra latency cycles since wbck is 7 cycles
662                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
663                                InstrStage<1, [A9_Pipe1]>,
664                                InstrStage<2, [A9_NPipe]>], [7, 3, 2, 2]>,
665   //
666   // Quad-register Integer Multiply-Accumulate (.32)
667   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
668                                // Extra latency cycles since wbck is 9 cycles
669                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
670                                InstrStage<1, [A9_Pipe1]>,
671                                InstrStage<4, [A9_NPipe]>], [9, 3, 2, 1]>,
672   //
673   // Move Immediate
674   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A9_DRegsN],   0, Required>,
675                                // Extra latency cycles since wbck is 6 cycles
676                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
677                                InstrStage<1, [A9_Pipe1]>,
678                                InstrStage<1, [A9_NPipe]>], [3]>,
679   //
680   // Double-register Permute Move
681   InstrItinData<IIC_VMOVD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
682   // FIXME: all latencies are arbitrary, no information is available
683                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
684                                InstrStage<1, [A9_Pipe1]>,
685                                InstrStage<1, [A9_LSPipe]>], [2, 1]>,
686   //
687   // Quad-register Permute Move
688   // Result written in N2, but that is relative to the last cycle of multicycle,
689   // so we use 3 for those cases
690   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
691   // FIXME: all latencies are arbitrary, no information is available
692                                InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
693                                InstrStage<1, [A9_Pipe1]>,
694                                InstrStage<2, [A9_NPipe]>], [3, 1]>,
695   //
696   // Integer to Single-precision Move
697   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
698   // FIXME: all latencies are arbitrary, no information is available
699                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
700                                InstrStage<1, [A9_Pipe1]>,
701                                InstrStage<1, [A9_NPipe]>], [2, 1]>,
702   //
703   // Integer to Double-precision Move
704   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
705   // FIXME: all latencies are arbitrary, no information is available
706                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
707                                InstrStage<1, [A9_Pipe1]>,
708                                InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
709   //
710   // Single-precision to Integer Move
711   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
712   // FIXME: all latencies are arbitrary, no information is available
713                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
714                                InstrStage<1, [A9_Pipe1]>,
715                                InstrStage<1, [A9_NPipe]>], [2, 1]>,
716   //
717   // Double-precision to Integer Move
718   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
719   // FIXME: all latencies are arbitrary, no information is available
720                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
721                                InstrStage<1, [A9_Pipe1]>,
722                                InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
723   //
724   // Integer to Lane Move
725   InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN],   0, Required>,
726   // FIXME: all latencies are arbitrary, no information is available
727                                InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
728                                InstrStage<1, [A9_Pipe1]>,
729                                InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
730
731   //
732   // Double-register FP Unary
733   InstrItinData<IIC_VUNAD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
734                                // Extra latency cycles since wbck is 6 cycles
735                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
736                                InstrStage<1, [A9_Pipe1]>,
737                                InstrStage<1, [A9_NPipe]>], [5, 2]>,
738   //
739   // Quad-register FP Unary
740   // Result written in N5, but that is relative to the last cycle of multicycle,
741   // so we use 6 for those cases
742   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
743                                // Extra latency cycles since wbck is 7 cycles
744                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
745                                InstrStage<1, [A9_Pipe1]>,
746                                InstrStage<2, [A9_NPipe]>], [6, 2]>,
747   //
748   // Double-register FP Binary
749   // FIXME: We're using this itin for many instructions and [2, 2] here is too
750   // optimistic.
751   InstrItinData<IIC_VBIND,    [InstrStage<1, [A9_DRegsN],   0, Required>,
752                                // Extra latency cycles since wbck is 6 cycles
753                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
754                                InstrStage<1, [A9_Pipe1]>,
755                                InstrStage<1, [A9_NPipe]>], [5, 2, 2]>,
756   //
757   // Quad-register FP Binary
758   // Result written in N5, but that is relative to the last cycle of multicycle,
759   // so we use 6 for those cases
760   // FIXME: We're using this itin for many instructions and [2, 2] here is too
761   // optimistic.
762   InstrItinData<IIC_VBINQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
763                                // Extra latency cycles since wbck is 7 cycles
764                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
765                                InstrStage<1, [A9_Pipe1]>,
766                                InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
767   //
768   // Double-register FP Multiply-Accumulate
769   InstrItinData<IIC_VMACD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
770                                // Extra latency cycles since wbck is 7 cycles
771                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
772                                InstrStage<1, [A9_Pipe1]>,
773                                InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
774   //
775   // Quad-register FP Multiply-Accumulate
776   // Result written in N9, but that is relative to the last cycle of multicycle,
777   // so we use 10 for those cases (NOTE(review): operand cycles below list 8, not 10 - confirm)
778   InstrItinData<IIC_VMACQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
779                                // Extra latency cycles since wbck is 9 cycles
780                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
781                                InstrStage<1, [A9_Pipe1]>,
782                                InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>,
783   //
784   // Double-register Reciprocal Step
785   InstrItinData<IIC_VRECSD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
786                                // Extra latency cycles since wbck is 7 cycles
787                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
788                                InstrStage<1, [A9_Pipe1]>,
789                                InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
790   //
791   // Quad-register Reciprocal Step
792   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
793                                // Extra latency cycles since wbck is 9 cycles
794                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
795                                InstrStage<1, [A9_Pipe1]>,
796                                InstrStage<4, [A9_NPipe]>], [8, 2, 2]>,
797   //
798   // Double-register Permute
799   InstrItinData<IIC_VPERMD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
800                                // Extra latency cycles since wbck is 6 cycles
801                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
802                                InstrStage<1, [A9_Pipe1]>,
803                                InstrStage<1, [A9_NPipe]>], [2, 2, 1, 1]>,
804   //
805   // Quad-register Permute
806   // Result written in N2, but that is relative to the last cycle of multicycle,
807   // so we use 3 for those cases
808   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
809                                // Extra latency cycles since wbck is 7 cycles
810                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
811                                InstrStage<1, [A9_Pipe1]>,
812                                InstrStage<2, [A9_NPipe]>], [3, 3, 1, 1]>,
813   //
814   // Quad-register Permute (3 cycle issue)
815   // Result written in N2, but that is relative to the last cycle of multicycle,
816   // so we use 4 for those cases
817   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A9_DRegsN],   0, Required>,
818                                // Extra latency cycles since wbck is 8 cycles
819                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
820                                InstrStage<1, [A9_Pipe1]>,
821                                InstrStage<3, [A9_LSPipe]>], [4, 4, 1, 1]>,
822
823   //
824   // Double-register VEXT
825   InstrItinData<IIC_VEXTD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
826                                // Extra latency cycles since wbck is 6 cycles
827                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
828                                InstrStage<1, [A9_Pipe1]>,
829                                InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
830   //
831   // Quad-register VEXT
832   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
833                                // Extra latency cycles since wbck is 7 cycles
834                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
835                                InstrStage<1, [A9_Pipe1]>,
836                                InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
837   //
838   // VTB
839   InstrItinData<IIC_VTB1,     [InstrStage<1, [A9_DRegsN],   0, Required>,
840                                // Extra latency cycles since wbck is 7 cycles
841                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
842                                InstrStage<1, [A9_Pipe1]>,
843                                InstrStage<2, [A9_NPipe]>], [3, 2, 1]>,
844   InstrItinData<IIC_VTB2,     [InstrStage<2, [A9_DRegsN],   0, Required>,
845                                // Extra latency cycles since wbck is 7 cycles
846                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
847                                InstrStage<1, [A9_Pipe1]>,
848                                InstrStage<2, [A9_NPipe]>], [3, 2, 2, 1]>,
849   InstrItinData<IIC_VTB3,     [InstrStage<2, [A9_DRegsN],   0, Required>,
850                                // Extra latency cycles since wbck is 8 cycles
851                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
852                                InstrStage<1, [A9_Pipe1]>,
853                                InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 1]>,
854   InstrItinData<IIC_VTB4,     [InstrStage<1, [A9_DRegsN],   0, Required>,
855                                // Extra latency cycles since wbck is 8 cycles
856                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
857                                InstrStage<1, [A9_Pipe1]>,
858                                InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 3, 1]>,
859   //
860   // VTBX
861   InstrItinData<IIC_VTBX1,    [InstrStage<1, [A9_DRegsN],   0, Required>,
862                                // Extra latency cycles since wbck is 7 cycles
863                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
864                                InstrStage<1, [A9_Pipe1]>,
865                                InstrStage<2, [A9_NPipe]>], [3, 1, 2, 1]>,
866   InstrItinData<IIC_VTBX2,    [InstrStage<1, [A9_DRegsN],   0, Required>,
867                                // Extra latency cycles since wbck is 7 cycles
868                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
869                                InstrStage<1, [A9_Pipe1]>,
870                                InstrStage<2, [A9_NPipe]>], [3, 1, 2, 2, 1]>,
871   InstrItinData<IIC_VTBX3,    [InstrStage<1, [A9_DRegsN],   0, Required>,
872                                // Extra latency cycles since wbck is 8 cycles
873                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
874                                InstrStage<1, [A9_Pipe1]>,
875                                InstrStage<3, [A9_NPipe]>], [4, 1, 2, 2, 3, 1]>,
876   InstrItinData<IIC_VTBX4,    [InstrStage<1, [A9_DRegsN],   0, Required>,
877                                // Extra latency cycles since wbck is 8 cycles
878                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
879                                InstrStage<1, [A9_Pipe1]>,
880                               InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]>
881 ]>;