ARM disassembler did not react to recent changes to the NEON instruction table.
[oota-llvm.git] / lib / Target / ARM / ARMScheduleA9.td
1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
2 // 
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 // 
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
11 //
12 //===----------------------------------------------------------------------===//
13
14 //
15 // Ad-hoc scheduling information derived from the rather vague "Cortex-A9
16 // Technical Reference Manual".
17 //
18 // Functional units
19 def A9_Issue   : FuncUnit; // issue slot (limits loads/stores to 1 per cycle)
20 def A9_Pipe0   : FuncUnit; // integer pipeline 0 (dual issue: Pipe0 | Pipe1)
21 def A9_Pipe1   : FuncUnit; // integer pipeline 1 (dual issue: Pipe0 | Pipe1)
22 def A9_LSPipe  : FuncUnit; // load/store pipe
23 def A9_NPipe   : FuncUnit; // NEON ALU/MUL pipe; VFP ops execute here too
24 def A9_DRegsVFP: FuncUnit; // artificial FU: FP register set, VFP side
25 def A9_DRegsN  : FuncUnit; // artificial FU: FP register set, NEON side
26
27 // Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1
28 //
29 def CortexA9Itineraries : ProcessorItineraries<
30   [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1, A9_Issue], [
31   // VFP and NEON share the same register file. This means that every VFP
32   // instruction should wait for full completion of the consecutive NEON
33   // instruction and vice-versa. We model this behavior with two artificial FUs:
34   // DRegsVFP and DRegsN.
35   //
36   // Every VFP instruction:
37   //  - Acquires DRegsVFP resource for 1 cycle
38   //  - Reserves DRegsN resource for the whole duration (including time to
39   //    register file writeback!).
40   // Every NEON instruction does the same but with FUs swapped.
41   //
42   // Since a reserved FU cannot be acquired, this precisely models "cross-domain"
43   // stalls.
44
45   // VFP
46   // Issue through integer pipeline, and execute in NEON unit.
47
48   // FP Special Register to Integer Register File Move
49   InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
50                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
51                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
52                               InstrStage<1, [A9_NPipe]>]>,
53   //
54   // Single-precision FP Unary
55   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
56                                // Extra latency cycles since wbck is 2 cycles
57                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
58                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
59                                InstrStage<1, [A9_NPipe]>], [1, 1]>,
60   //
61   // Double-precision FP Unary
62   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
63                                // Extra latency cycles since wbck is 2 cycles
64                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
65                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
66                                InstrStage<1, [A9_NPipe]>], [1, 1]>,
67
68   //
69   // Single-precision FP Compare
70   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
71                                // Extra latency cycles since wbck is 4 cycles
72                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
73                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
74                                InstrStage<1, [A9_NPipe]>], [1, 1]>,
75   //
76   // Double-precision FP Compare
77   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
78                                // Extra latency cycles since wbck is 4 cycles
79                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
80                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
81                                InstrStage<1, [A9_NPipe]>], [1, 1]>,
82   //
83   // Single to Double FP Convert
84   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
85                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
86                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
87                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
88   //
89   // Double to Single FP Convert
90   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
91                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
92                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
93                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
94
95   //
96   // Single to Half FP Convert
97   InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
98                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
99                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
100                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
101   //
102   // Half to Single FP Convert
103   InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
104                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
105                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
106                                InstrStage<1, [A9_NPipe]>], [2, 1]>,
107
108   //
109   // Single-Precision FP to Integer Convert
110   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
111                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
112                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
113                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
114   //
115   // Double-Precision FP to Integer Convert
116   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
117                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
118                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
119                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
120   //
121   // Integer to Single-Precision FP Convert
122   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
123                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
124                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
125                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
126   //
127   // Integer to Double-Precision FP Convert
128   InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
129                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
130                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
131                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
132   //
133   // Single-precision FP ALU
134   InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
135                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
136                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
137                                InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
138   //
139   // Double-precision FP ALU
140   InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
141                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
142                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
143                                InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
144   //
145   // Single-precision FP Multiply
146   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
147                                InstrStage<6, [A9_DRegsN],   0, Reserved>,
148                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
149                                InstrStage<1, [A9_NPipe]>], [5, 1, 1]>,
150   //
151   // Double-precision FP Multiply
152   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
153                                InstrStage<7, [A9_DRegsN],   0, Reserved>,
154                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
155                                InstrStage<2, [A9_NPipe]>], [6, 1, 1]>,
156   //
157   // Single-precision FP MAC
158   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
159                                InstrStage<9, [A9_DRegsN],   0, Reserved>,
160                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
161                                InstrStage<1, [A9_NPipe]>], [8, 0, 1, 1]>,
162   //
163   // Double-precision FP MAC
164   InstrItinData<IIC_fpMAC64 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
165                                InstrStage<10, [A9_DRegsN],  0, Reserved>,
166                                InstrStage<1,  [A9_Pipe0, A9_Pipe1]>,
167                                InstrStage<2,  [A9_NPipe]>], [9, 0, 1, 1]>,
168   //
169   // Single-precision FP DIV
170   InstrItinData<IIC_fpDIV32 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
171                                InstrStage<16, [A9_DRegsN],  0, Reserved>,
172                                InstrStage<1,  [A9_Pipe0, A9_Pipe1]>,
173                                InstrStage<10, [A9_NPipe]>], [15, 1, 1]>,
174   //
175   // Double-precision FP DIV
176   InstrItinData<IIC_fpDIV64 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
177                                InstrStage<26, [A9_DRegsN],  0, Reserved>,
178                                InstrStage<1,  [A9_Pipe0, A9_Pipe1]>,
179                                InstrStage<20, [A9_NPipe]>], [25, 1, 1]>,
180   //
181   // Single-precision FP SQRT
182   InstrItinData<IIC_fpSQRT32, [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
183                                InstrStage<18, [A9_DRegsN],   0, Reserved>,
184                                InstrStage<1,   [A9_Pipe0, A9_Pipe1]>,
185                                InstrStage<13,  [A9_NPipe]>], [17, 1]>,
186   //
187   // Double-precision FP SQRT
188   InstrItinData<IIC_fpSQRT64, [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
189                                InstrStage<33, [A9_DRegsN],   0, Reserved>,
190                                InstrStage<1,  [A9_Pipe0, A9_Pipe1]>,
191                                InstrStage<28, [A9_NPipe]>], [32, 1]>,
192
193   //
194   // Integer to Single-precision Move
195   InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
196                                // Extra 1 latency cycle since wbck is 2 cycles
197                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
198                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
199                                InstrStage<1, [A9_NPipe]>], [1, 1]>,
200   //
201   // Integer to Double-precision Move
202   InstrItinData<IIC_fpMOVID,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
203                                // Extra 1 latency cycle since wbck is 2 cycles
204                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
205                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
206                                InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
207   //
208   // Single-precision to Integer Move
209   InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
210                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
211                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
212                                InstrStage<1, [A9_NPipe]>], [1, 1]>,
213   //
214   // Double-precision to Integer Move
215   InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
216                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
217                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
218                                InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
219   //
220   // Single-precision FP Load
221   // use A9_Issue to enforce the 1 load/store per cycle limit
222   InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
223                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
224                                InstrStage<1, [A9_Issue], 0>, 
225                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
226                                InstrStage<1, [A9_LSPipe], 0>,
227                                InstrStage<1, [A9_NPipe]>]>,
228   //
229   // Double-precision FP Load
230   // use A9_Issue to enforce the 1 load/store per cycle limit
231   InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
232                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
233                                InstrStage<1, [A9_Issue], 0>, 
234                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
235                                InstrStage<1, [A9_LSPipe], 0>,
236                                InstrStage<1, [A9_NPipe]>]>,
237   //
238   // FP Load Multiple
239   // use A9_Issue to enforce the 1 load/store per cycle limit
240   InstrItinData<IIC_fpLoadm,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
241                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
242                                InstrStage<1, [A9_Issue], 0>, 
243                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
244                                InstrStage<1, [A9_LSPipe], 0>,
245                                InstrStage<1, [A9_NPipe]>]>,
246   //
247   // Single-precision FP Store
248   // use A9_Issue to enforce the 1 load/store per cycle limit
249   InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
250                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
251                                InstrStage<1, [A9_Issue], 0>, 
252                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
253                                InstrStage<1, [A9_LSPipe], 0>,
254                                InstrStage<1, [A9_NPipe]>]>,
255   //
256   // Double-precision FP Store
257   // use A9_Issue to enforce the 1 load/store per cycle limit
258   InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
259                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
260                                InstrStage<1, [A9_Issue], 0>, 
261                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
262                                InstrStage<1, [A9_LSPipe], 0>,
263                                InstrStage<1, [A9_NPipe]>]>,
264   //
265   // FP Store Multiple
266   // use A9_Issue to enforce the 1 load/store per cycle limit
267   InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
268                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
269                                InstrStage<1, [A9_Issue], 0>, 
270                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
271                                InstrStage<1, [A9_LSPipe], 0>,
272                                InstrStage<1, [A9_NPipe]>]>,
273   // NEON
274   // Issue through integer pipeline, and execute in NEON unit.
275   // FIXME: Neon pipeline and LdSt unit are multiplexed. 
276   //        Add some syntactic sugar to model this!
277   // VLD1
278   // FIXME: We don't model this instruction properly
279   InstrItinData<IIC_VLD1,     [InstrStage<1, [A9_DRegsN],   0, Required>,
280                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
281                                InstrStage<1, [A9_Issue], 0>, 
282                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
283                                InstrStage<1, [A9_LSPipe], 0>,
284                                InstrStage<1, [A9_NPipe]>]>,
285   //
286   // VLD2
287   // FIXME: We don't model this instruction properly
288   InstrItinData<IIC_VLD2,     [InstrStage<1, [A9_DRegsN],   0, Required>,
289                                // Extra latency cycles since wbck is 6 cycles
290                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
291                                InstrStage<1, [A9_Issue], 0>, 
292                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
293                                InstrStage<1, [A9_LSPipe], 0>,
294                                InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
295   //
296   // VLD3
297   // FIXME: We don't model this instruction properly
298   InstrItinData<IIC_VLD3,     [InstrStage<1, [A9_DRegsN],   0, Required>,
299                                // Extra latency cycles since wbck is 6 cycles
300                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
301                                InstrStage<1, [A9_Issue], 0>, 
302                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
303                                InstrStage<1, [A9_LSPipe], 0>,
304                                InstrStage<1, [A9_NPipe]>], [2, 2, 2, 1]>,
305   //
306   // VLD4
307   // FIXME: We don't model this instruction properly
308   InstrItinData<IIC_VLD4,     [InstrStage<1, [A9_DRegsN],   0, Required>,
309                                // Extra latency cycles since wbck is 6 cycles
310                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
311                                InstrStage<1, [A9_Issue], 0>, 
312                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
313                                InstrStage<1, [A9_LSPipe], 0>,
314                                InstrStage<1, [A9_NPipe]>], [2, 2, 2, 2, 1]>,
315   //
316   // VST
317   // FIXME: We don't model this instruction properly
318   InstrItinData<IIC_VST,      [InstrStage<1, [A9_DRegsN],   0, Required>,
319                                // Extra latency cycles since wbck is 6 cycles
320                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
321                                InstrStage<1, [A9_Issue], 0>, 
322                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
323                                InstrStage<1, [A9_LSPipe], 0>,
324                                InstrStage<1, [A9_NPipe]>]>,
325   //
326   // Double-register Integer Unary
327   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
328                                // Extra latency cycles since wbck is 6 cycles
329                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
330                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
331                                InstrStage<1, [A9_NPipe]>], [4, 2]>,
332   //
333   // Quad-register Integer Unary
334   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
335                                // Extra latency cycles since wbck is 6 cycles
336                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
337                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
338                                InstrStage<1, [A9_NPipe]>], [4, 2]>,
339   //
340   // Double-register Integer Q-Unary
341   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A9_DRegsN],   0, Required>,
342                                // Extra latency cycles since wbck is 6 cycles
343                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
344                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
345                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
346   //
347   // Quad-register Integer Q-Unary
348   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
349                                // Extra latency cycles since wbck is 6 cycles
350                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
351                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
352                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
353   //
354   // Double-register Integer Binary
355   InstrItinData<IIC_VBINiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
356                                // Extra latency cycles since wbck is 6 cycles
357                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
358                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
359                                InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
360   //
361   // Quad-register Integer Binary
362   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
363                                // Extra latency cycles since wbck is 6 cycles
364                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
365                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
366                                InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
367   //
368   // Double-register Integer Subtract
369   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
370                                // Extra latency cycles since wbck is 6 cycles
371                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
372                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
373                                InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
374   //
375   // Quad-register Integer Subtract
376   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
377                                // Extra latency cycles since wbck is 6 cycles
378                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
379                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
380                                InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
381   //
382   // Double-register Integer Shift
383   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
384                                // Extra latency cycles since wbck is 6 cycles
385                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
386                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
387                                InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
388   //
389   // Quad-register Integer Shift
390   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
391                                // Extra latency cycles since wbck is 6 cycles
392                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
393                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
394                                InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
395   //
396   // Double-register Integer Shift (4 cycle)
397   InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
398                                // Extra latency cycles since wbck is 6 cycles
399                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
400                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
401                                InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
402   //
403   // Quad-register Integer Shift (4 cycle)
404   InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
405                                // Extra latency cycles since wbck is 6 cycles
406                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
407                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
408                                InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
409   //
410   // Double-register Integer Binary (4 cycle)
411   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
412                                // Extra latency cycles since wbck is 6 cycles
413                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
414                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
415                                InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
416   //
417   // Quad-register Integer Binary (4 cycle)
418   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
419                                // Extra latency cycles since wbck is 6 cycles
420                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
421                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
422                                InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
423   //
424   // Double-register Integer Subtract (4 cycle)
425   InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
426                                // Extra latency cycles since wbck is 6 cycles
427                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
428                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
429                                InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
430   //
431   // Quad-register Integer Subtract (4 cycle)
432   InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
433                                // Extra latency cycles since wbck is 6 cycles
434                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
435                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
436                                InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
437
438   //
439   // Double-register Integer Count
440   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
441                                // Extra latency cycles since wbck is 6 cycles
442                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
443                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
444                                InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
445   //
446   // Quad-register Integer Count
447   // Result written in N3, but that is relative to the last cycle of multicycle,
448   // so we use 4 for those cases
449   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
450                                // Extra latency cycles since wbck is 7 cycles
451                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
452                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
453                                InstrStage<2, [A9_NPipe]>], [4, 2, 2]>,
454   //
455   // Double-register Absolute Difference and Accumulate
456   InstrItinData<IIC_VABAD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
457                                // Extra latency cycles since wbck is 6 cycles
458                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
459                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
460                                InstrStage<1, [A9_NPipe]>], [6, 3, 2, 1]>,
461   //
462   // Quad-register Absolute Difference and Accumulate
463   InstrItinData<IIC_VABAQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
464                                // Extra latency cycles since wbck is 6 cycles
465                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
466                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
467                                InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
468   //
469   // Double-register Integer Pair Add Long
470   InstrItinData<IIC_VPALiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
471                                // Extra latency cycles since wbck is 6 cycles
472                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
473                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
474                                InstrStage<1, [A9_NPipe]>], [6, 3, 1]>,
475   //
476   // Quad-register Integer Pair Add Long
477   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
478                                // Extra latency cycles since wbck is 6 cycles
479                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
480                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
481                                InstrStage<2, [A9_NPipe]>], [6, 3, 1]>,
482
483   //
484   // Double-register Integer Multiply (.8, .16)
485   InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN],   0, Required>,
486                                // Extra latency cycles since wbck is 6 cycles
487                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
488                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
489                                InstrStage<1, [A9_NPipe]>], [6, 2, 2]>,
490   //
491   // Quad-register Integer Multiply (.8, .16)
492   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
493                                // Extra latency cycles since wbck is 7 cycles
494                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
495                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
496                                InstrStage<2, [A9_NPipe]>], [7, 2, 2]>,
497
498   //
499   // Double-register Integer Multiply (.32)
500   InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN],   0, Required>,
501                                // Extra latency cycles since wbck is 7 cycles
502                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
503                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
504                                InstrStage<2, [A9_NPipe]>], [7, 2, 1]>,
505   //
506   // Quad-register Integer Multiply (.32)
507   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
508                                // Extra latency cycles since wbck is 9 cycles
509                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
510                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
511                                InstrStage<4, [A9_NPipe]>], [9, 2, 1]>,
512   //
513   // Double-register Integer Multiply-Accumulate (.8, .16)
514   InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN],   0, Required>,
515                                // Extra latency cycles since wbck is 6 cycles
516                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
517                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
518                                InstrStage<1, [A9_NPipe]>], [6, 3, 2, 2]>,
519   //
520   // Double-register Integer Multiply-Accumulate (.32)
521   InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN],   0, Required>,
522                                // Extra latency cycles since wbck is 7 cycles
523                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
524                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
525                                InstrStage<2, [A9_NPipe]>], [7, 3, 2, 1]>,
526   //
527   // Quad-register Integer Multiply-Accumulate (.8, .16)
528   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
529                                // Extra latency cycles since wbck is 7 cycles
530                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
531                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
532                                InstrStage<2, [A9_NPipe]>], [7, 3, 2, 2]>,
533   //
534   // Quad-register Integer Multiply-Accumulate (.32)
535   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
536                                // Extra latency cycles since wbck is 9 cycles
537                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
538                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
539                                InstrStage<4, [A9_NPipe]>], [9, 3, 2, 1]>,
540   //
541   // Move Immediate
542   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A9_DRegsN],   0, Required>,
543                                // Extra latency cycles since wbck is 6 cycles
544                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
545                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
546                                InstrStage<1, [A9_NPipe]>], [3]>,
547   //
548   // Double-register Permute Move
549   InstrItinData<IIC_VMOVD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
550   // FIXME: all latencies are arbitrary, no information is available
551                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
552                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
553                                InstrStage<1, [A9_LSPipe]>], [2, 1]>,
554   //
555   // Quad-register Permute Move
556   // Result written in N2, but that is relative to the last cycle of multicycle,
557   // so we use 3 for those cases
558   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
559   // FIXME: all latencies are arbitrary, no information is available
560                                InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
561                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
562                                InstrStage<2, [A9_NPipe]>], [3, 1]>,
563   //
564   // Integer to Single-precision Move
565   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
566   // FIXME: all latencies are arbitrary, no information is available
567                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
568                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
569                                InstrStage<1, [A9_NPipe]>], [2, 1]>,
570   //
571   // Integer to Double-precision Move
572   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
573   // FIXME: all latencies are arbitrary, no information is available
574                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
575                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
576                                InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
577   //
578   // Single-precision to Integer Move
579   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
580   // FIXME: all latencies are arbitrary, no information is available
581                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
582                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
583                                InstrStage<1, [A9_NPipe]>], [2, 1]>,
584   //
585   // Double-precision to Integer Move
586   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
587   // FIXME: all latencies are arbitrary, no information is available
588                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
589                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
590                                InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
591   //
592   // Integer to Lane Move
593   InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN],   0, Required>,
594   // FIXME: all latencies are arbitrary, no information is available
595                                InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
596                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
597                                InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
598
599   //
600   // Double-register FP Unary
601   InstrItinData<IIC_VUNAD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
602                                // Extra latency cycles since wbck is 6 cycles
603                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
604                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
605                                InstrStage<1, [A9_NPipe]>], [5, 2]>,
606   //
607   // Quad-register FP Unary
608   // Result written in N5, but that is relative to the last cycle of multicycle,
609   // so we use 6 for those cases
610   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
611                                // Extra latency cycles since wbck is 7 cycles
612                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
613                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
614                                InstrStage<2, [A9_NPipe]>], [6, 2]>,
615   //
616   // Double-register FP Binary
617   // FIXME: We're using this itin for many instructions and [2, 2] here is too
618   // optimistic.
619   InstrItinData<IIC_VBIND,    [InstrStage<1, [A9_DRegsN],   0, Required>,
620                                // Extra latency cycles since wbck is 6 cycles
621                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
622                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
623                                InstrStage<1, [A9_NPipe]>], [5, 2, 2]>,
624   //
625   // Quad-register FP Binary
626   // Result written in N5, but that is relative to the last cycle of multicycle,
627   // so we use 6 for those cases
628   // FIXME: We're using this itin for many instructions and [2, 2] here is too
629   // optimistic.
630   InstrItinData<IIC_VBINQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
631                                // Extra latency cycles since wbck is 7 cycles
632                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
633                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
634                                InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
635   //
636   // Double-register FP Multiply-Accumulate
637   InstrItinData<IIC_VMACD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
638                                // Extra latency cycles since wbck is 7 cycles
639                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
640                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
641                                InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
642   //
643   // Quad-register FP Multiply-Accumulate
644   // Result written in N9, but that is relative to the last cycle of multicycle,
645   // so we use 10 for those cases
646   InstrItinData<IIC_VMACQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
647                                // Extra latency cycles since wbck is 9 cycles
648                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
649                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
650                                InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>,
651   //
652   // Double-register Reciprocal Step
653   InstrItinData<IIC_VRECSD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
654                                // Extra latency cycles since wbck is 7 cycles
655                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
656                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
657                                InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
658   //
659   // Quad-register Reciprocal Step
660   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
661                                // Extra latency cycles since wbck is 9 cycles
662                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
663                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
664                                InstrStage<4, [A9_NPipe]>], [8, 2, 2]>,
665   //
666   // Double-register Permute
667   InstrItinData<IIC_VPERMD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
668                                // Extra latency cycles since wbck is 6 cycles
669                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
670                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
671                                InstrStage<1, [A9_NPipe]>], [2, 2, 1, 1]>,
672   //
673   // Quad-register Permute
674   // Result written in N2, but that is relative to the last cycle of multicycle,
675   // so we use 3 for those cases
676   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
677                                // Extra latency cycles since wbck is 7 cycles
678                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
679                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
680                                InstrStage<2, [A9_NPipe]>], [3, 3, 1, 1]>,
681   //
682   // Quad-register Permute (3 cycle issue)
683   // Result written in N2, but that is relative to the last cycle of multicycle,
684   // so we use 4 for those cases
685   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A9_DRegsN],   0, Required>,
686                                // Extra latency cycles since wbck is 8 cycles
687                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
688                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
689                                InstrStage<3, [A9_LSPipe]>], [4, 4, 1, 1]>,
690
691   //
692   // Double-register VEXT
693   InstrItinData<IIC_VEXTD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
694                                // Extra latency cycles since wbck is 6 cycles
695                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
696                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
697                                InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
698   //
699   // Quad-register VEXT
700   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
701                                // Extra latency cycles since wbck is 7 cycles
702                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
703                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
704                                InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
705   //
706   // VTB
707   InstrItinData<IIC_VTB1,     [InstrStage<1, [A9_DRegsN],   0, Required>,
708                                // Extra latency cycles since wbck is 7 cycles
709                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
710                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
711                                InstrStage<2, [A9_NPipe]>], [3, 2, 1]>,
712   InstrItinData<IIC_VTB2,     [InstrStage<2, [A9_DRegsN],   0, Required>,
713                                // Extra latency cycles since wbck is 7 cycles
714                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
715                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
716                                InstrStage<2, [A9_NPipe]>], [3, 2, 2, 1]>,
717   InstrItinData<IIC_VTB3,     [InstrStage<2, [A9_DRegsN],   0, Required>,
718                                // Extra latency cycles since wbck is 8 cycles
719                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
720                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
721                                InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 1]>,
722   InstrItinData<IIC_VTB4,     [InstrStage<1, [A9_DRegsN],   0, Required>,
723                                // Extra latency cycles since wbck is 8 cycles
724                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
725                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
726                                InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 3, 1]>,
727   //
728   // VTBX
729   InstrItinData<IIC_VTBX1,    [InstrStage<1, [A9_DRegsN],   0, Required>,
730                                // Extra latency cycles since wbck is 7 cycles
731                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
732                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
733                                InstrStage<2, [A9_NPipe]>], [3, 1, 2, 1]>,
734   InstrItinData<IIC_VTBX2,    [InstrStage<1, [A9_DRegsN],   0, Required>,
735                                // Extra latency cycles since wbck is 7 cycles
736                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
737                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
738                                InstrStage<2, [A9_NPipe]>], [3, 1, 2, 2, 1]>,
739   InstrItinData<IIC_VTBX3,    [InstrStage<1, [A9_DRegsN],   0, Required>,
740                                // Extra latency cycles since wbck is 8 cycles
741                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
742                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
743                                InstrStage<3, [A9_NPipe]>], [4, 1, 2, 2, 3, 1]>,
744   InstrItinData<IIC_VTBX4,    [InstrStage<1, [A9_DRegsN],   0, Required>,
745                                // Extra latency cycles since wbck is 8 cycles
746                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
747                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
748                                InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]>
749 ]>;