MC/ARM: Add an ARMOperand class for condition codes.
[oota-llvm.git] / lib / Target / ARM / ARMScheduleA8.td
1 //=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the itinerary class data for the ARM Cortex A8 processors.
11 //
12 //===----------------------------------------------------------------------===//
13
14 //
15 // Scheduling information derived from "Cortex-A8 Technical Reference Manual".
16 // Functional units referenced by the InstrStage entries of CortexA8Itineraries.
17 def A8_Issue   : FuncUnit; // issue slot; used to enforce 1 load/store per cycle
18 def A8_Pipe0   : FuncUnit; // integer pipeline 0
19 def A8_Pipe1   : FuncUnit; // integer pipeline 1
20 def A8_LdSt0   : FuncUnit; // pipeline 0 load/store
21 def A8_LdSt1   : FuncUnit; // pipeline 1 load/store (no stage in CortexA8Itineraries uses it)
22 def A8_NPipe   : FuncUnit; // NEON ALU/MUL pipe
23 def A8_NLSPipe : FuncUnit; // NEON LS pipe
24 // Cortex-A8 itineraries: for each itinerary class, its stages and operand cycles.
25 // Dual issue pipeline represented by A8_Pipe0 | A8_Pipe1
26 // NOTE(review): a stage listing both pipes presumably needs only one of them.
27 def CortexA8Itineraries : ProcessorItineraries<
28   [A8_Issue, A8_Pipe0, A8_Pipe1, A8_LdSt0, A8_LdSt1, A8_NPipe, A8_NLSPipe], [
29   // Two fully-pipelined integer ALU pipelines
30   //
31   // No operand cycles
32   InstrItinData<IIC_iALUx    , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
33   //
34   // Binary Instructions that produce a result
35   InstrItinData<IIC_iALUi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
36   InstrItinData<IIC_iALUr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
37   InstrItinData<IIC_iALUsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
38   InstrItinData<IIC_iALUsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
39   //
40   // Unary Instructions that produce a result
41   InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
42   InstrItinData<IIC_iUNAsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
43   InstrItinData<IIC_iUNAsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
44   //
45   // Compare instructions
46   InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
47   InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
48   InstrItinData<IIC_iCMPsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
49   InstrItinData<IIC_iCMPsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
50   //
51   // Move instructions, unconditional
52   InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
53   InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
54   InstrItinData<IIC_iMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
55   InstrItinData<IIC_iMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
56   //
57   // Move instructions, conditional
58   InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
59   InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
60   InstrItinData<IIC_iCMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
61   InstrItinData<IIC_iCMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
62
63   // Integer multiply pipeline
64   // Result written in E5, but that is relative to the last cycle of multicycle,
65   // so we use 6 for those cases
66   // NOTE(review): InstrStage<..., 0> presumably starts the next stage in the same cycle.
67   InstrItinData<IIC_iMUL16   , [InstrStage<1, [A8_Pipe0]>], [5, 1, 1]>,
68   InstrItinData<IIC_iMAC16   , [InstrStage<1, [A8_Pipe1], 0>,
69                                 InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
70   InstrItinData<IIC_iMUL32   , [InstrStage<1, [A8_Pipe1], 0>,
71                                 InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>,
72   InstrItinData<IIC_iMAC32   , [InstrStage<1, [A8_Pipe1], 0>,
73                                 InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
74   InstrItinData<IIC_iMUL64   , [InstrStage<2, [A8_Pipe1], 0>,
75                                 InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
76   InstrItinData<IIC_iMAC64   , [InstrStage<2, [A8_Pipe1], 0>,
77                                 InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
78
79   // Integer load pipeline
80   //
81   // loads have an extra cycle of latency, but are fully pipelined
82   // use A8_Issue to enforce the 1 load/store per cycle limit
83   //
84   // Immediate offset
85   InstrItinData<IIC_iLoadi   , [InstrStage<1, [A8_Issue], 0>,
86                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
87                                 InstrStage<1, [A8_LdSt0]>], [3, 1]>,
88   //
89   // Register offset
90   InstrItinData<IIC_iLoadr   , [InstrStage<1, [A8_Issue], 0>,
91                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
92                                 InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
93   //
94   // Scaled register offset, issues over 2 cycles
95   InstrItinData<IIC_iLoadsi  , [InstrStage<2, [A8_Issue], 0>,
96                                 InstrStage<1, [A8_Pipe0], 0>,
97                                 InstrStage<1, [A8_Pipe1]>,
98                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
99                                 InstrStage<1, [A8_LdSt0]>], [4, 1, 1]>,
100   //
101   // Immediate offset with update
102   InstrItinData<IIC_iLoadiu  , [InstrStage<1, [A8_Issue], 0>,
103                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
104                                 InstrStage<1, [A8_LdSt0]>], [3, 2, 1]>,
105   //
106   // Register offset with update
107   InstrItinData<IIC_iLoadru  , [InstrStage<1, [A8_Issue], 0>,
108                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
109                                 InstrStage<1, [A8_LdSt0]>], [3, 2, 1, 1]>,
110   //
111   // Scaled register offset with update, issues over 2 cycles
112   InstrItinData<IIC_iLoadsiu , [InstrStage<2, [A8_Issue], 0>,
113                                 InstrStage<1, [A8_Pipe0], 0>,
114                                 InstrStage<1, [A8_Pipe1]>,
115                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
116                                 InstrStage<1, [A8_LdSt0]>], [4, 3, 1, 1]>,
117   //
118   // Load multiple
119   InstrItinData<IIC_iLoadm   , [InstrStage<2, [A8_Issue], 0>,
120                                 InstrStage<2, [A8_Pipe0], 0>,
121                                 InstrStage<2, [A8_Pipe1]>,
122                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
123                                 InstrStage<1, [A8_LdSt0]>]>,
124
125   // Integer store pipeline
126   //
127   // use A8_Issue to enforce the 1 load/store per cycle limit
128   //
129   // Immediate offset
130   InstrItinData<IIC_iStorei  , [InstrStage<1, [A8_Issue], 0>,
131                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
132                                 InstrStage<1, [A8_LdSt0]>], [3, 1]>,
133   //
134   // Register offset
135   InstrItinData<IIC_iStorer  , [InstrStage<1, [A8_Issue], 0>,
136                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
137                                 InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
138   //
139   // Scaled register offset, issues over 2 cycles
140   InstrItinData<IIC_iStoresi , [InstrStage<2, [A8_Issue], 0>,
141                                 InstrStage<1, [A8_Pipe0], 0>,
142                                 InstrStage<1, [A8_Pipe1]>,
143                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
144                                 InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
145   //
146   // Immediate offset with update
147   InstrItinData<IIC_iStoreiu , [InstrStage<1, [A8_Issue], 0>,
148                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
149                                 InstrStage<1, [A8_LdSt0]>], [2, 3, 1]>,
150   //
151   // Register offset with update
152   InstrItinData<IIC_iStoreru  , [InstrStage<1, [A8_Issue], 0>,
153                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
154                                 InstrStage<1, [A8_LdSt0]>], [2, 3, 1, 1]>,
155   //
156   // Scaled register offset with update, issues over 2 cycles
157   InstrItinData<IIC_iStoresiu, [InstrStage<2, [A8_Issue], 0>,
158                                 InstrStage<1, [A8_Pipe0], 0>,
159                                 InstrStage<1, [A8_Pipe1]>,
160                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
161                                 InstrStage<1, [A8_LdSt0]>], [3, 3, 1, 1]>,
162   //
163   // Store multiple
164   InstrItinData<IIC_iStorem  , [InstrStage<2, [A8_Issue], 0>,
165                                 InstrStage<2, [A8_Pipe0], 0>,
166                                 InstrStage<2, [A8_Pipe1]>,
167                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
168                                 InstrStage<1, [A8_LdSt0]>]>,
169
170   // Branch
171   //
172   // no delay slots, so the latency of a branch is unimportant
173   InstrItinData<IIC_Br      , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
174
175   // VFP
176   // Issue through integer pipeline, and execute in NEON unit. We assume
177   // RunFast mode so that NFP pipeline is used for single-precision when
178   // possible.
179   //
180   // FP Special Register to Integer Register File Move
181   InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
182                               InstrStage<1, [A8_NLSPipe]>]>,
183   //
184   // Single-precision FP Unary
185   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
186                                InstrStage<1, [A8_NPipe]>], [7, 1]>,
187   //
188   // Double-precision FP Unary
189   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
190                                InstrStage<4, [A8_NPipe], 0>,
191                                InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
192   //
193   // Single-precision FP Compare
194   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
195                                InstrStage<1, [A8_NPipe]>], [1, 1]>,
196   //
197   // Double-precision FP Compare
198   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
199                                InstrStage<4, [A8_NPipe], 0>,
200                                InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
201   //
202   // Single to Double FP Convert
203   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
204                                InstrStage<7, [A8_NPipe], 0>,
205                                InstrStage<7, [A8_NLSPipe]>], [7, 1]>,
206   //
207   // Double to Single FP Convert
208   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
209                                InstrStage<5, [A8_NPipe], 0>,
210                                InstrStage<5, [A8_NLSPipe]>], [5, 1]>,
211   //
212   // Single-Precision FP to Integer Convert
213   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
214                                InstrStage<1, [A8_NPipe]>], [7, 1]>,
215   //
216   // Double-Precision FP to Integer Convert
217   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
218                                InstrStage<8, [A8_NPipe], 0>,
219                                InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
220   //
221   // Integer to Single-Precision FP Convert
222   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
223                                InstrStage<1, [A8_NPipe]>], [7, 1]>,
224   //
225   // Integer to Double-Precision FP Convert
226   InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
227                                InstrStage<8, [A8_NPipe], 0>,
228                                InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
229   //
230   // Single-precision FP ALU
231   InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
232                                InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
233   //
234   // Double-precision FP ALU
235   InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
236                                InstrStage<9, [A8_NPipe], 0>,
237                                InstrStage<9, [A8_NLSPipe]>], [9, 1, 1]>,
238   //
239   // Single-precision FP Multiply
240   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
241                                InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
242   //
243   // Double-precision FP Multiply
244   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
245                                InstrStage<11, [A8_NPipe], 0>,
246                                InstrStage<11, [A8_NLSPipe]>], [11, 1, 1]>,
247   //
248   // Single-precision FP MAC
249   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
250                                InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>,
251   //
252   // Double-precision FP MAC
253   InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
254                                InstrStage<19, [A8_NPipe], 0>,
255                                InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
256   //
257   // Single-precision FP DIV
258   InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
259                                InstrStage<20, [A8_NPipe], 0>,
260                                InstrStage<20, [A8_NLSPipe]>], [20, 1, 1]>,
261   //
262   // Double-precision FP DIV
263   InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
264                                InstrStage<29, [A8_NPipe], 0>,
265                                InstrStage<29, [A8_NLSPipe]>], [29, 1, 1]>,
266   //
267   // Single-precision FP SQRT
268   InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
269                                InstrStage<19, [A8_NPipe], 0>,
270                                InstrStage<19, [A8_NLSPipe]>], [19, 1]>,
271   //
272   // Double-precision FP SQRT
273   InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
274                                InstrStage<29, [A8_NPipe], 0>,
275                                InstrStage<29, [A8_NLSPipe]>], [29, 1]>,
276   //
277   // Single-precision FP Load
278   // use A8_Issue to enforce the 1 load/store per cycle limit
279   InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Issue], 0>,
280                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
281                                InstrStage<1, [A8_LdSt0], 0>,
282                                InstrStage<1, [A8_NLSPipe]>]>,
283   //
284   // Double-precision FP Load
285   // use A8_Issue to enforce the 1 load/store per cycle limit
286   InstrItinData<IIC_fpLoad64, [InstrStage<2, [A8_Issue], 0>,
287                                InstrStage<1, [A8_Pipe0], 0>,
288                                InstrStage<1, [A8_Pipe1]>,
289                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
290                                InstrStage<1, [A8_LdSt0], 0>,
291                                InstrStage<1, [A8_NLSPipe]>]>,
292   //
293   // FP Load Multiple
294   // use A8_Issue to enforce the 1 load/store per cycle limit
295   InstrItinData<IIC_fpLoadm,  [InstrStage<3, [A8_Issue], 0>,
296                                InstrStage<2, [A8_Pipe0], 0>,
297                                InstrStage<2, [A8_Pipe1]>,
298                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
299                                InstrStage<1, [A8_LdSt0], 0>,
300                                InstrStage<1, [A8_NLSPipe]>]>,
301   //
302   // Single-precision FP Store
303   // use A8_Issue to enforce the 1 load/store per cycle limit
304   InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Issue], 0>,
305                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
306                                InstrStage<1, [A8_LdSt0], 0>,
307                                InstrStage<1, [A8_NLSPipe]>]>,
308   //
309   // Double-precision FP Store
310   // use A8_Issue to enforce the 1 load/store per cycle limit
311   InstrItinData<IIC_fpStore64,[InstrStage<2, [A8_Issue], 0>,
312                                InstrStage<1, [A8_Pipe0], 0>,
313                                InstrStage<1, [A8_Pipe1]>,
314                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
315                                InstrStage<1, [A8_LdSt0], 0>,
316                                InstrStage<1, [A8_NLSPipe]>]>,
317   //
318   // FP Store Multiple
319   // use A8_Issue to enforce the 1 load/store per cycle limit
320   InstrItinData<IIC_fpStorem, [InstrStage<3, [A8_Issue], 0>,
321                                InstrStage<2, [A8_Pipe0], 0>,
322                                InstrStage<2, [A8_Pipe1]>,
323                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
324                                InstrStage<1, [A8_LdSt0], 0>,
325                                InstrStage<1, [A8_NLSPipe]>]>,
326
327   // NEON
328   // Issue through integer pipeline, and execute in NEON unit.
329   //
330   // VLD1
331   // FIXME: We don't model this instruction properly
332   InstrItinData<IIC_VLD1,     [InstrStage<1, [A8_Issue], 0>,
333                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
334                                InstrStage<1, [A8_LdSt0], 0>,
335                                InstrStage<1, [A8_NLSPipe]>]>,
336   //
337   // VLD2
338   // FIXME: We don't model this instruction properly
339   InstrItinData<IIC_VLD2,     [InstrStage<1, [A8_Issue], 0>,
340                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
341                                InstrStage<1, [A8_LdSt0], 0>,
342                                InstrStage<1, [A8_NLSPipe]>], [2, 2, 1]>,
343   //
344   // VLD3
345   // FIXME: We don't model this instruction properly
346   InstrItinData<IIC_VLD3,     [InstrStage<1, [A8_Issue], 0>,
347                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
348                                InstrStage<1, [A8_LdSt0], 0>,
349                                InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 1]>,
350   //
351   // VLD4
352   // FIXME: We don't model this instruction properly
353   InstrItinData<IIC_VLD4,     [InstrStage<1, [A8_Issue], 0>,
354                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
355                                InstrStage<1, [A8_LdSt0], 0>,
356                                InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 2, 1]>,
357   //
358   // VST
359   // FIXME: We don't model this instruction properly
360   InstrItinData<IIC_VST,      [InstrStage<1, [A8_Issue], 0>,
361                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
362                                InstrStage<1, [A8_LdSt0], 0>,
363                                InstrStage<1, [A8_NLSPipe]>]>,
364   //
365   // Double-register FP Unary
366   InstrItinData<IIC_VUNAD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
367                                InstrStage<1, [A8_NPipe]>], [5, 2]>,
368   //
369   // Quad-register FP Unary
370   // Result written in N5, but that is relative to the last cycle of multicycle,
371   // so we use 6 for those cases
372   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
373                                InstrStage<2, [A8_NPipe]>], [6, 2]>,
374   //
375   // Double-register FP Binary
376   InstrItinData<IIC_VBIND,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
377                                InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
378   //
379   // Quad-register FP Binary
380   // Result written in N5, but that is relative to the last cycle of multicycle,
381   // so we use 6 for those cases
382   InstrItinData<IIC_VBINQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
383                                InstrStage<2, [A8_NPipe]>], [6, 2, 2]>,
384   //
385   // Move Immediate
386   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
387                                InstrStage<1, [A8_NPipe]>], [3]>,
388   //
389   // Double-register Permute Move
390   InstrItinData<IIC_VMOVD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
391                                InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
392   //
393   // Quad-register Permute Move
394   // Result written in N2, but that is relative to the last cycle of multicycle,
395   // so we use 3 for those cases
396   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
397                                InstrStage<2, [A8_NLSPipe]>], [3, 1]>,
398   //
399   // Integer to Single-precision Move
400   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
401                                InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
402   //
403   // Integer to Double-precision Move
404   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
405                                InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
406   //
407   // Single-precision to Integer Move
408   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
409                                InstrStage<1, [A8_NLSPipe]>], [20, 1]>,
410   //
411   // Double-precision to Integer Move
412   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
413                                InstrStage<1, [A8_NLSPipe]>], [20, 20, 1]>,
414   //
415   // Integer to Lane Move
416   InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
417                                InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
418   //
419   // Double-register Permute
420   InstrItinData<IIC_VPERMD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
421                                InstrStage<1, [A8_NLSPipe]>], [2, 2, 1, 1]>,
422   //
423   // Quad-register Permute
424   // Result written in N2, but that is relative to the last cycle of multicycle,
425   // so we use 3 for those cases
426   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
427                                InstrStage<2, [A8_NLSPipe]>], [3, 3, 1, 1]>,
428   //
429   // Quad-register Permute (3 cycle issue)
430   // Result written in N2, but that is relative to the last cycle of multicycle,
431   // so we use 4 for those cases
432   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
433                                InstrStage<1, [A8_NLSPipe]>,
434                                InstrStage<1, [A8_NPipe], 0>,
435                                InstrStage<2, [A8_NLSPipe]>], [4, 4, 1, 1]>,
436   //
437   // Double-register FP Multiply-Accumulate
438   InstrItinData<IIC_VMACD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
439                                InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>,
440   //
441   // Quad-register FP Multiply-Accumulate
442   // Result written in N9, but that is relative to the last cycle of multicycle,
443   // so we use 10 for those cases
444   InstrItinData<IIC_VMACQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
445                                InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
446   //
447   // Double-register Reciprocal Step
448   InstrItinData<IIC_VRECSD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
449                                InstrStage<1, [A8_NPipe]>], [9, 2, 2]>,
450   //
451   // Quad-register Reciprocal Step
452   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
453                                InstrStage<2, [A8_NPipe]>], [10, 2, 2]>,
454   //
455   // Double-register Integer Count
456   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
457                                InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
458   //
459   // Quad-register Integer Count
460   // Result written in N3, but that is relative to the last cycle of multicycle,
461   // so we use 4 for those cases
462   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
463                                InstrStage<2, [A8_NPipe]>], [4, 2, 2]>,
464   //
465   // Double-register Integer Unary
466   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
467                                InstrStage<1, [A8_NPipe]>], [4, 2]>,
468   //
469   // Quad-register Integer Unary
470   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
471                                InstrStage<1, [A8_NPipe]>], [4, 2]>,
472   //
473   // Double-register Integer Q-Unary
474   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
475                                InstrStage<1, [A8_NPipe]>], [4, 1]>,
476   //
477   // Quad-register Integer Q-Unary
478   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
479                                InstrStage<1, [A8_NPipe]>], [4, 1]>,
480   //
481   // Double-register Integer Binary
482   InstrItinData<IIC_VBINiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
483                                InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
484   //
485   // Quad-register Integer Binary
486   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
487                                InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
488   //
489   // Double-register Integer Binary (4 cycle)
490   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
491                                InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
492   //
493   // Quad-register Integer Binary (4 cycle)
494   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
495                                InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
496
497   //
498   // Double-register Integer Subtract
499   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
500                                InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
501   //
502   // Quad-register Integer Subtract
503   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
504                                InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
505   //
506   // Double-register Integer Subtract (4 cycle)
507   InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
508                                InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
509   //
510   // Quad-register Integer Subtract (4 cycle)
511   InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
512                                InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
513   //
514   // Double-register Integer Shift
515   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
516                                InstrStage<1, [A8_NPipe]>], [3, 1, 1]>,
517   //
518   // Quad-register Integer Shift
519   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
520                                InstrStage<2, [A8_NPipe]>], [4, 1, 1]>,
521   //
522   // Double-register Integer Shift (4 cycle)
523   InstrItinData<IIC_VSHLi4D,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
524                                InstrStage<1, [A8_NPipe]>], [4, 1, 1]>,
525   //
526   // Quad-register Integer Shift (4 cycle)
527   InstrItinData<IIC_VSHLi4Q,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
528                                InstrStage<2, [A8_NPipe]>], [5, 1, 1]>,
529   //
530   // Double-register Integer Pair Add Long
531   InstrItinData<IIC_VPALiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
532                                InstrStage<1, [A8_NPipe]>], [6, 3, 1]>,
533   //
534   // Quad-register Integer Pair Add Long
535   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
536                                InstrStage<2, [A8_NPipe]>], [7, 3, 1]>,
537   //
538   // Double-register Absolute Difference and Accumulate
539   InstrItinData<IIC_VABAD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
540                                InstrStage<1, [A8_NPipe]>], [6, 3, 2, 1]>,
541   //
542   // Quad-register Absolute Difference and Accumulate
543   InstrItinData<IIC_VABAQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
544                                InstrStage<2, [A8_NPipe]>], [6, 3, 2, 1]>,
545
546   //
547   // Double-register Integer Multiply (.8, .16)
548   InstrItinData<IIC_VMULi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
549                                InstrStage<1, [A8_NPipe]>], [6, 2, 2]>,
550   //
551   // Double-register Integer Multiply (.32)
552   InstrItinData<IIC_VMULi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
553                                InstrStage<2, [A8_NPipe]>], [7, 2, 1]>,
554   //
555   // Quad-register Integer Multiply (.8, .16)
556   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
557                                InstrStage<2, [A8_NPipe]>], [7, 2, 2]>,
558   //
559   // Quad-register Integer Multiply (.32)
560   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
561                                InstrStage<1, [A8_NPipe]>,
562                                InstrStage<2, [A8_NLSPipe], 0>,
563                                InstrStage<3, [A8_NPipe]>], [9, 2, 1]>,
564   //
565   // Double-register Integer Multiply-Accumulate (.8, .16)
566   InstrItinData<IIC_VMACi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
567                                InstrStage<1, [A8_NPipe]>], [6, 3, 2, 2]>,
568   //
569   // Double-register Integer Multiply-Accumulate (.32)
570   InstrItinData<IIC_VMACi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
571                                InstrStage<2, [A8_NPipe]>], [7, 3, 2, 1]>,
572   //
573   // Quad-register Integer Multiply-Accumulate (.8, .16)
574   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
575                                InstrStage<2, [A8_NPipe]>], [7, 3, 2, 2]>,
576   //
577   // Quad-register Integer Multiply-Accumulate (.32)
578   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
579                                InstrStage<1, [A8_NPipe]>,
580                                InstrStage<2, [A8_NLSPipe], 0>,
581                                InstrStage<3, [A8_NPipe]>], [9, 3, 2, 1]>,
582   //
583   // Double-register VEXT
584   InstrItinData<IIC_VEXTD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
585                                InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
586   //
587   // Quad-register VEXT
588   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
589                                InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
590   //
591   // VTB
592   InstrItinData<IIC_VTB1,     [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
593                                InstrStage<2, [A8_NLSPipe]>], [3, 2, 1]>,
594   InstrItinData<IIC_VTB2,     [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
595                                InstrStage<2, [A8_NLSPipe]>], [3, 2, 2, 1]>,
596   InstrItinData<IIC_VTB3,     [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
597                                InstrStage<1, [A8_NLSPipe]>,
598                                InstrStage<1, [A8_NPipe], 0>,
599                                InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 1]>,
600   InstrItinData<IIC_VTB4,     [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
601                                InstrStage<1, [A8_NLSPipe]>,
602                                InstrStage<1, [A8_NPipe], 0>,
603                                InstrStage<2, [A8_NLSPipe]>],[4, 2, 2, 3, 3, 1]>,
604   //
605   // VTBX
606   InstrItinData<IIC_VTBX1,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
607                                InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 1]>,
608   InstrItinData<IIC_VTBX2,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
609                                InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 2, 1]>,
610   InstrItinData<IIC_VTBX3,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
611                                InstrStage<1, [A8_NLSPipe]>,
612                                InstrStage<1, [A8_NPipe], 0>,
613                                InstrStage<2, [A8_NLSPipe]>],[4, 1, 2, 2, 3, 1]>,
614   InstrItinData<IIC_VTBX4,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
615                                InstrStage<1, [A8_NLSPipe]>,
616                                InstrStage<1, [A8_NPipe], 0>,
617                             InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
618 ]>;