Fixed logic error. Should check Builder for validity before calling SetSession
[oota-llvm.git] / lib / Target / ARM / ARMScheduleA9.td
1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
2 // 
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 // 
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
11 //
12 //===----------------------------------------------------------------------===//
13
14 //
15 // Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical
16 // Reference Manual".
17 //
18 // Dual issue pipeline represented by FU_Pipe0 | FU_Pipe1
19 //
20 def CortexA9Itineraries : ProcessorItineraries<[
21   // VFP and NEON share the same register file. This means that every VFP
22   // instruction should wait for full completion of the consecutive NEON
23   // instruction and vice-versa. We model this behavior with two artificial FUs:
24   // DRegsVFP and DRegsN.
25   //
26   // Every VFP instruction:
27   //  - Acquires DRegsVFP resource for 1 cycle
28   //  - Reserves DRegsN resource for the whole duration (including time to
29   //    register file writeback!).
30   // Every NEON instruction does the same but with FUs swapped.
31   //
32   // Since the reserved FU cannot be acquired, this models precisely the
33   // "cross-domain" stalls.
34
35   // VFP
36   // Issue through integer pipeline, and execute in NEON unit.
37
38   // FP Special Register to Integer Register File Move
39   InstrItinData<IIC_fpSTAT , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
40                               InstrStage<2, [FU_DRegsN],   0, Reserved>,
41                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
42                               InstrStage<1, [FU_NPipe]>]>,
43   //
44   // Single-precision FP Unary
45   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
46                                // Extra latency cycles since wbck is 2 cycles
47                                InstrStage<3, [FU_DRegsN],   0, Reserved>,
48                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
49                                InstrStage<1, [FU_NPipe]>], [1, 1]>,
50   //
51   // Double-precision FP Unary
52   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
53                                // Extra latency cycles since wbck is 2 cycles
54                                InstrStage<3, [FU_DRegsN],   0, Reserved>,
55                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
56                                InstrStage<1, [FU_NPipe]>], [1, 1]>,
57
58   //
59   // Single-precision FP Compare
60   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
61                                // Extra latency cycles since wbck is 4 cycles
62                                InstrStage<5, [FU_DRegsN],   0, Reserved>,
63                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
64                                InstrStage<1, [FU_NPipe]>], [1, 1]>,
65   //
66   // Double-precision FP Compare
67   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
68                                // Extra latency cycles since wbck is 4 cycles
69                                InstrStage<5, [FU_DRegsN],   0, Reserved>,
70                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
71                                InstrStage<1, [FU_NPipe]>], [1, 1]>,
72   //
73   // Single to Double FP Convert
74   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
75                                InstrStage<5, [FU_DRegsN],   0, Reserved>,
76                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
77                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
78   //
79   // Double to Single FP Convert
80   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
81                                InstrStage<5, [FU_DRegsN],   0, Reserved>,
82                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
83                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
84
85   //
86   // Single to Half FP Convert
87   InstrItinData<IIC_fpCVTSH , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
88                                InstrStage<5, [FU_DRegsN],   0, Reserved>,
89                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
90                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
91   //
92   // Half to Single FP Convert
93   InstrItinData<IIC_fpCVTHS , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
94                                InstrStage<3, [FU_DRegsN],   0, Reserved>,
95                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
96                                InstrStage<1, [FU_NPipe]>], [2, 1]>,
97
98   //
99   // Single-Precision FP to Integer Convert
100   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
101                                InstrStage<5, [FU_DRegsN],   0, Reserved>,
102                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
103                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
104   //
105   // Double-Precision FP to Integer Convert
106   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
107                                InstrStage<5, [FU_DRegsN],   0, Reserved>,
108                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
109                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
110   //
111   // Integer to Single-Precision FP Convert
112   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
113                                InstrStage<5, [FU_DRegsN],   0, Reserved>,
114                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
115                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
116   //
117   // Integer to Double-Precision FP Convert
118   InstrItinData<IIC_fpCVTID , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
119                                InstrStage<5, [FU_DRegsN],   0, Reserved>,
120                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
121                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
122   //
123   // Single-precision FP ALU
124   InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
125                                InstrStage<5, [FU_DRegsN],   0, Reserved>,
126                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
127                                InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
128   //
129   // Double-precision FP ALU
130   InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
131                                InstrStage<5, [FU_DRegsN],   0, Reserved>,
132                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
133                                InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
134   //
135   // Single-precision FP Multiply
136   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
137                                InstrStage<6, [FU_DRegsN],   0, Reserved>,
138                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
139                                InstrStage<1, [FU_NPipe]>], [5, 1, 1]>,
140   //
141   // Double-precision FP Multiply
142   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
143                                InstrStage<7, [FU_DRegsN],   0, Reserved>,
144                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
145                                InstrStage<2, [FU_NPipe]>], [6, 1, 1]>,
146   //
147   // Single-precision FP MAC
148   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
149                                InstrStage<9, [FU_DRegsN],   0, Reserved>,
150                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
151                                InstrStage<1, [FU_NPipe]>], [8, 0, 1, 1]>,
152   //
153   // Double-precision FP MAC
154   InstrItinData<IIC_fpMAC64 , [InstrStage<1,  [FU_DRegsVFP], 0, Required>,
155                                InstrStage<10, [FU_DRegsN],  0, Reserved>,
156                                InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
157                                InstrStage<2,  [FU_NPipe]>], [9, 0, 1, 1]>,
158   //
159   // Single-precision FP DIV
160   InstrItinData<IIC_fpDIV32 , [InstrStage<1,  [FU_DRegsVFP], 0, Required>,
161                                InstrStage<16, [FU_DRegsN],  0, Reserved>,
162                                InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
163                                InstrStage<10, [FU_NPipe]>], [15, 1, 1]>,
164   //
165   // Double-precision FP DIV
166   InstrItinData<IIC_fpDIV64 , [InstrStage<1,  [FU_DRegsVFP], 0, Required>,
167                                InstrStage<26, [FU_DRegsN],  0, Reserved>,
168                                InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
169                                InstrStage<20, [FU_NPipe]>], [25, 1, 1]>,
170   //
171   // Single-precision FP SQRT
172   InstrItinData<IIC_fpSQRT32, [InstrStage<1,  [FU_DRegsVFP], 0, Required>,
173                                InstrStage<18, [FU_DRegsN],   0, Reserved>,
174                                InstrStage<1,   [FU_Pipe0, FU_Pipe1]>,
175                                InstrStage<13,  [FU_NPipe]>], [17, 1]>,
176   //
177   // Double-precision FP SQRT
178   InstrItinData<IIC_fpSQRT64, [InstrStage<1,  [FU_DRegsVFP], 0, Required>,
179                                InstrStage<33, [FU_DRegsN],   0, Reserved>,
180                                InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
181                                InstrStage<28, [FU_NPipe]>], [32, 1]>,
182
183   //
184   // Integer to Single-precision Move
185   InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [FU_DRegsVFP], 0, Required>,
186                                // Extra 1 latency cycle since wbck is 2 cycles
187                                InstrStage<3, [FU_DRegsN],   0, Reserved>,
188                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
189                                InstrStage<1, [FU_NPipe]>], [1, 1]>,
190   //
191   // Integer to Double-precision Move
192   InstrItinData<IIC_fpMOVID,  [InstrStage<1, [FU_DRegsVFP], 0, Required>,
193                                // Extra 1 latency cycle since wbck is 2 cycles
194                                InstrStage<3, [FU_DRegsN],   0, Reserved>,
195                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
196                                InstrStage<1, [FU_NPipe]>], [1, 1, 1]>,
197   //
198   // Single-precision to Integer Move
199   InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [FU_DRegsVFP], 0, Required>,
200                                InstrStage<2, [FU_DRegsN],   0, Reserved>,
201                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
202                                InstrStage<1, [FU_NPipe]>], [1, 1]>,
203   //
204   // Double-precision to Integer Move
205   InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [FU_DRegsVFP], 0, Required>,
206                                InstrStage<2, [FU_DRegsN],   0, Reserved>,
207                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
208                                InstrStage<1, [FU_NPipe]>], [1, 1, 1]>,
209   //
210   // Single-precision FP Load
211   // use FU_Issue to enforce the 1 load/store per cycle limit
212   InstrItinData<IIC_fpLoad32, [InstrStage<1, [FU_DRegsVFP], 0, Required>,
213                                InstrStage<2, [FU_DRegsN],   0, Reserved>,
214                                InstrStage<1, [FU_Issue], 0>, 
215                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
216                                InstrStage<1, [FU_LdSt0], 0>,
217                                InstrStage<1, [FU_NPipe]>]>,
218   //
219   // Double-precision FP Load
220   // use FU_Issue to enforce the 1 load/store per cycle limit
221   InstrItinData<IIC_fpLoad64, [InstrStage<1, [FU_DRegsVFP], 0, Required>,
222                                InstrStage<2, [FU_DRegsN],   0, Reserved>,
223                                InstrStage<1, [FU_Issue], 0>, 
224                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
225                                InstrStage<1, [FU_LdSt0], 0>,
226                                InstrStage<1, [FU_NPipe]>]>,
227   //
228   // FP Load Multiple
229   // use FU_Issue to enforce the 1 load/store per cycle limit
230   InstrItinData<IIC_fpLoadm,  [InstrStage<1, [FU_DRegsVFP], 0, Required>,
231                                InstrStage<2, [FU_DRegsN],   0, Reserved>,
232                                InstrStage<1, [FU_Issue], 0>, 
233                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
234                                InstrStage<1, [FU_LdSt0], 0>,
235                                InstrStage<1, [FU_NPipe]>]>,
236   //
237   // Single-precision FP Store
238   // use FU_Issue to enforce the 1 load/store per cycle limit
239   InstrItinData<IIC_fpStore32,[InstrStage<1, [FU_DRegsVFP], 0, Required>,
240                                InstrStage<2, [FU_DRegsN],   0, Reserved>,
241                                InstrStage<1, [FU_Issue], 0>, 
242                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
243                                InstrStage<1, [FU_LdSt0], 0>,
244                                InstrStage<1, [FU_NPipe]>]>,
245   //
246   // Double-precision FP Store
247   // use FU_Issue to enforce the 1 load/store per cycle limit
248   InstrItinData<IIC_fpStore64,[InstrStage<1, [FU_DRegsVFP], 0, Required>,
249                                InstrStage<2, [FU_DRegsN],   0, Reserved>,
250                                InstrStage<1, [FU_Issue], 0>, 
251                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
252                                InstrStage<1, [FU_LdSt0], 0>,
253                                InstrStage<1, [FU_NPipe]>]>,
254   //
255   // FP Store Multiple
256   // use FU_Issue to enforce the 1 load/store per cycle limit
257   InstrItinData<IIC_fpStorem, [InstrStage<1, [FU_DRegsVFP], 0, Required>,
258                                InstrStage<2, [FU_DRegsN],   0, Reserved>,
259                                InstrStage<1, [FU_Issue], 0>, 
260                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
261                                InstrStage<1, [FU_LdSt0], 0>,
262                                InstrStage<1, [FU_NPipe]>]>,
263   // NEON
264   // Issue through integer pipeline, and execute in NEON unit.
265   // FIXME: Neon pipeline and LdSt unit are multiplexed. 
266   //        Add some syntactic sugar to model this!
267   // VLD1
268   // FIXME: We don't model this instruction properly
269   InstrItinData<IIC_VLD1,     [InstrStage<1, [FU_DRegsN],   0, Required>,
270                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
271                                InstrStage<1, [FU_Issue], 0>, 
272                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
273                                InstrStage<1, [FU_LdSt0], 0>,
274                                InstrStage<1, [FU_NPipe]>]>,
275   //
276   // VLD2
277   // FIXME: We don't model this instruction properly
278   InstrItinData<IIC_VLD2,     [InstrStage<1, [FU_DRegsN],   0, Required>,
279                                // Extra latency cycles since wbck is 6 cycles
280                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
281                                InstrStage<1, [FU_Issue], 0>, 
282                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
283                                InstrStage<1, [FU_LdSt0], 0>,
284                                InstrStage<1, [FU_NPipe]>], [2, 2, 1]>,
285   //
286   // VLD3
287   // FIXME: We don't model this instruction properly
288   InstrItinData<IIC_VLD3,     [InstrStage<1, [FU_DRegsN],   0, Required>,
289                                // Extra latency cycles since wbck is 6 cycles
290                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
291                                InstrStage<1, [FU_Issue], 0>, 
292                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
293                                InstrStage<1, [FU_LdSt0], 0>,
294                                InstrStage<1, [FU_NPipe]>], [2, 2, 2, 1]>,
295   //
296   // VLD4
297   // FIXME: We don't model this instruction properly
298   InstrItinData<IIC_VLD4,     [InstrStage<1, [FU_DRegsN],   0, Required>,
299                                // Extra latency cycles since wbck is 6 cycles
300                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
301                                InstrStage<1, [FU_Issue], 0>, 
302                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
303                                InstrStage<1, [FU_LdSt0], 0>,
304                                InstrStage<1, [FU_NPipe]>], [2, 2, 2, 2, 1]>,
305   //
306   // VST
307   // FIXME: We don't model this instruction properly
308   InstrItinData<IIC_VST,      [InstrStage<1, [FU_DRegsN],   0, Required>,
309                                // Extra latency cycles since wbck is 6 cycles
310                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
311                                InstrStage<1, [FU_Issue], 0>, 
312                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
313                                InstrStage<1, [FU_LdSt0], 0>,
314                                InstrStage<1, [FU_NPipe]>]>,
315   //
316   // Double-register Integer Unary
317   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [FU_DRegsN],   0, Required>,
318                                // Extra latency cycles since wbck is 6 cycles
319                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
320                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
321                                InstrStage<1, [FU_NPipe]>], [4, 2]>,
322   //
323   // Quad-register Integer Unary
324   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [FU_DRegsN],   0, Required>,
325                                // Extra latency cycles since wbck is 6 cycles
326                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
327                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
328                                InstrStage<1, [FU_NPipe]>], [4, 2]>,
329   //
330   // Double-register Integer Q-Unary
331   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [FU_DRegsN],   0, Required>,
332                                // Extra latency cycles since wbck is 6 cycles
333                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
334                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
335                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
336   //
337   // Quad-register Integer CountQ-Unary
338   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [FU_DRegsN],   0, Required>,
339                                // Extra latency cycles since wbck is 6 cycles
340                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
341                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
342                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
343   //
344   // Double-register Integer Binary
345   InstrItinData<IIC_VBINiD,   [InstrStage<1, [FU_DRegsN],   0, Required>,
346                                // Extra latency cycles since wbck is 6 cycles
347                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
348                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
349                                InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
350   //
351   // Quad-register Integer Binary
352   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [FU_DRegsN],   0, Required>,
353                                // Extra latency cycles since wbck is 6 cycles
354                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
355                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
356                                InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
357   //
358   // Double-register Integer Subtract
359   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [FU_DRegsN],   0, Required>,
360                                // Extra latency cycles since wbck is 6 cycles
361                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
362                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
363                                InstrStage<1, [FU_NPipe]>], [3, 2, 1]>,
364   //
365   // Quad-register Integer Subtract
366   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [FU_DRegsN],   0, Required>,
367                                // Extra latency cycles since wbck is 6 cycles
368                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
369                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
370                                InstrStage<1, [FU_NPipe]>], [3, 2, 1]>,
371   //
372   // Double-register Integer Shift
373   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [FU_DRegsN],   0, Required>,
374                                // Extra latency cycles since wbck is 6 cycles
375                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
376                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
377                                InstrStage<1, [FU_NPipe]>], [3, 1, 1]>,
378   //
379   // Quad-register Integer Shift
380   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [FU_DRegsN],   0, Required>,
381                                // Extra latency cycles since wbck is 6 cycles
382                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
383                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
384                                InstrStage<1, [FU_NPipe]>], [3, 1, 1]>,
385   //
386   // Double-register Integer Shift (4 cycle)
387   InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [FU_DRegsN],   0, Required>,
388                                // Extra latency cycles since wbck is 6 cycles
389                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
390                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
391                                InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
392   //
393   // Quad-register Integer Shift (4 cycle)
394   InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [FU_DRegsN],   0, Required>,
395                                // Extra latency cycles since wbck is 6 cycles
396                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
397                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
398                                InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
399   //
400   // Double-register Integer Binary (4 cycle)
401   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [FU_DRegsN],   0, Required>,
402                                // Extra latency cycles since wbck is 6 cycles
403                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
404                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
405                                InstrStage<1, [FU_NPipe]>], [4, 2, 2]>,
406   //
407   // Quad-register Integer Binary (4 cycle)
408   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [FU_DRegsN],   0, Required>,
409                                // Extra latency cycles since wbck is 6 cycles
410                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
411                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
412                                InstrStage<1, [FU_NPipe]>], [4, 2, 2]>,
413   //
414   // Double-register Integer Subtract (4 cycle) -- FIXME: reuses IIC_VSUBiD (see above); IIC_VSUBi4D intended?
415   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [FU_DRegsN],   0, Required>,
416                                // Extra latency cycles since wbck is 6 cycles
417                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
418                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
419                                InstrStage<1, [FU_NPipe]>], [4, 2, 1]>,
420   //
421   // Quad-register Integer Subtract (4 cycle) -- FIXME: reuses IIC_VSUBiQ (see above); IIC_VSUBi4Q intended?
422   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [FU_DRegsN],   0, Required>,
423                                // Extra latency cycles since wbck is 6 cycles
424                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
425                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
426                                InstrStage<1, [FU_NPipe]>], [4, 2, 1]>,
427
428   //
429   // Double-register Integer Count
430   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [FU_DRegsN],   0, Required>,
431                                // Extra latency cycles since wbck is 6 cycles
432                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
433                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
434                                InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
435   //
436   // Quad-register Integer Count
437   // Result written in N3, but that is relative to the last cycle of multicycle,
438   // so we use 4 for those cases
439   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [FU_DRegsN],   0, Required>,
440                                // Extra latency cycles since wbck is 7 cycles
441                                InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
442                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
443                                InstrStage<2, [FU_NPipe]>], [4, 2, 2]>,
444   //
445   // Double-register Absolute Difference and Accumulate
446   InstrItinData<IIC_VABAD,    [InstrStage<1, [FU_DRegsN],   0, Required>,
447                                // Extra latency cycles since wbck is 6 cycles
448                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
449                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
450                                InstrStage<1, [FU_NPipe]>], [6, 3, 2, 1]>,
451   //
452   // Quad-register Absolute Difference and Accumulate
453   InstrItinData<IIC_VABAQ,    [InstrStage<1, [FU_DRegsN],   0, Required>,
454                                // Extra latency cycles since wbck is 6 cycles
455                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
456                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
457                                InstrStage<2, [FU_NPipe]>], [6, 3, 2, 1]>,
458   //
459   // Double-register Integer Pair Add Long
460   InstrItinData<IIC_VPALiD,   [InstrStage<1, [FU_DRegsN],   0, Required>,
461                                // Extra latency cycles since wbck is 6 cycles
462                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
463                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
464                                InstrStage<1, [FU_NPipe]>], [6, 3, 1]>,
465   //
466   // Quad-register Integer Pair Add Long
467   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [FU_DRegsN],   0, Required>,
468                                // Extra latency cycles since wbck is 6 cycles
469                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
470                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
471                                InstrStage<2, [FU_NPipe]>], [6, 3, 1]>,
472
473   //
474   // Double-register Integer Multiply (.8, .16)
475   InstrItinData<IIC_VMULi16D, [InstrStage<1, [FU_DRegsN],   0, Required>,
476                                // Extra latency cycles since wbck is 6 cycles
477                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
478                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
479                                InstrStage<1, [FU_NPipe]>], [6, 2, 2]>,
480   //
481   // Quad-register Integer Multiply (.8, .16)
482   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [FU_DRegsN],   0, Required>,
483                                // Extra latency cycles since wbck is 7 cycles
484                                InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
485                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
486                                InstrStage<2, [FU_NPipe]>], [7, 2, 2]>,
487
488   //
489   // Double-register Integer Multiply (.32)
490   InstrItinData<IIC_VMULi32D, [InstrStage<1, [FU_DRegsN],   0, Required>,
491                                // Extra latency cycles since wbck is 7 cycles
492                                InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
493                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
494                                InstrStage<2, [FU_NPipe]>], [7, 2, 1]>,
495   //
496   // Quad-register Integer Multiply (.32)
497   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [FU_DRegsN],   0, Required>,
498                                // Extra latency cycles since wbck is 9 cycles
499                                InstrStage<10, [FU_DRegsVFP], 0, Reserved>,
500                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
501                                InstrStage<4, [FU_NPipe]>], [9, 2, 1]>,
502   //
503   // Double-register Integer Multiply-Accumulate (.8, .16)
504   InstrItinData<IIC_VMACi16D, [InstrStage<1, [FU_DRegsN],   0, Required>,
505                                // Extra latency cycles since wbck is 6 cycles
506                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
507                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
508                                InstrStage<1, [FU_NPipe]>], [6, 3, 2, 2]>,
509   //
510   // Double-register Integer Multiply-Accumulate (.32)
511   InstrItinData<IIC_VMACi32D, [InstrStage<1, [FU_DRegsN],   0, Required>,
512                                // Extra latency cycles since wbck is 7 cycles
513                                InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
514                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
515                                InstrStage<2, [FU_NPipe]>], [7, 3, 2, 1]>,
516   //
517   // Quad-register Integer Multiply-Accumulate (.8, .16)
518   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [FU_DRegsN],   0, Required>,
519                                // Extra latency cycles since wbck is 7 cycles
520                                InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
521                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
522                                InstrStage<2, [FU_NPipe]>], [7, 3, 2, 2]>,
523   //
524   // Quad-register Integer Multiply-Accumulate (.32)
525   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [FU_DRegsN],   0, Required>,
526                                // Extra latency cycles since wbck is 9 cycles
527                                InstrStage<10, [FU_DRegsVFP], 0, Reserved>,
528                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
529                                InstrStage<4, [FU_NPipe]>], [9, 3, 2, 1]>,
530   //
531   // Move Immediate
532   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [FU_DRegsN],   0, Required>,
533                                // Extra latency cycles since wbck is 6 cycles
534                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
535                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
536                                InstrStage<1, [FU_NPipe]>], [3]>,
537   //
538   // Double-register Permute Move
539   InstrItinData<IIC_VMOVD,    [InstrStage<1, [FU_DRegsN],   0, Required>,
540   // FIXME: all latencies are arbitrary, no information is available
541                                InstrStage<3, [FU_DRegsVFP], 0, Reserved>,
542                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
543                                InstrStage<1, [FU_NLSPipe]>], [2, 1]>,
544   //
545   // Quad-register Permute Move
546   // Result written in N2, but that is relative to the last cycle of multicycle,
547   // so we use 3 for those cases
548   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [FU_DRegsN],   0, Required>,
549   // FIXME: all latencies are arbitrary, no information is available
550                                InstrStage<4, [FU_DRegsVFP], 0, Reserved>,
551                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
552                                InstrStage<2, [FU_NPipe]>], [3, 1]>,
553   //
554   // Integer to Single-precision Move
555   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [FU_DRegsN],   0, Required>,
556   // FIXME: all latencies are arbitrary, no information is available
557                                InstrStage<3, [FU_DRegsVFP], 0, Reserved>,
558                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
559                                InstrStage<1, [FU_NPipe]>], [2, 1]>,
560   //
561   // Integer to Double-precision Move
562   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [FU_DRegsN],   0, Required>,
563   // FIXME: all latencies are arbitrary, no information is available
564                                InstrStage<3, [FU_DRegsVFP], 0, Reserved>,
565                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
566                                InstrStage<1, [FU_NPipe]>], [2, 1, 1]>,
567   //
568   // Single-precision to Integer Move
569   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [FU_DRegsN],   0, Required>,
570   // FIXME: all latencies are arbitrary, no information is available
571                                InstrStage<3, [FU_DRegsVFP], 0, Reserved>,
572                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
573                                InstrStage<1, [FU_NPipe]>], [2, 1]>,
574   //
575   // Double-precision to Integer Move
576   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [FU_DRegsN],   0, Required>,
577   // FIXME: all latencies are arbitrary, no information is available
578                                InstrStage<3, [FU_DRegsVFP], 0, Reserved>,
579                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
580                                InstrStage<1, [FU_NPipe]>], [2, 2, 1]>,
581   //
582   // Integer to Lane Move
583   InstrItinData<IIC_VMOVISL , [InstrStage<1, [FU_DRegsN],   0, Required>,
584   // FIXME: all latencies are arbitrary, no information is available
585                                InstrStage<4, [FU_DRegsVFP], 0, Reserved>,
586                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
587                                InstrStage<2, [FU_NPipe]>], [3, 1, 1]>,
588
589   //
590   // Double-register FP Unary
591   InstrItinData<IIC_VUNAD,    [InstrStage<1, [FU_DRegsN],   0, Required>,
592                                // Extra latency cycles since wbck is 6 cycles
593                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
594                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
595                                InstrStage<1, [FU_NPipe]>], [5, 2]>,
596   //
597   // Quad-register FP Unary
598   // Result written in N5, but that is relative to the last cycle of multicycle,
599   // so we use 6 for those cases
600   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [FU_DRegsN],   0, Required>,
601                                // Extra latency cycles since wbck is 7 cycles
602                                InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
603                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
604                                InstrStage<2, [FU_NPipe]>], [6, 2]>,
605   //
606   // Double-register FP Binary
607   // FIXME: We're using this itin for many instructions and [2, 2] here is too
608   // optimistic.
609   InstrItinData<IIC_VBIND,    [InstrStage<1, [FU_DRegsN],   0, Required>,
610                                // Extra latency cycles since wbck is 6 cycles
611                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
612                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
613                                InstrStage<1, [FU_NPipe]>], [5, 2, 2]>,
614   //
615   // Quad-register FP Binary
616   // Result written in N5, but that is relative to the last cycle of multicycle,
617   // so we use 6 for those cases
618   // FIXME: We're using this itin for many instructions and [2, 2] here is too
619   // optimistic.
620   InstrItinData<IIC_VBINQ,    [InstrStage<1, [FU_DRegsN],   0, Required>,
621                                // Extra latency cycles since wbck is 7 cycles
622                                InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
623                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
624                                InstrStage<2, [FU_NPipe]>], [6, 2, 2]>,
625   //
626   // Double-register FP Multiply-Accumulate
627   InstrItinData<IIC_VMACD,    [InstrStage<1, [FU_DRegsN],   0, Required>,
628                                // Extra latency cycles since wbck is 7 cycles
629                                InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
630                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
631                                InstrStage<2, [FU_NPipe]>], [6, 3, 2, 1]>,
632   //
633   // Quad-register FP Multiply-Accumulate
634   // Result written in N9, but that is relative to the last cycle of multicycle,
635   // so we use 10 for those cases (FIXME: the list below starts with 8, not 10)
636   InstrItinData<IIC_VMACQ,    [InstrStage<1, [FU_DRegsN],   0, Required>,
637                                // Extra latency cycles since wbck is 9 cycles
638                                InstrStage<10, [FU_DRegsVFP], 0, Reserved>,
639                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
640                                InstrStage<4, [FU_NPipe]>], [8, 4, 2, 1]>,
641   //
642   // Double-register Reciprocal Step
643   InstrItinData<IIC_VRECSD,   [InstrStage<1, [FU_DRegsN],   0, Required>,
644                                // Extra latency cycles since wbck is 7 cycles
645                                InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
646                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
647                                InstrStage<2, [FU_NPipe]>], [6, 2, 2]>,
648   //
649   // Quad-register Reciprocal Step
650   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [FU_DRegsN],   0, Required>,
651                                // Extra latency cycles since wbck is 9 cycles
652                                InstrStage<10, [FU_DRegsVFP], 0, Reserved>,
653                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
654                                InstrStage<4, [FU_NPipe]>], [8, 2, 2]>,
655   //
656   // Double-register Permute
657   InstrItinData<IIC_VPERMD,   [InstrStage<1, [FU_DRegsN],   0, Required>,
658                                // Extra latency cycles since wbck is 6 cycles
659                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
660                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
661                                InstrStage<1, [FU_NPipe]>], [2, 2, 1, 1]>,
662   //
663   // Quad-register Permute
664   // Result written in N2, but that is relative to the last cycle of multicycle,
665   // so we use 3 for those cases
666   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [FU_DRegsN],   0, Required>,
667                                // Extra latency cycles since wbck is 7 cycles
668                                InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
669                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
670                                InstrStage<2, [FU_NPipe]>], [3, 3, 1, 1]>,
671   //
672   // Quad-register Permute (3 cycle issue)
673   // Result written in N2, but that is relative to the last cycle of multicycle,
674   // so we use 4 for those cases
675   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [FU_DRegsN],   0, Required>,
676                                // Extra latency cycles since wbck is 8 cycles
677                                InstrStage<9, [FU_DRegsVFP], 0, Reserved>,
678                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
679                                InstrStage<3, [FU_NLSPipe]>], [4, 4, 1, 1]>,
680
681   //
682   // Double-register VEXT
683   InstrItinData<IIC_VEXTD,    [InstrStage<1, [FU_DRegsN],   0, Required>,
684                                // Extra latency cycles since wbck is 6 cycles
685                                InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
686                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
687                                InstrStage<1, [FU_NPipe]>], [2, 1, 1]>,
688   //
689   // Quad-register VEXT
690   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [FU_DRegsN],   0, Required>,
691                                // Extra latency cycles since wbck is 7 cycles
692                                InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
693                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
694                                InstrStage<2, [FU_NPipe]>], [3, 1, 1]>,
695   //
696   // VTB
697   InstrItinData<IIC_VTB1,     [InstrStage<1, [FU_DRegsN],   0, Required>,
698                                // Extra latency cycles since wbck is 7 cycles
699                                InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
700                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
701                                InstrStage<2, [FU_NPipe]>], [3, 2, 1]>,
702   InstrItinData<IIC_VTB2,     [InstrStage<2, [FU_DRegsN],   0, Required>,
703                                // Extra latency cycles since wbck is 7 cycles
704                                InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
705                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
706                                InstrStage<2, [FU_NPipe]>], [3, 2, 2, 1]>,
707   InstrItinData<IIC_VTB3,     [InstrStage<2, [FU_DRegsN],   0, Required>,
708                                // Extra latency cycles since wbck is 8 cycles
709                                InstrStage<9, [FU_DRegsVFP], 0, Reserved>,
710                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
711                                InstrStage<3, [FU_NPipe]>], [4, 2, 2, 3, 1]>,
712   InstrItinData<IIC_VTB4,     [InstrStage<1, [FU_DRegsN],   0, Required>,
713                                // Extra latency cycles since wbck is 8 cycles
714                                InstrStage<9, [FU_DRegsVFP], 0, Reserved>,
715                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
716                                InstrStage<3, [FU_NPipe]>], [4, 2, 2, 3, 3, 1]>,
717   //
718   // VTBX
719   InstrItinData<IIC_VTBX1,    [InstrStage<1, [FU_DRegsN],   0, Required>,
720                                // Extra latency cycles since wbck is 7 cycles
721                                InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
722                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
723                                InstrStage<2, [FU_NPipe]>], [3, 1, 2, 1]>,
724   InstrItinData<IIC_VTBX2,    [InstrStage<1, [FU_DRegsN],   0, Required>,
725                                // Extra latency cycles since wbck is 7 cycles
726                                InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
727                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
728                                InstrStage<2, [FU_NPipe]>], [3, 1, 2, 2, 1]>,
729   InstrItinData<IIC_VTBX3,    [InstrStage<1, [FU_DRegsN],   0, Required>,
730                                // Extra latency cycles since wbck is 8 cycles
731                                InstrStage<9, [FU_DRegsVFP], 0, Reserved>,
732                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
733                                InstrStage<3, [FU_NPipe]>], [4, 1, 2, 2, 3, 1]>,
734   InstrItinData<IIC_VTBX4,    [InstrStage<1, [FU_DRegsN],   0, Required>,
735                                // Extra latency cycles since wbck is 8 cycles
736                                InstrStage<9, [FU_DRegsVFP], 0, Reserved>,
737                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
738                                InstrStage<2, [FU_NPipe]>], [4, 1, 2, 2, 3, 3, 1]>
739 ]>;