1 //===-- PPCScheduleP8.td - PPC P8 Scheduling Definitions ---*- tablegen -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the itinerary class data for the POWER8 processor.
12 //===----------------------------------------------------------------------===//
14 // Scheduling for the P8 involves tracking two types of resources:
15 // 1. The dispatch bundle slots
16 // 2. The functional unit resources
19 def P8_DU1 : FuncUnit; // Dispatch slot 1 (non-branch)
20 def P8_DU2 : FuncUnit; // Dispatch slot 2 (non-branch)
21 def P8_DU3 : FuncUnit; // Dispatch slot 3 (non-branch)
22 def P8_DU4 : FuncUnit; // Dispatch slot 4 (non-branch)
23 def P8_DU5 : FuncUnit; // Dispatch slot 5 (non-branch)
24 def P8_DU6 : FuncUnit; // Dispatch slot 6 (non-branch)
25 def P8_DU7 : FuncUnit; // Only branch instructions will use DU7,DU8
26 def P8_DU8 : FuncUnit; // Dispatch slot 8 (branch only, see DU7)
28 // Up to 10 insns can be issued per cycle (2-LU, 2-LSU, 2-FXU, 2-FPU, 1-CRU, 1-BRU).
30 def P8_LU1 : FuncUnit; // LU pipeline 1: loads or fixed-point operations
31 def P8_LU2 : FuncUnit; // LU pipeline 2: loads or fixed-point operations
33 // Load/Store pipelines can handle stores, fixed-point loads, and simple
34 // fixed-point operations.
35 def P8_LSU1 : FuncUnit; // Load/Store pipeline 1
36 def P8_LSU2 : FuncUnit; // Load/Store pipeline 2
39 def P8_FXU1 : FuncUnit; // Fixed-point (FX) pipeline 1
40 def P8_FXU2 : FuncUnit; // Fixed-point (FX) pipeline 2
42 // The Floating-Point Unit (FPU) and Vector Media Extension (VMX) units
43 // are combined on P7 and newer into a Vector Scalar Unit (VSU).
44 // The P8 Instruction latency documents still refer to the unit as the
45 // FPU, so keep in mind that FPU==VSU.
46 // In contrast to the P7, the VMX units on P8 are symmetric, so no need to
47 // split vector integer ops or 128-bit load/store/perms to the specific units.
48 def P8_FPU1 : FuncUnit; // VS pipeline 1 (named FPU; FPU==VSU on P8)
49 def P8_FPU2 : FuncUnit; // VS pipeline 2 (named FPU; FPU==VSU on P8)
51 def P8_CRU : FuncUnit; // CR unit (CR logicals and move-from-SPRs)
52 def P8_BRU : FuncUnit; // BR unit (branch instructions)
54 def P8Itineraries : ProcessorItineraries<
55 [P8_DU1, P8_DU2, P8_DU3, P8_DU4, P8_DU5, P8_DU6, P8_DU7, P8_DU8,
56 P8_LU1, P8_LU2, P8_LSU1, P8_LSU2, P8_FXU1, P8_FXU2,
57 P8_FPU1, P8_FPU2, P8_CRU, P8_BRU], [], [
58 InstrItinData<IIC_IntSimple , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
59 P8_DU4, P8_DU5, P8_DU6], 0>,
60 InstrStage<1, [P8_FXU1, P8_FXU2,
64 InstrItinData<IIC_IntGeneral , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
65 P8_DU4, P8_DU5, P8_DU6], 0>,
66 InstrStage<1, [P8_FXU1, P8_FXU2, P8_LU1,
67 P8_LU2, P8_LSU1, P8_LSU2]>],
69 InstrItinData<IIC_IntCompare , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
70 P8_DU4, P8_DU5, P8_DU6], 0>,
71 InstrStage<1, [P8_FXU1, P8_FXU2]>],
73 InstrItinData<IIC_IntDivW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
74 P8_DU4, P8_DU5, P8_DU6], 0>,
75 InstrStage<15, [P8_FXU1, P8_FXU2]>],
77 InstrItinData<IIC_IntDivD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
78 P8_DU4, P8_DU5, P8_DU6], 0>,
79 InstrStage<23, [P8_FXU1, P8_FXU2]>],
81 InstrItinData<IIC_IntMulHW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
82 P8_DU4, P8_DU5, P8_DU6], 0>,
83 InstrStage<1, [P8_FXU1, P8_FXU2]>],
85 InstrItinData<IIC_IntMulHWU , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
86 P8_DU4, P8_DU5, P8_DU6], 0>,
87 InstrStage<1, [P8_FXU1, P8_FXU2]>],
89 InstrItinData<IIC_IntMulLI , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
90 P8_DU4, P8_DU5, P8_DU6], 0>,
91 InstrStage<1, [P8_FXU1, P8_FXU2]>],
93 InstrItinData<IIC_IntRotate , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
94 P8_DU4, P8_DU5, P8_DU6], 0>,
95 InstrStage<1, [P8_FXU1, P8_FXU2]>],
97 InstrItinData<IIC_IntRotateD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
98 P8_DU4, P8_DU5, P8_DU6], 0>,
99 InstrStage<1, [P8_FXU1, P8_FXU2]>],
101 InstrItinData<IIC_IntShift , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
102 P8_DU4, P8_DU5, P8_DU6], 0>,
103 InstrStage<1, [P8_FXU1, P8_FXU2]>],
105 InstrItinData<IIC_IntTrapW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
106 P8_DU4, P8_DU5, P8_DU6], 0>,
107 InstrStage<1, [P8_FXU1, P8_FXU2]>],
109 InstrItinData<IIC_IntTrapD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
110 P8_DU4, P8_DU5, P8_DU6], 0>,
111 InstrStage<1, [P8_FXU1, P8_FXU2]>],
113 InstrItinData<IIC_BrB , [InstrStage<1, [P8_DU7, P8_DU8], 0>,
114 InstrStage<1, [P8_BRU]>],
116 // FIXME - the Br* groups below are not branch related, so should probably be renamed.
118 // IIC_BrCR consists of the cr* instructions (crand, crnor, creqv, etc.)
119 // and should be 'First' in dispatch.
120 InstrItinData<IIC_BrCR , [InstrStage<1, [P8_DU1], 0>,
121 InstrStage<1, [P8_CRU]>],
123 // IIC_BrMCR consists of the mcrf instruction.
124 InstrItinData<IIC_BrMCR , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
125 P8_DU4, P8_DU5, P8_DU6], 0>,
126 InstrStage<1, [P8_CRU]>],
128 // IIC_BrMCRX consists of mcrxr (obsolete instruction) and mtcrf, which
129 // should be first in the dispatch group.
130 InstrItinData<IIC_BrMCRX , [InstrStage<1, [P8_DU1], 0>,
131 InstrStage<1, [P8_FXU1, P8_FXU2]>],
133 InstrItinData<IIC_BrMCRX , [InstrStage<1, [P8_DU1], 0>,
134 InstrStage<1, [P8_FXU1, P8_FXU2]>],
136 InstrItinData<IIC_LdStLoad , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
137 P8_DU4, P8_DU5, P8_DU6], 0>,
138 InstrStage<1, [P8_LSU1, P8_LSU2,
141 InstrItinData<IIC_LdStLoadUpd , [InstrStage<1, [P8_DU1], 0>,
142 InstrStage<1, [P8_DU2], 0>,
143 InstrStage<1, [P8_LSU1, P8_LSU2,
144 P8_LU1, P8_LU2 ], 0>,
145 InstrStage<1, [P8_FXU1, P8_FXU2]>],
147 // Update-Indexed form loads/stores are no longer first and last in the
148 // dispatch group. They are simply cracked, so require DU1,DU2.
149 InstrItinData<IIC_LdStLoadUpdX, [InstrStage<1, [P8_DU1], 0>,
150 InstrStage<1, [P8_DU2], 0>,
151 InstrStage<1, [P8_LSU1, P8_LSU2,
153 InstrStage<1, [P8_FXU1, P8_FXU2]>],
155 InstrItinData<IIC_LdStLD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
156 P8_DU4, P8_DU5, P8_DU6], 0>,
157 InstrStage<1, [P8_LSU1, P8_LSU2,
160 InstrItinData<IIC_LdStLDU , [InstrStage<1, [P8_DU1], 0>,
161 InstrStage<1, [P8_DU2], 0>,
162 InstrStage<1, [P8_LSU1, P8_LSU2,
164 InstrStage<1, [P8_FXU1, P8_FXU2]>],
166 InstrItinData<IIC_LdStLDUX , [InstrStage<1, [P8_DU1], 0>,
167 InstrStage<1, [P8_DU2], 0>,
168 InstrStage<1, [P8_LSU1, P8_LSU2,
170 InstrStage<1, [P8_FXU1, P8_FXU2]>],
172 InstrItinData<IIC_LdStLFD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
173 P8_DU4, P8_DU5, P8_DU6], 0>,
174 InstrStage<1, [P8_LU1, P8_LU2]>],
176 InstrItinData<IIC_LdStLVecX , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
177 P8_DU4, P8_DU5, P8_DU6], 0>,
178 InstrStage<1, [P8_LU1, P8_LU2]>],
180 InstrItinData<IIC_LdStLFDU , [InstrStage<1, [P8_DU1], 0>,
181 InstrStage<1, [P8_DU2], 0>,
182 InstrStage<1, [P8_LU1, P8_LU2], 0>,
183 InstrStage<1, [P8_FXU1, P8_FXU2]>],
185 InstrItinData<IIC_LdStLFDUX , [InstrStage<1, [P8_DU1], 0>,
186 InstrStage<1, [P8_DU2], 0>,
187 InstrStage<1, [P8_LU1, P8_LU2], 0>,
188 InstrStage<1, [P8_FXU1, P8_FXU2]>],
190 InstrItinData<IIC_LdStLHA , [InstrStage<1, [P8_DU1], 0>,
191 InstrStage<1, [P8_DU2], 0>,
192 InstrStage<1, [P8_LSU1, P8_LSU2,
194 InstrStage<1, [P8_FXU1, P8_FXU2,
197 InstrItinData<IIC_LdStLHAU , [InstrStage<1, [P8_DU1], 0>,
198 InstrStage<1, [P8_DU2], 0>,
199 InstrStage<1, [P8_LSU1, P8_LSU2,
201 InstrStage<1, [P8_FXU1, P8_FXU2]>,
202 InstrStage<1, [P8_FXU1, P8_FXU2]>],
204 // IIC_LdStLHAUX is first+last in its dispatch group, so it reserves DU1-DU6.
205 InstrItinData<IIC_LdStLHAUX , [InstrStage<1, [P8_DU1], 0>,
206 InstrStage<1, [P8_DU2], 0>,
207 InstrStage<1, [P8_DU3], 0>,
208 InstrStage<1, [P8_DU4], 0>,
209 InstrStage<1, [P8_DU5], 0>,
210 InstrStage<1, [P8_DU6], 0>,
211 InstrStage<1, [P8_LSU1, P8_LSU2,
213 InstrStage<1, [P8_FXU1, P8_FXU2]>,
214 InstrStage<1, [P8_FXU1, P8_FXU2]>],
216 InstrItinData<IIC_LdStLWA , [InstrStage<1, [P8_DU1], 0>,
217 InstrStage<1, [P8_DU2], 0>,
218 InstrStage<1, [P8_LSU1, P8_LSU2,
220 InstrStage<1, [P8_FXU1, P8_FXU2]>],
222 InstrItinData<IIC_LdStLWARX, [InstrStage<1, [P8_DU1], 0>,
223 InstrStage<1, [P8_DU2], 0>,
224 InstrStage<1, [P8_DU3], 0>,
225 InstrStage<1, [P8_DU4], 0>,
226 InstrStage<1, [P8_LSU1, P8_LSU2,
230 InstrItinData<IIC_LdStLDARX, [InstrStage<1, [P8_DU1], 0>,
231 InstrStage<1, [P8_DU2], 0>,
232 InstrStage<1, [P8_DU3], 0>,
233 InstrStage<1, [P8_DU4], 0>,
234 InstrStage<1, [P8_DU5], 0>,
235 InstrStage<1, [P8_DU6], 0>,
236 InstrStage<1, [P8_LSU1, P8_LSU2,
239 InstrItinData<IIC_LdStLMW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
240 P8_DU4, P8_DU5, P8_DU6], 0>,
241 InstrStage<1, [P8_LSU1, P8_LSU2,
244 // Stores are dual-issued from the issue queue, so may only take up one
245 // dispatch slot. The instruction will be broken into two IOPS. The agen
246 // op is issued to the LSU, and the data op (register fetch) is issued
247 // to either the LU (GPR store) or the VSU (FPR store).
248 InstrItinData<IIC_LdStStore , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
249 P8_DU4, P8_DU5, P8_DU6], 0>,
250 InstrStage<1, [P8_LSU1, P8_LSU2]>,
251 InstrStage<1, [P8_LU1, P8_LU2]>],
253 InstrItinData<IIC_LdStSTD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
254 P8_DU4, P8_DU5, P8_DU6], 0>,
255 InstrStage<1, [P8_LU1, P8_LU2,
258 InstrItinData<IIC_LdStSTDU , [InstrStage<1, [P8_DU1], 0>,
259 InstrStage<1, [P8_DU2], 0>,
260 InstrStage<1, [P8_LU1, P8_LU2,
261 P8_LSU1, P8_LSU2], 0>,
262 InstrStage<1, [P8_FXU1, P8_FXU2]>],
265 InstrItinData<IIC_LdStSTDUX , [InstrStage<1, [P8_DU1], 0>,
266 InstrStage<1, [P8_DU2], 0>,
267 InstrStage<1, [P8_DU3], 0>,
268 InstrStage<1, [P8_DU4], 0>,
269 InstrStage<1, [P8_DU5], 0>,
270 InstrStage<1, [P8_DU6], 0>,
271 InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
272 InstrStage<1, [P8_FXU1, P8_FXU2]>,
273 InstrStage<1, [P8_FXU1, P8_FXU2]>],
275 InstrItinData<IIC_LdStSTFD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
276 P8_DU4, P8_DU5, P8_DU6], 0>,
277 InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
278 InstrStage<1, [P8_FPU1, P8_FPU2]>],
280 InstrItinData<IIC_LdStSTFDU , [InstrStage<1, [P8_DU1], 0>,
281 InstrStage<1, [P8_DU2], 0>,
282 InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
283 InstrStage<1, [P8_FXU1, P8_FXU2], 0>,
284 InstrStage<1, [P8_FPU1, P8_FPU2]>],
286 InstrItinData<IIC_LdStSTVEBX , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
287 P8_DU4, P8_DU5, P8_DU6], 0>,
288 InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
289 InstrStage<1, [P8_FPU1, P8_FPU2]>],
291 InstrItinData<IIC_LdStSTDCX , [InstrStage<1, [P8_DU1], 0>,
292 InstrStage<1, [P8_DU2], 0>,
293 InstrStage<1, [P8_DU3], 0>,
294 InstrStage<1, [P8_DU4], 0>,
295 InstrStage<1, [P8_DU5], 0>,
296 InstrStage<1, [P8_DU6], 0>,
297 InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
298 InstrStage<1, [P8_LU1, P8_LU2]>],
300 InstrItinData<IIC_LdStSTWCX , [InstrStage<1, [P8_DU1], 0>,
301 InstrStage<1, [P8_DU2], 0>,
302 InstrStage<1, [P8_DU3], 0>,
303 InstrStage<1, [P8_DU4], 0>,
304 InstrStage<1, [P8_DU5], 0>,
305 InstrStage<1, [P8_DU6], 0>,
306 InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
307 InstrStage<1, [P8_LU1, P8_LU2]>],
309 InstrItinData<IIC_SprMFCR , [InstrStage<1, [P8_DU1], 0>,
310 InstrStage<1, [P8_CRU]>],
312 InstrItinData<IIC_SprMFCRF , [InstrStage<1, [P8_DU1], 0>,
313 InstrStage<1, [P8_CRU]>],
315 InstrItinData<IIC_SprMTSPR , [InstrStage<1, [P8_DU1], 0>,
316 InstrStage<1, [P8_FXU1, P8_FXU2]>],
318 InstrItinData<IIC_FPGeneral , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
319 P8_DU4, P8_DU5, P8_DU6], 0>,
320 InstrStage<1, [P8_FPU1, P8_FPU2]>],
322 InstrItinData<IIC_FPCompare , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
323 P8_DU4, P8_DU5, P8_DU6], 0>,
324 InstrStage<1, [P8_FPU1, P8_FPU2]>],
326 InstrItinData<IIC_FPDivD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
327 P8_DU4, P8_DU5, P8_DU6], 0>,
328 InstrStage<1, [P8_FPU1, P8_FPU2]>],
330 InstrItinData<IIC_FPDivS , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
331 P8_DU4, P8_DU5, P8_DU6], 0>,
332 InstrStage<1, [P8_FPU1, P8_FPU2]>],
334 InstrItinData<IIC_FPSqrtD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
335 P8_DU4, P8_DU5, P8_DU6], 0>,
336 InstrStage<1, [P8_FPU1, P8_FPU2]>],
338 InstrItinData<IIC_FPSqrtS , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
339 P8_DU4, P8_DU5, P8_DU6], 0>,
340 InstrStage<1, [P8_FPU1, P8_FPU2]>],
342 InstrItinData<IIC_FPFused , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
343 P8_DU4, P8_DU5, P8_DU6], 0>,
344 InstrStage<1, [P8_FPU1, P8_FPU2]>],
346 InstrItinData<IIC_FPRes , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
347 P8_DU4, P8_DU5, P8_DU6], 0>,
348 InstrStage<1, [P8_FPU1, P8_FPU2]>],
350 InstrItinData<IIC_VecGeneral , [InstrStage<1, [P8_DU1], 0>,
351 InstrStage<1, [P8_FPU1, P8_FPU2]>],
353 InstrItinData<IIC_VecVSL , [InstrStage<1, [P8_DU1], 0>,
354 InstrStage<1, [P8_FPU1, P8_FPU2]>],
356 InstrItinData<IIC_VecVSR , [InstrStage<1, [P8_DU1], 0>,
357 InstrStage<1, [P8_FPU1, P8_FPU2]>],
359 InstrItinData<IIC_VecFP , [InstrStage<1, [P8_DU1], 0>,
360 InstrStage<1, [P8_FPU1, P8_FPU2]>],
362 InstrItinData<IIC_VecFPCompare, [InstrStage<1, [P8_DU1], 0>,
363 InstrStage<1, [P8_FPU1, P8_FPU2]>],
365 InstrItinData<IIC_VecFPRound , [InstrStage<1, [P8_DU1], 0>,
366 InstrStage<1, [P8_FPU1, P8_FPU2]>],
368 InstrItinData<IIC_VecComplex , [InstrStage<1, [P8_DU1], 0>,
369 InstrStage<1, [P8_FPU1, P8_FPU2]>],
371 InstrItinData<IIC_VecPerm , [InstrStage<1, [P8_DU1, P8_DU2], 0>,
372 InstrStage<1, [P8_FPU2, P8_FPU2]>],
376 // ===---------------------------------------------------------------------===//
377 // P8 machine model for scheduling and other instruction cost heuristics.
378 // P8 has an 8 insn dispatch group (6 non-branch, 2 branch) and can issue up
379 // to 10 insns per cycle (2-LU, 2-LSU, 2-FXU, 2-FPU, 1-CRU, 1-BRU).
381 def P8Model : SchedMachineModel {
382 let IssueWidth = 8; // up to 8 instructions dispatched per cycle.
383 // up to six non-branch instructions.
384 // up to two branches in a dispatch group.
386 let MinLatency = 0; // Out-of-order dispatch.
387 let LoadLatency = 3; // Optimistic load latency assuming bypass.
388 // This is overridden by OperandCycles if the
389 // Itineraries are queried instead.
390 let MispredictPenalty = 16;
392 let Itineraries = P8Itineraries;