1 //===- NVPTXInstrInfo.td - NVPTX Instruction defs -------------*- tblgen-*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the PTX instructions in TableGen format.
12 //===----------------------------------------------------------------------===//
14 include "NVPTXInstrFormats.td"
// Instruction record with no outputs, no inputs, an empty assembly string and
// an empty selection-pattern list.
17 def NOP : NVPTXInst<(outs), (ins), "", []>;
19 // List of vector specific properties
// One VecInstTypeEnum record per vector-instruction kind. Values 1-7 are
// assigned consecutively; isVecOther uses 15, leaving 8-14 unassigned here.
// NOTE(review): VecInstTypeEnum itself is declared outside this file
// (presumably in NVPTXInstrFormats.td) -- confirm how the value is encoded.
20 def isVecLD : VecInstTypeEnum<1>;
21 def isVecST : VecInstTypeEnum<2>;
22 def isVecBuild : VecInstTypeEnum<3>;
23 def isVecShuffle : VecInstTypeEnum<4>;
24 def isVecExtract : VecInstTypeEnum<5>;
25 def isVecInsert : VecInstTypeEnum<6>;
26 def isVecDest : VecInstTypeEnum<7>;
27 def isVecOther : VecInstTypeEnum<15>;
29 //===----------------------------------------------------------------------===//
30 // NVPTX Operand Definitions.
31 //===----------------------------------------------------------------------===//
// Operand of value type OtherVT. NOTE(review): by its name this is the
// branch-target operand; its users are not visible in this part of the file.
33 def brtarget : Operand<OtherVT>;
35 // CVT conversion modes
36 // These must match the enum in NVPTX.h
// i32 pattern leaves naming the cvt conversion modes.
// Layout of the values defined below:
//   0x0 - 0x8 : base mode (NONE, RNI, RZI, RMI, RPI, RN, RZ, RM, RP)
//   0x10      : added to a base mode to form its _FTZ variant
//   0x20      : CvtSAT; CvtSAT_FTZ is 0x20 + 0x10 = 0x30
37 def CvtNONE : PatLeaf<(i32 0x0)>;
38 def CvtRNI : PatLeaf<(i32 0x1)>;
39 def CvtRZI : PatLeaf<(i32 0x2)>;
40 def CvtRMI : PatLeaf<(i32 0x3)>;
41 def CvtRPI : PatLeaf<(i32 0x4)>;
42 def CvtRN : PatLeaf<(i32 0x5)>;
43 def CvtRZ : PatLeaf<(i32 0x6)>;
44 def CvtRM : PatLeaf<(i32 0x7)>;
45 def CvtRP : PatLeaf<(i32 0x8)>;
// Same base modes with the 0x10 (FTZ) bit set.
47 def CvtNONE_FTZ : PatLeaf<(i32 0x10)>;
48 def CvtRNI_FTZ : PatLeaf<(i32 0x11)>;
49 def CvtRZI_FTZ : PatLeaf<(i32 0x12)>;
50 def CvtRMI_FTZ : PatLeaf<(i32 0x13)>;
51 def CvtRPI_FTZ : PatLeaf<(i32 0x14)>;
52 def CvtRN_FTZ : PatLeaf<(i32 0x15)>;
53 def CvtRZ_FTZ : PatLeaf<(i32 0x16)>;
54 def CvtRM_FTZ : PatLeaf<(i32 0x17)>;
55 def CvtRP_FTZ : PatLeaf<(i32 0x18)>;
// Saturating modes.
57 def CvtSAT : PatLeaf<(i32 0x20)>;
58 def CvtSAT_FTZ : PatLeaf<(i32 0x30)>;
60 def CvtMode : Operand<i32> {
61 let PrintMethod = "printCvtMode";
65 // These must match the enum in NVPTX.h
// i32 pattern leaves naming the comparison modes.
// Base codes run from 0 (EQ) to 17 (NAN); each _FTZ variant further down is
// the corresponding base code plus 0x100.
66 def CmpEQ : PatLeaf<(i32 0)>;
67 def CmpNE : PatLeaf<(i32 1)>;
68 def CmpLT : PatLeaf<(i32 2)>;
69 def CmpLE : PatLeaf<(i32 3)>;
70 def CmpGT : PatLeaf<(i32 4)>;
71 def CmpGE : PatLeaf<(i32 5)>;
72 def CmpLO : PatLeaf<(i32 6)>;
73 def CmpLS : PatLeaf<(i32 7)>;
74 def CmpHI : PatLeaf<(i32 8)>;
75 def CmpHS : PatLeaf<(i32 9)>;
76 def CmpEQU : PatLeaf<(i32 10)>;
77 def CmpNEU : PatLeaf<(i32 11)>;
78 def CmpLTU : PatLeaf<(i32 12)>;
79 def CmpLEU : PatLeaf<(i32 13)>;
80 def CmpGTU : PatLeaf<(i32 14)>;
81 def CmpGEU : PatLeaf<(i32 15)>;
82 def CmpNUM : PatLeaf<(i32 16)>;
83 def CmpNAN : PatLeaf<(i32 17)>;
// _FTZ variants: base code + 0x100 (0x100 .. 0x111).
85 def CmpEQ_FTZ : PatLeaf<(i32 0x100)>;
86 def CmpNE_FTZ : PatLeaf<(i32 0x101)>;
87 def CmpLT_FTZ : PatLeaf<(i32 0x102)>;
88 def CmpLE_FTZ : PatLeaf<(i32 0x103)>;
89 def CmpGT_FTZ : PatLeaf<(i32 0x104)>;
90 def CmpGE_FTZ : PatLeaf<(i32 0x105)>;
91 def CmpLO_FTZ : PatLeaf<(i32 0x106)>;
92 def CmpLS_FTZ : PatLeaf<(i32 0x107)>;
93 def CmpHI_FTZ : PatLeaf<(i32 0x108)>;
94 def CmpHS_FTZ : PatLeaf<(i32 0x109)>;
95 def CmpEQU_FTZ : PatLeaf<(i32 0x10A)>;
96 def CmpNEU_FTZ : PatLeaf<(i32 0x10B)>;
97 def CmpLTU_FTZ : PatLeaf<(i32 0x10C)>;
98 def CmpLEU_FTZ : PatLeaf<(i32 0x10D)>;
99 def CmpGTU_FTZ : PatLeaf<(i32 0x10E)>;
100 def CmpGEU_FTZ : PatLeaf<(i32 0x10F)>;
101 def CmpNUM_FTZ : PatLeaf<(i32 0x110)>;
102 def CmpNAN_FTZ : PatLeaf<(i32 0x111)>;
104 def CmpMode : Operand<i32> {
105 let PrintMethod = "printCmpMode";
108 def F32ConstZero : Operand<f32>, PatLeaf<(f32 fpimm)>, SDNodeXForm<fpimm, [{
109 return CurDAG->getTargetConstantFP(0.0, MVT::f32);
111 def F32ConstOne : Operand<f32>, PatLeaf<(f32 fpimm)>, SDNodeXForm<fpimm, [{
112 return CurDAG->getTargetConstantFP(1.0, MVT::f32);
115 //===----------------------------------------------------------------------===//
116 // NVPTX Instruction Predicate Definitions
117 //===----------------------------------------------------------------------===//
// Predicates whose condition string queries the Subtarget object directly.
// The useAtomRed*forGen* forms hold when the generic variant is unavailable
// but the G (32- or 64-bit) variant is available.
120 def hasAtomRedG32 : Predicate<"Subtarget.hasAtomRedG32()">;
121 def hasAtomRedS32 : Predicate<"Subtarget.hasAtomRedS32()">;
122 def hasAtomRedGen32 : Predicate<"Subtarget.hasAtomRedGen32()">;
123 def useAtomRedG32forGen32 :
124 Predicate<"!Subtarget.hasAtomRedGen32() && Subtarget.hasAtomRedG32()">;
125 def hasBrkPt : Predicate<"Subtarget.hasBrkPt()">;
126 def hasAtomRedG64 : Predicate<"Subtarget.hasAtomRedG64()">;
127 def hasAtomRedS64 : Predicate<"Subtarget.hasAtomRedS64()">;
128 def hasAtomRedGen64 : Predicate<"Subtarget.hasAtomRedGen64()">;
129 def useAtomRedG64forGen64 :
130 Predicate<"!Subtarget.hasAtomRedGen64() && Subtarget.hasAtomRedG64()">;
131 def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">;
132 def hasVote : Predicate<"Subtarget.hasVote()">;
133 def hasDouble : Predicate<"Subtarget.hasDouble()">;
134 def reqPTX20 : Predicate<"Subtarget.reqPTX20()">;
135 def hasLDG : Predicate<"Subtarget.hasLDG()">;
136 def hasLDU : Predicate<"Subtarget.hasLDU()">;
137 def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
// Complementary pair keyed on useF32FTZ().
139 def doF32FTZ : Predicate<"useF32FTZ()">;
140 def doNoF32FTZ : Predicate<"!useF32FTZ()">;
// FMA-related flags; each _ftz form additionally requires useF32FTZ().
142 def doFMAF32 : Predicate<"doFMAF32">;
143 def doFMAF32_ftz : Predicate<"(doFMAF32 && useF32FTZ())">;
144 def doFMAF32AGG : Predicate<"doFMAF32AGG">;
145 def doFMAF32AGG_ftz : Predicate<"(doFMAF32AGG && useF32FTZ())">;
146 def doFMAF64 : Predicate<"doFMAF64">;
147 def doFMAF64AGG : Predicate<"doFMAF64AGG">;
149 def doMulWide : Predicate<"doMulWide">;
151 def allowFMA : Predicate<"allowFMA">;
152 def allowFMA_ftz : Predicate<"(allowFMA && useF32FTZ())">;
// f32 division level: getDivF32Level() == 0 selects APPROX, == 1 selects FULL.
154 def do_DIVF32_APPROX : Predicate<"getDivF32Level()==0">;
155 def do_DIVF32_FULL : Predicate<"getDivF32Level()==1">;
// Complementary pair keyed on usePrecSqrtF32().
157 def do_SQRTF32_APPROX : Predicate<"!usePrecSqrtF32()">;
158 def do_SQRTF32_RN : Predicate<"usePrecSqrtF32()">;
// Complementary pair keyed on Subtarget.hasHWROT32().
160 def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
161 def noHWROT32 : Predicate<"!Subtarget.hasHWROT32()">;
// Condition string is the constant "1", i.e. a predicate that always holds.
163 def true : Predicate<"1">;
// Holds when Subtarget.getPTXVersion() is at least 31.
165 def hasPTX31 : Predicate<"Subtarget.getPTXVersion() >= 31">;
168 //===----------------------------------------------------------------------===//
169 // Some Common Instruction Class Templates
170 //===----------------------------------------------------------------------===//
172 multiclass I3<string OpcStr, SDNode OpNode> {
173 def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
174 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
175 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
177 def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
178 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
179 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
180 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
181 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
182 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
184 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
185 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
186 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
187 def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
188 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
189 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
191 def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
192 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
193 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
196 multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> {
197 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
199 !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
200 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
202 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
203 !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
204 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
207 multiclass F3<string OpcStr, SDNode OpNode> {
208 def f64rr : NVPTXInst<(outs Float64Regs:$dst),
209 (ins Float64Regs:$a, Float64Regs:$b),
210 !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
211 [(set Float64Regs:$dst,
212 (OpNode Float64Regs:$a, Float64Regs:$b))]>,
213 Requires<[allowFMA]>;
214 def f64ri : NVPTXInst<(outs Float64Regs:$dst),
215 (ins Float64Regs:$a, f64imm:$b),
216 !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
217 [(set Float64Regs:$dst,
218 (OpNode Float64Regs:$a, fpimm:$b))]>,
219 Requires<[allowFMA]>;
220 def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
221 (ins Float32Regs:$a, Float32Regs:$b),
222 !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
223 [(set Float32Regs:$dst,
224 (OpNode Float32Regs:$a, Float32Regs:$b))]>,
225 Requires<[allowFMA_ftz]>;
226 def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
227 (ins Float32Regs:$a, f32imm:$b),
228 !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
229 [(set Float32Regs:$dst,
230 (OpNode Float32Regs:$a, fpimm:$b))]>,
231 Requires<[allowFMA_ftz]>;
232 def f32rr : NVPTXInst<(outs Float32Regs:$dst),
233 (ins Float32Regs:$a, Float32Regs:$b),
234 !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
235 [(set Float32Regs:$dst,
236 (OpNode Float32Regs:$a, Float32Regs:$b))]>,
237 Requires<[allowFMA]>;
238 def f32ri : NVPTXInst<(outs Float32Regs:$dst),
239 (ins Float32Regs:$a, f32imm:$b),
240 !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
241 [(set Float32Regs:$dst,
242 (OpNode Float32Regs:$a, fpimm:$b))]>,
243 Requires<[allowFMA]>;
246 multiclass F3_rn<string OpcStr, SDNode OpNode> {
247 def f64rr : NVPTXInst<(outs Float64Regs:$dst),
248 (ins Float64Regs:$a, Float64Regs:$b),
249 !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
250 [(set Float64Regs:$dst,
251 (OpNode Float64Regs:$a, Float64Regs:$b))]>;
252 def f64ri : NVPTXInst<(outs Float64Regs:$dst),
253 (ins Float64Regs:$a, f64imm:$b),
254 !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
255 [(set Float64Regs:$dst,
256 (OpNode Float64Regs:$a, fpimm:$b))]>;
257 def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
258 (ins Float32Regs:$a, Float32Regs:$b),
259 !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
260 [(set Float32Regs:$dst,
261 (OpNode Float32Regs:$a, Float32Regs:$b))]>,
262 Requires<[doF32FTZ]>;
263 def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
264 (ins Float32Regs:$a, f32imm:$b),
265 !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
266 [(set Float32Regs:$dst,
267 (OpNode Float32Regs:$a, fpimm:$b))]>,
268 Requires<[doF32FTZ]>;
269 def f32rr : NVPTXInst<(outs Float32Regs:$dst),
270 (ins Float32Regs:$a, Float32Regs:$b),
271 !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
272 [(set Float32Regs:$dst,
273 (OpNode Float32Regs:$a, Float32Regs:$b))]>;
274 def f32ri : NVPTXInst<(outs Float32Regs:$dst),
275 (ins Float32Regs:$a, f32imm:$b),
276 !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
277 [(set Float32Regs:$dst,
278 (OpNode Float32Regs:$a, fpimm:$b))]>;
281 multiclass F2<string OpcStr, SDNode OpNode> {
282 def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a),
283 !strconcat(OpcStr, ".f64 \t$dst, $a;"),
284 [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>;
285 def f32_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
286 !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"),
287 [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>,
288 Requires<[doF32FTZ]>;
289 def f32 : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
290 !strconcat(OpcStr, ".f32 \t$dst, $a;"),
291 [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>;
294 //===----------------------------------------------------------------------===//
295 // NVPTX Instructions.
296 //===----------------------------------------------------------------------===//
298 //-----------------------------------
299 // General Type Conversion
300 //-----------------------------------
302 let neverHasSideEffects = 1 in {
303 // Generate a cvt to the given type from all possible types.
304 // Each instance takes a CvtMode immediate that defines the conversion mode to
305 // use. It can be CvtNONE to omit a conversion mode.
306 multiclass CVT_FROM_ALL<string FromName, RegisterClass RC> {
307 def _s16 : NVPTXInst<(outs RC:$dst),
308 (ins Int16Regs:$src, CvtMode:$mode),
309 !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
310 FromName, ".s16\t$dst, $src;"),
312 def _u16 : NVPTXInst<(outs RC:$dst),
313 (ins Int16Regs:$src, CvtMode:$mode),
314 !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
315 FromName, ".u16\t$dst, $src;"),
317 def _f16 : NVPTXInst<(outs RC:$dst),
318 (ins Int16Regs:$src, CvtMode:$mode),
319 !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
320 FromName, ".f16\t$dst, $src;"),
322 def _s32 : NVPTXInst<(outs RC:$dst),
323 (ins Int32Regs:$src, CvtMode:$mode),
324 !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
325 FromName, ".s32\t$dst, $src;"),
327 def _u32 : NVPTXInst<(outs RC:$dst),
328 (ins Int32Regs:$src, CvtMode:$mode),
329 !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
330 FromName, ".u32\t$dst, $src;"),
332 def _s64 : NVPTXInst<(outs RC:$dst),
333 (ins Int64Regs:$src, CvtMode:$mode),
334 !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
335 FromName, ".s64\t$dst, $src;"),
337 def _u64 : NVPTXInst<(outs RC:$dst),
338 (ins Int64Regs:$src, CvtMode:$mode),
339 !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
340 FromName, ".u64\t$dst, $src;"),
342 def _f32 : NVPTXInst<(outs RC:$dst),
343 (ins Float32Regs:$src, CvtMode:$mode),
344 !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
345 FromName, ".f32\t$dst, $src;"),
347 def _f64 : NVPTXInst<(outs RC:$dst),
348 (ins Float64Regs:$src, CvtMode:$mode),
349 !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
350 FromName, ".f64\t$dst, $src;"),
354 // Generate a cvt to all possible types.
// One CVT_FROM_ALL instantiation per destination type. Despite the parameter
// name "FromName", the string argument is emitted as the FIRST type suffix of
// the cvt mnemonic (before the source-type suffix), and the register class
// argument is the class of $dst. s16, u16 and f16 all use Int16Regs.
355 defm CVT_s16 : CVT_FROM_ALL<"s16", Int16Regs>;
356 defm CVT_u16 : CVT_FROM_ALL<"u16", Int16Regs>;
357 defm CVT_f16 : CVT_FROM_ALL<"f16", Int16Regs>;
358 defm CVT_s32 : CVT_FROM_ALL<"s32", Int32Regs>;
359 defm CVT_u32 : CVT_FROM_ALL<"u32", Int32Regs>;
360 defm CVT_s64 : CVT_FROM_ALL<"s64", Int64Regs>;
361 defm CVT_u64 : CVT_FROM_ALL<"u64", Int64Regs>;
362 defm CVT_f32 : CVT_FROM_ALL<"f32", Float32Regs>;
363 defm CVT_f64 : CVT_FROM_ALL<"f64", Float64Regs>;
365 // This set of cvt is different from the above. The types of the source
366 // and target are the same.
// In-register cvt forms: $src and $dst use the same register class, while the
// mnemonic names a narrower signed source type (s8/s16/s32) than the
// destination type. Each record has an empty pattern list ([]), so nothing in
// these definitions selects them directly.
368 def CVT_INREG_s16_s8 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
369 "cvt.s16.s8 \t$dst, $src;", []>;
370 def CVT_INREG_s32_s8 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
371 "cvt.s32.s8 \t$dst, $src;", []>;
372 def CVT_INREG_s32_s16 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
373 "cvt.s32.s16 \t$dst, $src;", []>;
374 def CVT_INREG_s64_s8 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
375 "cvt.s64.s8 \t$dst, $src;", []>;
376 def CVT_INREG_s64_s16 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
377 "cvt.s64.s16 \t$dst, $src;", []>;
378 def CVT_INREG_s64_s32 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
379 "cvt.s64.s32 \t$dst, $src;", []>;
382 //-----------------------------------
383 // Integer Arithmetic
384 //-----------------------------------
386 multiclass ADD_SUB_i1<SDNode OpNode> {
387 def _rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
388 "xor.pred \t$dst, $a, $b;",
389 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
390 def _ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
391 "xor.pred \t$dst, $a, $b;",
392 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>;
// i1 add and sub both instantiate ADD_SUB_i1, whose records emit "xor.pred".
395 defm ADD_i1 : ADD_SUB_i1<add>;
396 defm SUB_i1 : ADD_SUB_i1<sub>;
// 16/32/64-bit add and sub; I3 appends the bit width to the "add.s"/"sub.s"
// prefix.
399 defm ADD : I3<"add.s", add>;
400 defm SUB : I3<"sub.s", sub>;
// 32-bit forms for the addc/subc nodes; ADD_SUB_INT_32 appends ".s32".
402 defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>;
403 defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>;
// 32-bit forms for the adde/sube nodes.
405 defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>;
406 defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>;
408 //mul.wide PTX instruction
409 def SInt32Const : PatLeaf<(imm), [{
410 const APInt &v = N->getAPIntValue();
411 if (v.isSignedIntN(32))
416 def UInt32Const : PatLeaf<(imm), [{
417 const APInt &v = N->getAPIntValue();
423 def SInt16Const : PatLeaf<(imm), [{
424 const APInt &v = N->getAPIntValue();
425 if (v.isSignedIntN(16))
430 def UInt16Const : PatLeaf<(imm), [{
431 const APInt &v = N->getAPIntValue();
437 def Int5Const : PatLeaf<(imm), [{
438 const APInt &v = N->getAPIntValue();
439 // Check if 0 <= v < 32
440 // Only then the result from (x << v) will be i32
441 if (v.sge(0) && v.slt(32))
446 def Int4Const : PatLeaf<(imm), [{
447 const APInt &v = N->getAPIntValue();
448 // Check if 0 <= v < 16
449 // Only then the result from (x << v) will be i16
450 if (v.sge(0) && v.slt(16))
455 def SHL2MUL32 : SDNodeXForm<imm, [{
456 const APInt &v = N->getAPIntValue();
458 return CurDAG->getTargetConstant(temp.shl(v), MVT::i32);
461 def SHL2MUL16 : SDNodeXForm<imm, [{
462 const APInt &v = N->getAPIntValue();
464 return CurDAG->getTargetConstant(temp.shl(v), MVT::i16);
468 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
469 "mul.wide.s32 \t$dst, $a, $b;", []>;
471 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
472 "mul.wide.s32 \t$dst, $a, $b;", []>;
474 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
475 "mul.wide.s32 \t$dst, $a, $b;", []>;
478 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
479 "mul.wide.u32 \t$dst, $a, $b;", []>;
481 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
482 "mul.wide.u32 \t$dst, $a, $b;", []>;
484 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
485 "mul.wide.u32 \t$dst, $a, $b;", []>;
488 : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
489 "mul.wide.s16 \t$dst, $a, $b;", []>;
491 : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
492 "mul.wide.s16 \t$dst, $a, $b;", []>;
494 : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
495 "mul.wide.s16 \t$dst, $a, $b;", []>;
498 : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
499 "mul.wide.u16 \t$dst, $a, $b;", []>;
501 : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
502 "mul.wide.u16 \t$dst, $a, $b;", []>;
504 : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
505 "mul.wide.u16 \t$dst, $a, $b;", []>;
// Selection patterns that fold an extension plus shift/multiply into the
// MULWIDE* instructions. Every pattern here is gated on doMulWide.

// shl of a sign-/zero-extended 32-bit value by an Int5Const amount
// (0 <= amount < 32): the amount is rewritten through SHL2MUL32 and used as
// the immediate multiplier.
507 def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)),
508 (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
509 Requires<[doMulWide]>;
510 def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)),
511 (MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
512 Requires<[doMulWide]>;
// Same for 16-bit sources with an Int4Const amount (0 <= amount < 16) and
// SHL2MUL16.
514 def : Pat<(shl (sext Int16Regs:$a), (i16 Int4Const:$b)),
515 (MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
516 Requires<[doMulWide]>;
517 def : Pat<(shl (zext Int16Regs:$a), (i16 Int4Const:$b)),
518 (MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
519 Requires<[doMulWide]>;
// mul of two sign-extended 32-bit values, or of one sign-extended value and an
// i64 constant accepted by SInt32Const.
521 def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)),
522 (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
523 Requires<[doMulWide]>;
524 def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)),
525 (MULWIDES64Imm64 Int32Regs:$a, (i64 SInt32Const:$b))>,
526 Requires<[doMulWide]>;
// Unsigned counterpart (zext operands, UInt32Const constant).
528 def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)),
529 (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
530 Requires<[doMulWide]>;
531 def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)),
532 (MULWIDEU64Imm64 Int32Regs:$a, (i64 UInt32Const:$b))>,
533 Requires<[doMulWide]>;
// 16-bit -> 32-bit signed forms (SInt16Const constant).
535 def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)),
536 (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>,
537 Requires<[doMulWide]>;
538 def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)),
539 (MULWIDES32Imm32 Int16Regs:$a, (i32 SInt16Const:$b))>,
540 Requires<[doMulWide]>;
// 16-bit -> 32-bit unsigned forms (UInt16Const constant).
542 def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)),
543 (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,
544 Requires<[doMulWide]>;
545 def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)),
546 (MULWIDEU32Imm32 Int16Regs:$a, (i32 UInt16Const:$b))>,
547 Requires<[doMulWide]>;
551 : SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>]>;
553 : SDNode<"NVPTXISD::MUL_WIDE_SIGNED", SDTMulWide>;
554 def mul_wide_unsigned
555 : SDNode<"NVPTXISD::MUL_WIDE_UNSIGNED", SDTMulWide>;
// Direct selection of the mul_wide_signed / mul_wide_unsigned nodes, each in a
// register-register and a register-immediate form. All are gated on
// doMulWide.

// i32 result from 16-bit operands.
557 def : Pat<(i32 (mul_wide_signed Int16Regs:$a, Int16Regs:$b)),
558 (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>,
559 Requires<[doMulWide]>;
560 def : Pat<(i32 (mul_wide_signed Int16Regs:$a, imm:$b)),
561 (MULWIDES32Imm Int16Regs:$a, imm:$b)>,
562 Requires<[doMulWide]>;
563 def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, Int16Regs:$b)),
564 (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,
565 Requires<[doMulWide]>;
566 def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, imm:$b)),
567 (MULWIDEU32Imm Int16Regs:$a, imm:$b)>,
568 Requires<[doMulWide]>;
// i64 result from 32-bit operands.
571 def : Pat<(i64 (mul_wide_signed Int32Regs:$a, Int32Regs:$b)),
572 (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
573 Requires<[doMulWide]>;
574 def : Pat<(i64 (mul_wide_signed Int32Regs:$a, imm:$b)),
575 (MULWIDES64Imm Int32Regs:$a, imm:$b)>,
576 Requires<[doMulWide]>;
577 def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, Int32Regs:$b)),
578 (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
579 Requires<[doMulWide]>;
580 def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, imm:$b)),
581 (MULWIDEU64Imm Int32Regs:$a, imm:$b)>,
582 Requires<[doMulWide]>;
// Integer multiply / divide / remainder families built from I3, which appends
// the bit width (16/32/64) to the given mnemonic prefix and provides rr and ri
// forms for each width.
// Low half of the product.
584 defm MULT : I3<"mul.lo.s", mul>;
// High half of the product, signed (mulhs) and unsigned (mulhu).
586 defm MULTHS : I3<"mul.hi.s", mulhs>;
587 defm MULTHU : I3<"mul.hi.u", mulhu>;
// Signed and unsigned division.
589 defm SDIV : I3<"div.s", sdiv>;
590 defm UDIV : I3<"div.u", udiv>;
// Signed and unsigned remainder; each note below refers to the defm above it.
592 defm SREM : I3<"rem.s", srem>;
593 // The ri version will not be selected as DAGCombiner::visitSREM will lower it.
594 defm UREM : I3<"rem.u", urem>;
595 // The ri version will not be selected as DAGCombiner::visitUREM will lower it.
598 : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>,
599 SDTCisInt<2>, SDTCisSameAs<0, 2>,
600 SDTCisSameAs<0, 3>]>;
602 : SDNode<"NVPTXISD::IMAD", SDTIMAD>;
// 16-bit "mad.lo.s16" selected from the imad node. The suffix spells the kind
// of each of the three sources $a, $b, $c: r = Int16Regs register,
// i = i16imm immediate. $a is always a register.
604 def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst),
605 (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c),
606 "mad.lo.s16 \t$dst, $a, $b, $c;",
607 [(set Int16Regs:$dst,
608 (imad Int16Regs:$a, Int16Regs:$b, Int16Regs:$c))]>;
609 def MAD16rri : NVPTXInst<(outs Int16Regs:$dst),
610 (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c),
611 "mad.lo.s16 \t$dst, $a, $b, $c;",
612 [(set Int16Regs:$dst,
613 (imad Int16Regs:$a, Int16Regs:$b, imm:$c))]>;
614 def MAD16rir : NVPTXInst<(outs Int16Regs:$dst),
615 (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c),
616 "mad.lo.s16 \t$dst, $a, $b, $c;",
617 [(set Int16Regs:$dst,
618 (imad Int16Regs:$a, imm:$b, Int16Regs:$c))]>;
619 def MAD16rii : NVPTXInst<(outs Int16Regs:$dst),
620 (ins Int16Regs:$a, i16imm:$b, i16imm:$c),
621 "mad.lo.s16 \t$dst, $a, $b, $c;",
622 [(set Int16Regs:$dst,
623 (imad Int16Regs:$a, imm:$b, imm:$c))]>;
// 32-bit "mad.lo.s32" selected from the imad node. The suffix spells the kind
// of each of the three sources $a, $b, $c: r = Int32Regs register,
// i = i32imm immediate. $a is always a register.
625 def MAD32rrr : NVPTXInst<(outs Int32Regs:$dst),
626 (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),
627 "mad.lo.s32 \t$dst, $a, $b, $c;",
628 [(set Int32Regs:$dst,
629 (imad Int32Regs:$a, Int32Regs:$b, Int32Regs:$c))]>;
630 def MAD32rri : NVPTXInst<(outs Int32Regs:$dst),
631 (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c),
632 "mad.lo.s32 \t$dst, $a, $b, $c;",
633 [(set Int32Regs:$dst,
634 (imad Int32Regs:$a, Int32Regs:$b, imm:$c))]>;
635 def MAD32rir : NVPTXInst<(outs Int32Regs:$dst),
636 (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c),
637 "mad.lo.s32 \t$dst, $a, $b, $c;",
638 [(set Int32Regs:$dst,
639 (imad Int32Regs:$a, imm:$b, Int32Regs:$c))]>;
640 def MAD32rii : NVPTXInst<(outs Int32Regs:$dst),
641 (ins Int32Regs:$a, i32imm:$b, i32imm:$c),
642 "mad.lo.s32 \t$dst, $a, $b, $c;",
643 [(set Int32Regs:$dst,
644 (imad Int32Regs:$a, imm:$b, imm:$c))]>;
// 64-bit "mad.lo.s64" selected from the imad node. The suffix spells the kind
// of each of the three sources $a, $b, $c: r = Int64Regs register,
// i = i64imm immediate. $a is always a register.
646 def MAD64rrr : NVPTXInst<(outs Int64Regs:$dst),
647 (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
648 "mad.lo.s64 \t$dst, $a, $b, $c;",
649 [(set Int64Regs:$dst,
650 (imad Int64Regs:$a, Int64Regs:$b, Int64Regs:$c))]>;
651 def MAD64rri : NVPTXInst<(outs Int64Regs:$dst),
652 (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c),
653 "mad.lo.s64 \t$dst, $a, $b, $c;",
654 [(set Int64Regs:$dst,
655 (imad Int64Regs:$a, Int64Regs:$b, imm:$c))]>;
656 def MAD64rir : NVPTXInst<(outs Int64Regs:$dst),
657 (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c),
658 "mad.lo.s64 \t$dst, $a, $b, $c;",
659 [(set Int64Regs:$dst,
660 (imad Int64Regs:$a, imm:$b, Int64Regs:$c))]>;
661 def MAD64rii : NVPTXInst<(outs Int64Regs:$dst),
662 (ins Int64Regs:$a, i64imm:$b, i64imm:$c),
663 "mad.lo.s64 \t$dst, $a, $b, $c;",
664 [(set Int64Regs:$dst,
665 (imad Int64Regs:$a, imm:$b, imm:$c))]>;
// Integer negation: the ineg node on a 16/32/64-bit register is selected to
// "neg.s16" / "neg.s32" / "neg.s64" respectively. No predicate is attached.
667 def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
668 "neg.s16 \t$dst, $src;",
669 [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>;
670 def INEG32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
671 "neg.s32 \t$dst, $src;",
672 [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>;
673 def INEG64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
674 "neg.s64 \t$dst, $src;",
675 [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>;
677 //-----------------------------------
678 // Floating Point Arithmetic
679 //-----------------------------------
682 def FloatConst1 : PatLeaf<(fpimm), [{
683 if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEsingle)
685 float f = (float)N->getValueAPF().convertToFloat();
688 // Constant (double)1.0
689 def DoubleConst1 : PatLeaf<(fpimm), [{
690 if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEdouble)
692 double d = (double)N->getValueAPF().convertToDouble();
// F3 forms: mnemonic without a rounding modifier; every record in F3 is gated
// on allowFMA (or allowFMA_ftz for the .ftz.f32 records).
696 defm FADD : F3<"add", fadd>;
697 defm FSUB : F3<"sub", fsub>;
698 defm FMUL : F3<"mul", fmul>;
// F3_rn forms for the same nodes: mnemonic with ".rn"; the non-ftz records
// carry no predicate and the .ftz.f32 records require doF32FTZ.
700 defm FADD_rn : F3_rn<"add", fadd>;
701 defm FSUB_rn : F3_rn<"sub", fsub>;
702 defm FMUL_rn : F3_rn<"mul", fmul>;
// Unary operations via F2 (f64, f32 and a doF32FTZ-gated .ftz.f32 record).
// FSQRT passes "sqrt.rn" so the type suffix is appended after ".rn".
704 defm FABS : F2<"abs", fabs>;
705 defm FNEG : F2<"neg", fneg>;
706 defm FSQRT : F2<"sqrt.rn", fsqrt>;
// f64 division, no predicates attached.
// FDIV641r matches fdiv whose numerator satisfies DoubleConst1 and emits
// "rcp.rn.f64" on $b only; $a is an input operand but is not printed.
// FDIV64rr / FDIV64ri emit "div.rn.f64" with a register or fpimm divisor.
711 def FDIV641r : NVPTXInst<(outs Float64Regs:$dst),
712 (ins f64imm:$a, Float64Regs:$b),
713 "rcp.rn.f64 \t$dst, $b;",
714 [(set Float64Regs:$dst,
715 (fdiv DoubleConst1:$a, Float64Regs:$b))]>;
716 def FDIV64rr : NVPTXInst<(outs Float64Regs:$dst),
717 (ins Float64Regs:$a, Float64Regs:$b),
718 "div.rn.f64 \t$dst, $a, $b;",
719 [(set Float64Regs:$dst,
720 (fdiv Float64Regs:$a, Float64Regs:$b))]>;
721 def FDIV64ri : NVPTXInst<(outs Float64Regs:$dst),
722 (ins Float64Regs:$a, f64imm:$b),
723 "div.rn.f64 \t$dst, $a, $b;",
724 [(set Float64Regs:$dst,
725 (fdiv Float64Regs:$a, fpimm:$b))]>;
728 // F32 Approximate reciprocal
// fdiv with a FloatConst1 numerator under do_DIVF32_APPROX: emits rcp.approx
// on $b only ($a is not printed). The _ftz record additionally requires
// doF32FTZ and emits the ".ftz" form.
730 def FDIV321r_ftz : NVPTXInst<(outs Float32Regs:$dst),
731 (ins f32imm:$a, Float32Regs:$b),
732 "rcp.approx.ftz.f32 \t$dst, $b;",
733 [(set Float32Regs:$dst,
734 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
735 Requires<[do_DIVF32_APPROX, doF32FTZ]>;
736 def FDIV321r : NVPTXInst<(outs Float32Regs:$dst),
737 (ins f32imm:$a, Float32Regs:$b),
738 "rcp.approx.f32 \t$dst, $b;",
739 [(set Float32Regs:$dst,
740 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
741 Requires<[do_DIVF32_APPROX]>;
743 // F32 Approximate division
// General f32 fdiv under do_DIVF32_APPROX: "div.approx[.ftz].f32" with a
// register (rr) or fpimm (ri) divisor. The _ftz records additionally require
// doF32FTZ.
745 def FDIV32approxrr_ftz : NVPTXInst<(outs Float32Regs:$dst),
746 (ins Float32Regs:$a, Float32Regs:$b),
747 "div.approx.ftz.f32 \t$dst, $a, $b;",
748 [(set Float32Regs:$dst,
749 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
750 Requires<[do_DIVF32_APPROX, doF32FTZ]>;
751 def FDIV32approxri_ftz : NVPTXInst<(outs Float32Regs:$dst),
752 (ins Float32Regs:$a, f32imm:$b),
753 "div.approx.ftz.f32 \t$dst, $a, $b;",
754 [(set Float32Regs:$dst,
755 (fdiv Float32Regs:$a, fpimm:$b))]>,
756 Requires<[do_DIVF32_APPROX, doF32FTZ]>;
757 def FDIV32approxrr : NVPTXInst<(outs Float32Regs:$dst),
758 (ins Float32Regs:$a, Float32Regs:$b),
759 "div.approx.f32 \t$dst, $a, $b;",
760 [(set Float32Regs:$dst,
761 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
762 Requires<[do_DIVF32_APPROX]>;
763 def FDIV32approxri : NVPTXInst<(outs Float32Regs:$dst),
764 (ins Float32Regs:$a, f32imm:$b),
765 "div.approx.f32 \t$dst, $a, $b;",
766 [(set Float32Regs:$dst,
767 (fdiv Float32Regs:$a, fpimm:$b))]>,
768 Requires<[do_DIVF32_APPROX]>;
770 // F32 Semi-accurate reciprocal
772 // rcp.approx gives the same result as div.full(1.0f, a) and is faster.
// fdiv with a FloatConst1 numerator under do_DIVF32_FULL. These emit the same
// "rcp.approx[.ftz].f32" strings as FDIV321r / FDIV321r_ftz; only the
// division-level predicate differs.
774 def FDIV321r_approx_ftz : NVPTXInst<(outs Float32Regs:$dst),
775 (ins f32imm:$a, Float32Regs:$b),
776 "rcp.approx.ftz.f32 \t$dst, $b;",
777 [(set Float32Regs:$dst,
778 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
779 Requires<[do_DIVF32_FULL, doF32FTZ]>;
780 def FDIV321r_approx : NVPTXInst<(outs Float32Regs:$dst),
781 (ins f32imm:$a, Float32Regs:$b),
782 "rcp.approx.f32 \t$dst, $b;",
783 [(set Float32Regs:$dst,
784 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
785 Requires<[do_DIVF32_FULL]>;
787 // F32 Semi-accurate division
// General f32 fdiv under do_DIVF32_FULL: "div.full[.ftz].f32" with a register
// (rr) or fpimm (ri) divisor. The _ftz records additionally require doF32FTZ.
789 def FDIV32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
790 (ins Float32Regs:$a, Float32Regs:$b),
791 "div.full.ftz.f32 \t$dst, $a, $b;",
792 [(set Float32Regs:$dst,
793 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
794 Requires<[do_DIVF32_FULL, doF32FTZ]>;
795 def FDIV32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
796 (ins Float32Regs:$a, f32imm:$b),
797 "div.full.ftz.f32 \t$dst, $a, $b;",
798 [(set Float32Regs:$dst,
799 (fdiv Float32Regs:$a, fpimm:$b))]>,
800 Requires<[do_DIVF32_FULL, doF32FTZ]>;
801 def FDIV32rr : NVPTXInst<(outs Float32Regs:$dst),
802 (ins Float32Regs:$a, Float32Regs:$b),
803 "div.full.f32 \t$dst, $a, $b;",
804 [(set Float32Regs:$dst,
805 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
806 Requires<[do_DIVF32_FULL]>;
807 def FDIV32ri : NVPTXInst<(outs Float32Regs:$dst),
808 (ins Float32Regs:$a, f32imm:$b),
809 "div.full.f32 \t$dst, $a, $b;",
810 [(set Float32Regs:$dst,
811 (fdiv Float32Regs:$a, fpimm:$b))]>,
812 Requires<[do_DIVF32_FULL]>;
814 // F32 Accurate reciprocal
// fdiv with a FloatConst1 numerator, gated only on reqPTX20 (plus doF32FTZ
// for the _ftz record) and not on a division level: emits "rcp.rn[.ftz].f32"
// on $b only.
816 def FDIV321r_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
817 (ins f32imm:$a, Float32Regs:$b),
818 "rcp.rn.ftz.f32 \t$dst, $b;",
819 [(set Float32Regs:$dst,
820 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
821 Requires<[reqPTX20, doF32FTZ]>;
822 def FDIV321r_prec : NVPTXInst<(outs Float32Regs:$dst),
823 (ins f32imm:$a, Float32Regs:$b),
824 "rcp.rn.f32 \t$dst, $b;",
825 [(set Float32Regs:$dst,
826 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
827 Requires<[reqPTX20]>;
829 // F32 Accurate division
// General f32 fdiv gated only on reqPTX20 (plus doF32FTZ for the _ftz
// records): "div.rn[.ftz].f32" with a register (rr) or fpimm (ri) divisor.
831 def FDIV32rr_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
832 (ins Float32Regs:$a, Float32Regs:$b),
833 "div.rn.ftz.f32 \t$dst, $a, $b;",
834 [(set Float32Regs:$dst,
835 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
836 Requires<[doF32FTZ, reqPTX20]>;
837 def FDIV32ri_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
838 (ins Float32Regs:$a, f32imm:$b),
839 "div.rn.ftz.f32 \t$dst, $a, $b;",
840 [(set Float32Regs:$dst,
841 (fdiv Float32Regs:$a, fpimm:$b))]>,
842 Requires<[doF32FTZ, reqPTX20]>;
843 def FDIV32rr_prec : NVPTXInst<(outs Float32Regs:$dst),
844 (ins Float32Regs:$a, Float32Regs:$b),
845 "div.rn.f32 \t$dst, $a, $b;",
846 [(set Float32Regs:$dst,
847 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
848 Requires<[reqPTX20]>;
849 def FDIV32ri_prec : NVPTXInst<(outs Float32Regs:$dst),
850 (ins Float32Regs:$a, f32imm:$b),
851 "div.rn.f32 \t$dst, $a, $b;",
852 [(set Float32Regs:$dst,
853 (fdiv Float32Regs:$a, fpimm:$b))]>,
854 Requires<[reqPTX20]>;
// "rsqrt.approx.f32" instruction record; it has an empty pattern list and is
// selected only through the anonymous pattern below.
860 def RSQRTF32approx1r : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$b),
861 "rsqrt.approx.f32 \t$dst, $b;", []>;
// fdiv of a FloatConst1 numerator by int_nvvm_sqrt_f($b) becomes a single
// rsqrt, but only when all three predicates hold: division level FULL,
// non-precise f32 sqrt, and F32 flush-to-zero disabled.
863 def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f Float32Regs:$b)),
864 (RSQRTF32approx1r Float32Regs:$b)>,
865 Requires<[do_DIVF32_FULL, do_SQRTF32_APPROX, doNoF32FTZ]>;
867 multiclass FPCONTRACT32<string OpcStr, Predicate Pred> {
868 def rrr : NVPTXInst<(outs Float32Regs:$dst),
869 (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
870 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
871 [(set Float32Regs:$dst,
872 (fma Float32Regs:$a, Float32Regs:$b, Float32Regs:$c))]>,
874 def rri : NVPTXInst<(outs Float32Regs:$dst),
875 (ins Float32Regs:$a, Float32Regs:$b, f32imm:$c),
876 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
877 [(set Float32Regs:$dst,
878 (fma Float32Regs:$a, Float32Regs:$b, fpimm:$c))]>,
880 def rir : NVPTXInst<(outs Float32Regs:$dst),
881 (ins Float32Regs:$a, f32imm:$b, Float32Regs:$c),
882 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
883 [(set Float32Regs:$dst,
884 (fma Float32Regs:$a, fpimm:$b, Float32Regs:$c))]>,
886 def rii : NVPTXInst<(outs Float32Regs:$dst),
887 (ins Float32Regs:$a, f32imm:$b, f32imm:$c),
888 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
889 [(set Float32Regs:$dst,
890 (fma Float32Regs:$a, fpimm:$b, fpimm:$c))]>,
894 multiclass FPCONTRACT64<string OpcStr, Predicate Pred> {
895 def rrr : NVPTXInst<(outs Float64Regs:$dst),
896 (ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c),
897 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
898 [(set Float64Regs:$dst,
899 (fma Float64Regs:$a, Float64Regs:$b, Float64Regs:$c))]>,
901 def rri : NVPTXInst<(outs Float64Regs:$dst),
902 (ins Float64Regs:$a, Float64Regs:$b, f64imm:$c),
903 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
904 [(set Float64Regs:$dst,
905 (fma Float64Regs:$a, Float64Regs:$b, fpimm:$c))]>,
907 def rir : NVPTXInst<(outs Float64Regs:$dst),
908 (ins Float64Regs:$a, f64imm:$b, Float64Regs:$c),
909 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
910 [(set Float64Regs:$dst,
911 (fma Float64Regs:$a, fpimm:$b, Float64Regs:$c))]>,
913 def rii : NVPTXInst<(outs Float64Regs:$dst),
914 (ins Float64Regs:$a, f64imm:$b, f64imm:$c),
915 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
916 [(set Float64Regs:$dst,
917 (fma Float64Regs:$a, fpimm:$b, fpimm:$c))]>,
// Instantiate the fused multiply-add multiclasses. Each defm passes the PTX
// opcode string and a predicate; the predicate presumably gates the generated
// rrr/rri/rir/rii defs (the Requires<> lines are not visible here -- TODO
// confirm).
// NOTE(review): FMA64 is instantiated with doNoF32FTZ, which by name is an
// f32 flush-to-zero predicate -- confirm that the f64 FMA is really meant to
// be unavailable when doF32FTZ is in effect.
921 defm FMA32_ftz : FPCONTRACT32<"fma.rn.ftz.f32", doF32FTZ>;
922 defm FMA32 : FPCONTRACT32<"fma.rn.f32", doNoF32FTZ>;
923 defm FMA64 : FPCONTRACT64<"fma.rn.f64", doNoF32FTZ>;
// f32 sine: matches (fsin reg) and prints "sin.approx.f32".
// No predicate is attached, so the pattern is always available.
925 def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
926 "sin.approx.f32 \t$dst, $src;",
927 [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>;
// f32 cosine: matches (fcos reg) and prints "cos.approx.f32".
928 def COSF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
929 "cos.approx.f32 \t$dst, $src;",
930 [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>;
932 // Lower (frem x, y) into (sub x, (mul (floor (div x, y)) y))
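933 // i.e. "poor man's fmod()"
// NOTE(review): the patterns below round the quotient with CvtRMI (floor),
// while fmod-style remainder truncates toward zero; the two differ when x/y
// is negative -- confirm this is intended.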
936 def : Pat<(frem Float32Regs:$x, Float32Regs:$y),
937 (FSUBf32rr_ftz Float32Regs:$x, (FMULf32rr_ftz (CVT_f32_f32
938 (FDIV32rr_prec_ftz Float32Regs:$x, Float32Regs:$y), CvtRMI_FTZ),
940 Requires<[doF32FTZ]>;
941 def : Pat<(frem Float32Regs:$x, fpimm:$y),
942 (FSUBf32rr_ftz Float32Regs:$x, (FMULf32ri_ftz (CVT_f32_f32
943 (FDIV32ri_prec_ftz Float32Regs:$x, fpimm:$y), CvtRMI_FTZ),
945 Requires<[doF32FTZ]>;
948 def : Pat<(frem Float32Regs:$x, Float32Regs:$y),
949 (FSUBf32rr Float32Regs:$x, (FMULf32rr (CVT_f32_f32
950 (FDIV32rr_prec Float32Regs:$x, Float32Regs:$y), CvtRMI),
952 def : Pat<(frem Float32Regs:$x, fpimm:$y),
953 (FSUBf32rr Float32Regs:$x, (FMULf32ri (CVT_f32_f32
954 (FDIV32ri_prec Float32Regs:$x, fpimm:$y), CvtRMI),
958 def : Pat<(frem Float64Regs:$x, Float64Regs:$y),
959 (FSUBf64rr Float64Regs:$x, (FMULf64rr (CVT_f64_f64
960 (FDIV64rr Float64Regs:$x, Float64Regs:$y), CvtRMI),
962 def : Pat<(frem Float64Regs:$x, fpimm:$y),
963 (FSUBf64rr Float64Regs:$x, (FMULf64ri (CVT_f64_f64
964 (FDIV64ri Float64Regs:$x, fpimm:$y), CvtRMI),
967 //-----------------------------------
968 // Logical Arithmetic
969 //-----------------------------------
971 multiclass LOG_FORMAT<string OpcStr, SDNode OpNode> {
972 def b1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
973 !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
974 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
975 def b1ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
976 !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
977 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>;
978 def b16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
979 !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
980 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
982 def b16ri: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
983 !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
984 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
985 def b32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
986 !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
987 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
989 def b32ri: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
990 !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
991 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
992 def b64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
993 !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
994 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
996 def b64ri: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
997 !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
998 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
// Bitwise or/and/xor. Each defm expands LOG_FORMAT into the b1, b16, b32 and
// b64 register-register (rr) and register-immediate (ri) forms, e.g. ORb1rr,
// ANDb32ri, XORb64rr.
1001 defm OR : LOG_FORMAT<"or", or>;
1002 defm AND : LOG_FORMAT<"and", and>;
1003 defm XOR : LOG_FORMAT<"xor", xor>;
// Bitwise complement, one def per register width. Each matches (not reg) and
// prints the "not" opcode with the matching type suffix.
// Predicate (1-bit) form: "not.pred".
1005 def NOT1: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src),
1006 "not.pred \t$dst, $src;",
1007 [(set Int1Regs:$dst, (not Int1Regs:$src))]>;
// 16-bit form: "not.b16".
1008 def NOT16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
1009 "not.b16 \t$dst, $src;",
1010 [(set Int16Regs:$dst, (not Int16Regs:$src))]>;
// 32-bit form: "not.b32".
1011 def NOT32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
1012 "not.b32 \t$dst, $src;",
1013 [(set Int32Regs:$dst, (not Int32Regs:$src))]>;
// 64-bit form: "not.b64".
1014 def NOT64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
1015 "not.b64 \t$dst, $src;",
1016 [(set Int64Regs:$dst, (not Int64Regs:$src))]>;
1018 // For shifts, the second src operand must be 32-bit value
1019 multiclass LSHIFT_FORMAT<string OpcStr, SDNode OpNode> {
1020 def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
1022 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1023 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1025 def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
1026 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1027 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1029 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
1031 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1032 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1034 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1035 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1036 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1038 def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
1039 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1040 [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
1042 def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
1044 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1045 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
1047 def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
1048 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1049 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
// Left shift: LSHIFT_FORMAT appends the width ("16"/"32"/"64") to "shl.b",
// giving shl.b16 / shl.b32 / shl.b64.
1053 defm SHL : LSHIFT_FORMAT<"shl.b", shl>;
1055 // For shifts, the second src operand must be 32-bit value
1056 // Need to add cvt for the 8-bits.
1057 multiclass RSHIFT_FORMAT<string OpcStr, SDNode OpNode> {
1058 def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
1060 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1061 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1063 def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
1064 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1065 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1067 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
1069 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1070 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1072 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1073 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1074 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1076 def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
1077 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1078 [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
1080 def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
1082 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1083 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
1085 def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
1086 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1087 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
// Right shifts: RSHIFT_FORMAT appends the width to the opcode prefix.
// sra uses the signed form (shr.s16/32/64), srl the unsigned form
// (shr.u16/32/64).
1091 defm SRA : RSHIFT_FORMAT<"shr.s", sra>;
1092 defm SRL : RSHIFT_FORMAT<"shr.u", srl>;
1095 // Rotate: use ptx shf instruction if available.
1098 // 32 bit r2 = rotl r1, n
1100 // r2 = shf.l r1, r1, n
// 32-bit rotate-left by an immediate using the funnel shift instruction:
// $src is passed as both data operands of "shf.l.wrap.b32".
// Only selectable when hasHWROT32 holds.
1101 def ROTL32imm_hw : NVPTXInst<(outs Int32Regs:$dst),
1102 (ins Int32Regs:$src, i32imm:$amt),
1103 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
1104 [(set Int32Regs:$dst, (rotl Int32Regs:$src, (i32 imm:$amt)))]>,
1105 Requires<[hasHWROT32]> ;
// 32-bit rotate-left by a register amount; same "shf.l.wrap.b32" encoding as
// the immediate form, with $src used for both data operands.
// Only selectable when hasHWROT32 holds.
1107 def ROTL32reg_hw : NVPTXInst<(outs Int32Regs:$dst),
1108 (ins Int32Regs:$src, Int32Regs:$amt),
1109 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
1110 [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>,
1111 Requires<[hasHWROT32]>;
1113 // 32 bit r2 = rotr r1, n
1115 // r2 = shf.r r1, r1, n
// 32-bit rotate-right by an immediate using "shf.r.wrap.b32" with $src as
// both data operands. Only selectable when hasHWROT32 holds.
1116 def ROTR32imm_hw : NVPTXInst<(outs Int32Regs:$dst),
1117 (ins Int32Regs:$src, i32imm:$amt),
1118 "shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
1119 [(set Int32Regs:$dst, (rotr Int32Regs:$src, (i32 imm:$amt)))]>,
1120 Requires<[hasHWROT32]>;
// 32-bit rotate-right by a register amount using "shf.r.wrap.b32" with $src
// as both data operands. Only selectable when hasHWROT32 holds.
1122 def ROTR32reg_hw : NVPTXInst<(outs Int32Regs:$dst),
1123 (ins Int32Regs:$src, Int32Regs:$amt),
1124 "shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
1125 [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>,
1126 Requires<[hasHWROT32]>;
1129 // Rotate: if ptx shf instruction is not available, then use shift+add
1132 def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst),
1133 (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2),
1134 !strconcat("{{\n\t",
1135 !strconcat(".reg .b32 %lhs;\n\t",
1136 !strconcat(".reg .b32 %rhs;\n\t",
1137 !strconcat("shl.b32 \t%lhs, $src, $amt1;\n\t",
1138 !strconcat("shr.b32 \t%rhs, $src, $amt2;\n\t",
1139 !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
1140 !strconcat("}}", ""))))))),
1143 def SUB_FRM_32 : SDNodeXForm<imm, [{
1144 return CurDAG->getTargetConstant(32-N->getZExtValue(), MVT::i32);
// Software 32-bit rotate by an immediate, used when noHWROT32 holds.
// ROT32imm_sw takes (src, left-shift amount, right-shift amount); SUB_FRM_32
// turns the immediate into 32 - amt.
// rotl: shift left by amt, shift right by 32 - amt.
1147 def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)),
1148 (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
1149 Requires<[noHWROT32]>;
// rotr: the two shift amounts are swapped (left by 32 - amt, right by amt).
1150 def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)),
1151 (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>,
1152 Requires<[noHWROT32]>;
1154 def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
1156 !strconcat("{{\n\t",
1157 !strconcat(".reg .b32 %lhs;\n\t",
1158 !strconcat(".reg .b32 %rhs;\n\t",
1159 !strconcat(".reg .b32 %amt2;\n\t",
1160 !strconcat("shl.b32 \t%lhs, $src, $amt;\n\t",
1161 !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
1162 !strconcat("shr.b32 \t%rhs, $src, %amt2;\n\t",
1163 !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
1164 !strconcat("}}", ""))))))))),
1165 [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>,
1166 Requires<[noHWROT32]>;
1168 def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
1170 !strconcat("{{\n\t",
1171 !strconcat(".reg .b32 %lhs;\n\t",
1172 !strconcat(".reg .b32 %rhs;\n\t",
1173 !strconcat(".reg .b32 %amt2;\n\t",
1174 !strconcat("shr.b32 \t%lhs, $src, $amt;\n\t",
1175 !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
1176 !strconcat("shl.b32 \t%rhs, $src, %amt2;\n\t",
1177 !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
1178 !strconcat("}}", ""))))))))),
1179 [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>,
1180 Requires<[noHWROT32]>;
1183 def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
1184 i32imm:$amt1, i32imm:$amt2),
1185 !strconcat("{{\n\t",
1186 !strconcat(".reg .b64 %lhs;\n\t",
1187 !strconcat(".reg .b64 %rhs;\n\t",
1188 !strconcat("shl.b64 \t%lhs, $src, $amt1;\n\t",
1189 !strconcat("shr.b64 \t%rhs, $src, $amt2;\n\t",
1190 !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
1191 !strconcat("}}", ""))))))),
1194 def SUB_FRM_64 : SDNodeXForm<imm, [{
1195 return CurDAG->getTargetConstant(64-N->getZExtValue(), MVT::i32);
// 64-bit rotate by an immediate via ROT64imm_sw (src, left-shift amount,
// right-shift amount); SUB_FRM_64 turns the immediate into 64 - amt.
// These patterns carry no Requires<> clause, so they apply unconditionally.
// rotl: shift left by amt, shift right by 64 - amt.
1198 def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)),
1199 (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>;
// rotr: the two shift amounts are swapped.
1200 def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)),
1201 (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>;
1203 def ROTL64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
1205 !strconcat("{{\n\t",
1206 !strconcat(".reg .b64 %lhs;\n\t",
1207 !strconcat(".reg .b64 %rhs;\n\t",
1208 !strconcat(".reg .u32 %amt2;\n\t",
1209 !strconcat("shl.b64 \t%lhs, $src, $amt;\n\t",
1210 !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
1211 !strconcat("shr.b64 \t%rhs, $src, %amt2;\n\t",
1212 !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
1213 !strconcat("}}", ""))))))))),
1214 [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>;
1216 def ROTR64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
1218 !strconcat("{{\n\t",
1219 !strconcat(".reg .b64 %lhs;\n\t",
1220 !strconcat(".reg .b64 %rhs;\n\t",
1221 !strconcat(".reg .u32 %amt2;\n\t",
1222 !strconcat("shr.b64 \t%lhs, $src, $amt;\n\t",
1223 !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
1224 !strconcat("shl.b64 \t%rhs, $src, %amt2;\n\t",
1225 !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
1226 !strconcat("}}", ""))))))))),
1227 [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>;
1229 // BFE - bit-field extract
1231 multiclass BFE<string TyStr, RegisterClass RC> {
1232 // BFE supports both 32-bit and 64-bit values, but the start and length
1233 // operands are always 32-bit
1235 : NVPTXInst<(outs RC:$d),
1236 (ins RC:$a, Int32Regs:$b, Int32Regs:$c),
1237 !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>;
1239 : NVPTXInst<(outs RC:$d),
1240 (ins RC:$a, Int32Regs:$b, i32imm:$c),
1241 !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>;
1243 : NVPTXInst<(outs RC:$d),
1244 (ins RC:$a, i32imm:$b, i32imm:$c),
1245 !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>;
// Bit-field extract instantiations: signed and unsigned, on 32- and 64-bit
// register classes. The first argument is the PTX type suffix placed after
// "bfe.".
1248 defm BFE_S32 : BFE<"s32", Int32Regs>;
1249 defm BFE_U32 : BFE<"u32", Int32Regs>;
1250 defm BFE_S64 : BFE<"s64", Int64Regs>;
1251 defm BFE_U64 : BFE<"u64", Int64Regs>;
1253 //-----------------------------------
1254 // General Comparison
1255 //-----------------------------------
1257 // General setp instructions
1258 multiclass SETP<string TypeStr, RegisterClass RC, Operand ImmCls> {
1259 def rr : NVPTXInst<(outs Int1Regs:$dst),
1260 (ins RC:$a, RC:$b, CmpMode:$cmp),
1261 !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, "\t$dst, $a, $b;"),
1263 def ri : NVPTXInst<(outs Int1Regs:$dst),
1264 (ins RC:$a, ImmCls:$b, CmpMode:$cmp),
1265 !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, "\t$dst, $a, $b;"),
1267 def ir : NVPTXInst<(outs Int1Regs:$dst),
1268 (ins ImmCls:$a, RC:$b, CmpMode:$cmp),
1269 !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, "\t$dst, $a, $b;"),
// setp (compare into a predicate register) instantiations. Arguments are the
// PTX type suffix, the source register class and the matching immediate
// operand class. Each defm yields rr, ri and ir forms (e.g. SETP_s32rr).
// 16-bit integer
1273 defm SETP_b16 : SETP<"b16", Int16Regs, i16imm>;
1274 defm SETP_s16 : SETP<"s16", Int16Regs, i16imm>;
1275 defm SETP_u16 : SETP<"u16", Int16Regs, i16imm>;
// 32-bit integer
1276 defm SETP_b32 : SETP<"b32", Int32Regs, i32imm>;
1277 defm SETP_s32 : SETP<"s32", Int32Regs, i32imm>;
1278 defm SETP_u32 : SETP<"u32", Int32Regs, i32imm>;
// 64-bit integer
1279 defm SETP_b64 : SETP<"b64", Int64Regs, i64imm>;
1280 defm SETP_s64 : SETP<"s64", Int64Regs, i64imm>;
1281 defm SETP_u64 : SETP<"u64", Int64Regs, i64imm>;
// floating point
1282 defm SETP_f32 : SETP<"f32", Float32Regs, f32imm>;
1283 defm SETP_f64 : SETP<"f64", Float64Regs, f64imm>;
1285 // General set instructions
1286 multiclass SET<string TypeStr, RegisterClass RC, Operand ImmCls> {
1287 def rr : NVPTXInst<(outs Int32Regs:$dst),
1288 (ins RC:$a, RC:$b, CmpMode:$cmp),
1289 !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>;
1290 def ri : NVPTXInst<(outs Int32Regs:$dst),
1291 (ins RC:$a, ImmCls:$b, CmpMode:$cmp),
1292 !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>;
1293 def ir : NVPTXInst<(outs Int32Regs:$dst),
1294 (ins ImmCls:$a, RC:$b, CmpMode:$cmp),
1295 !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>;
// set (compare into a 32-bit integer register) instantiations. Arguments are
// the PTX type suffix, the source register class and the matching immediate
// operand class. Each defm yields rr, ri and ir forms (e.g. SET_u64ri).
// 16-bit integer
1298 defm SET_b16 : SET<"b16", Int16Regs, i16imm>;
1299 defm SET_s16 : SET<"s16", Int16Regs, i16imm>;
1300 defm SET_u16 : SET<"u16", Int16Regs, i16imm>;
// 32-bit integer
1301 defm SET_b32 : SET<"b32", Int32Regs, i32imm>;
1302 defm SET_s32 : SET<"s32", Int32Regs, i32imm>;
1303 defm SET_u32 : SET<"u32", Int32Regs, i32imm>;
// 64-bit integer
1304 defm SET_b64 : SET<"b64", Int64Regs, i64imm>;
1305 defm SET_s64 : SET<"s64", Int64Regs, i64imm>;
1306 defm SET_u64 : SET<"u64", Int64Regs, i64imm>;
// floating point
1307 defm SET_f32 : SET<"f32", Float32Regs, f32imm>;
1308 defm SET_f64 : SET<"f64", Float64Regs, f64imm>;
1310 //-----------------------------------
1311 // General Selection
1312 //-----------------------------------
1314 // General selp instructions
1315 multiclass SELP<string TypeStr, RegisterClass RC, Operand ImmCls> {
1316 def rr : NVPTXInst<(outs RC:$dst),
1317 (ins RC:$a, RC:$b, Int1Regs:$p),
1318 !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
1319 def ri : NVPTXInst<(outs RC:$dst),
1320 (ins RC:$a, ImmCls:$b, Int1Regs:$p),
1321 !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
1322 def ir : NVPTXInst<(outs RC:$dst),
1323 (ins ImmCls:$a, RC:$b, Int1Regs:$p),
1324 !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
1325 def ii : NVPTXInst<(outs RC:$dst),
1326 (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
1327 !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
1330 multiclass SELP_PATTERN<string TypeStr, RegisterClass RC, Operand ImmCls,
1332 def rr : NVPTXInst<(outs RC:$dst),
1333 (ins RC:$a, RC:$b, Int1Regs:$p),
1334 !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
1335 [(set RC:$dst, (select Int1Regs:$p, RC:$a, RC:$b))]>;
1336 def ri : NVPTXInst<(outs RC:$dst),
1337 (ins RC:$a, ImmCls:$b, Int1Regs:$p),
1338 !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
1339 [(set RC:$dst, (select Int1Regs:$p, RC:$a, ImmNode:$b))]>;
1340 def ir : NVPTXInst<(outs RC:$dst),
1341 (ins ImmCls:$a, RC:$b, Int1Regs:$p),
1342 !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
1343 [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, RC:$b))]>;
1344 def ii : NVPTXInst<(outs RC:$dst),
1345 (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
1346 !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
1347 [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, ImmNode:$b))]>;
// selp instantiations. The b16/b32/b64/f32/f64 variants use SELP_PATTERN and
// therefore carry (select p, a, b) selection patterns; the s*/u* variants use
// the pattern-less SELP multiclass and are only reachable from explicit Pat
// definitions. The last SELP_PATTERN argument is the immediate leaf used in
// the patterns (imm for integers, fpimm for floats).
1350 defm SELP_b16 : SELP_PATTERN<"b16", Int16Regs, i16imm, imm>;
1351 defm SELP_s16 : SELP<"s16", Int16Regs, i16imm>;
1352 defm SELP_u16 : SELP<"u16", Int16Regs, i16imm>;
1353 defm SELP_b32 : SELP_PATTERN<"b32", Int32Regs, i32imm, imm>;
1354 defm SELP_s32 : SELP<"s32", Int32Regs, i32imm>;
1355 defm SELP_u32 : SELP<"u32", Int32Regs, i32imm>;
1356 defm SELP_b64 : SELP_PATTERN<"b64", Int64Regs, i64imm, imm>;
1357 defm SELP_s64 : SELP<"s64", Int64Regs, i64imm>;
1358 defm SELP_u64 : SELP<"u64", Int64Regs, i64imm>;
1359 defm SELP_f32 : SELP_PATTERN<"f32", Float32Regs, f32imm, fpimm>;
1360 defm SELP_f64 : SELP_PATTERN<"f64", Float64Regs, f64imm, fpimm>;
1362 // Special select for predicate operands
// Select between two predicate values using pure logic instead of selp:
//   result = (p & a) | (!p & b)
1363 def : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)),
1364 (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a),
1365 (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>;
1368 // Funnel shift in clamp mode
1370 // - SDNodes are created so they can be used in the DAG code,
1371 // e.g. NVPTXISelLowering (LowerShiftLeftParts and LowerShiftRightParts)
// Type profile for the funnel-shift nodes: one result and three operands.
// Operands 1 and 2 have the same type as the result, which must be an
// integer; operand 3 must also be an integer but may differ in type.
1373 def SDTIntShiftDOp: SDTypeProfile<1, 3,
1374 [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
1375 SDTCisInt<0>, SDTCisInt<3>]>;
// Target-specific DAG nodes for left/right funnel shift in clamp mode.
// Both are declared with an empty node-property list.
1376 def FUN_SHFL_CLAMP : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>;
1377 def FUN_SHFR_CLAMP : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>;
// Left funnel shift in clamp mode: selects the FUN_SHFL_CLAMP node and prints
// "shf.l.clamp.b32" with operands ($lo, $hi, $amt) in that order.
// No predicate is attached to this def.
1379 def FUNSHFLCLAMP : NVPTXInst<(outs Int32Regs:$dst),
1380 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
1381 "shf.l.clamp.b32 \t$dst, $lo, $hi, $amt;",
1382 [(set Int32Regs:$dst,
1383 (FUN_SHFL_CLAMP Int32Regs:$lo,
1384 Int32Regs:$hi, Int32Regs:$amt))]>;
// Right funnel shift in clamp mode: selects the FUN_SHFR_CLAMP node and
// prints "shf.r.clamp.b32" with operands ($lo, $hi, $amt) in that order.
// No predicate is attached to this def.
1386 def FUNSHFRCLAMP : NVPTXInst<(outs Int32Regs:$dst),
1387 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
1388 "shf.r.clamp.b32 \t$dst, $lo, $hi, $amt;",
1389 [(set Int32Regs:$dst,
1390 (FUN_SHFR_CLAMP Int32Regs:$lo,
1391 Int32Regs:$hi, Int32Regs:$amt))]>;
1393 //-----------------------------------
1394 // Data Movement (Load / Store, Move)
1395 //-----------------------------------
1397 def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex],
1399 def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri64", [frameindex],
1402 def MEMri : Operand<i32> {
1403 let PrintMethod = "printMemOperand";
1404 let MIOperandInfo = (ops Int32Regs, i32imm);
1406 def MEMri64 : Operand<i64> {
1407 let PrintMethod = "printMemOperand";
1408 let MIOperandInfo = (ops Int64Regs, i64imm);
1411 def imem : Operand<iPTR> {
1412 let PrintMethod = "printOperand";
1415 def imemAny : Operand<iPTRAny> {
1416 let PrintMethod = "printOperand";
1419 def LdStCode : Operand<i32> {
1420 let PrintMethod = "printLdStCode";
// Wrapper node: one result, one operand, both of the same pointer type.
// Used below to wrap target global addresses and external symbols so they
// can be matched by the MOV_ADDR* / IMOV32ri patterns.
1423 def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
1424 def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
// Materialize a wrapped target global address into a 32-bit register with
// "mov.u32".
1426 def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a),
1427 "mov.u32 \t$dst, $a;",
1428 [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>;
// Materialize a wrapped target global address into a 64-bit register with
// "mov.u64".
1430 def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
1431 "mov.u64 \t$dst, $a;",
1432 [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>;
1434 // Get pointer to local stack
1436 : NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num),
1437 "mov.u32 \t$d, __local_depot$num;", []>;
// 64-bit form of the local-stack pointer move: prints
// "mov.u64 $d, __local_depot<num>", where $num is appended directly to the
// symbol name. No selection pattern; presumably emitted directly from C++
// code -- TODO confirm.
1438 def MOV_DEPOT_ADDR_64
1439 : NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num),
1440 "mov.u64 \t$d, __local_depot$num;", []>;
1443 // copyPhysreg is hard-coded in NVPTXInstrInfo.cpp
1444 let IsSimpleMove=1 in {
1445 def IMOV1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
1446 "mov.pred \t$dst, $sss;", []>;
1447 def IMOV16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
1448 "mov.u16 \t$dst, $sss;", []>;
1449 def IMOV32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
1450 "mov.u32 \t$dst, $sss;", []>;
1451 def IMOV64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
1452 "mov.u64 \t$dst, $sss;", []>;
1454 def FMOV32rr: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
1455 "mov.f32 \t$dst, $src;", []>;
1456 def FMOV64rr: NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
1457 "mov.f64 \t$dst, $src;", []>;
// Immediate-to-register moves. Each def matches a bare immediate of the
// corresponding type (imm for integers, fpimm for floats) and prints a mov
// with the matching type suffix.
// Predicate immediate.
1459 def IMOV1ri: NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
1460 "mov.pred \t$dst, $src;",
1461 [(set Int1Regs:$dst, imm:$src)]>;
// 16-bit integer immediate.
1462 def IMOV16ri: NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
1463 "mov.u16 \t$dst, $src;",
1464 [(set Int16Regs:$dst, imm:$src)]>;
// 32-bit integer immediate.
1465 def IMOV32ri: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
1466 "mov.u32 \t$dst, $src;",
1467 [(set Int32Regs:$dst, imm:$src)]>;
// 64-bit integer immediate. Note the name is IMOV64i, not IMOV64ri like its
// siblings; it is kept as-is because code elsewhere may refer to it.
1468 def IMOV64i: NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
1469 "mov.u64 \t$dst, $src;",
1470 [(set Int64Regs:$dst, imm:$src)]>;
// f32 immediate.
1472 def FMOV32ri: NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
1473 "mov.f32 \t$dst, $src;",
1474 [(set Float32Regs:$dst, fpimm:$src)]>;
// f64 immediate.
1475 def FMOV64ri: NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
1476 "mov.f64 \t$dst, $src;",
1477 [(set Float64Regs:$dst, fpimm:$src)]>;
// A wrapped external symbol producing an i32 is materialized with the 32-bit
// immediate move. Only the i32 case is covered here.
1479 def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;
1481 //---- Copy Frame Index ----
// Compute a frame-index style address into a 32-bit register. The address is
// matched by the ADDRri complex pattern and printed through the MEMri
// operand's "add" modifier, producing an "add.u32".
1482 def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
1483 "add.u32 \t$dst, ${addr:add};",
1484 [(set Int32Regs:$dst, ADDRri:$addr)]>;
// 64-bit counterpart: ADDRri64 / MEMri64, printed as "add.u64".
1485 def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr),
1486 "add.u64 \t$dst, ${addr:add};",
1487 [(set Int64Regs:$dst, ADDRri64:$addr)]>;
1489 //-----------------------------------
1490 // Comparison and Selection
1491 //-----------------------------------
1493 multiclass ISET_FORMAT<PatFrag OpNode, PatLeaf Mode,
1494 Instruction setp_16rr,
1495 Instruction setp_16ri,
1496 Instruction setp_16ir,
1497 Instruction setp_32rr,
1498 Instruction setp_32ri,
1499 Instruction setp_32ir,
1500 Instruction setp_64rr,
1501 Instruction setp_64ri,
1502 Instruction setp_64ir,
1503 Instruction set_16rr,
1504 Instruction set_16ri,
1505 Instruction set_16ir,
1506 Instruction set_32rr,
1507 Instruction set_32ri,
1508 Instruction set_32ir,
1509 Instruction set_64rr,
1510 Instruction set_64ri,
1511 Instruction set_64ir> {
1513 def : Pat<(i1 (OpNode Int16Regs:$a, Int16Regs:$b)),
1514 (setp_16rr Int16Regs:$a, Int16Regs:$b, Mode)>;
1515 def : Pat<(i1 (OpNode Int16Regs:$a, imm:$b)),
1516 (setp_16ri Int16Regs:$a, imm:$b, Mode)>;
1517 def : Pat<(i1 (OpNode imm:$a, Int16Regs:$b)),
1518 (setp_16ir imm:$a, Int16Regs:$b, Mode)>;
1520 def : Pat<(i1 (OpNode Int32Regs:$a, Int32Regs:$b)),
1521 (setp_32rr Int32Regs:$a, Int32Regs:$b, Mode)>;
1522 def : Pat<(i1 (OpNode Int32Regs:$a, imm:$b)),
1523 (setp_32ri Int32Regs:$a, imm:$b, Mode)>;
1524 def : Pat<(i1 (OpNode imm:$a, Int32Regs:$b)),
1525 (setp_32ir imm:$a, Int32Regs:$b, Mode)>;
1527 def : Pat<(i1 (OpNode Int64Regs:$a, Int64Regs:$b)),
1528 (setp_64rr Int64Regs:$a, Int64Regs:$b, Mode)>;
1529 def : Pat<(i1 (OpNode Int64Regs:$a, imm:$b)),
1530 (setp_64ri Int64Regs:$a, imm:$b, Mode)>;
1531 def : Pat<(i1 (OpNode imm:$a, Int64Regs:$b)),
1532 (setp_64ir imm:$a, Int64Regs:$b, Mode)>;
1535 def : Pat<(i32 (OpNode Int16Regs:$a, Int16Regs:$b)),
1536 (set_16rr Int16Regs:$a, Int16Regs:$b, Mode)>;
1537 def : Pat<(i32 (OpNode Int16Regs:$a, imm:$b)),
1538 (set_16ri Int16Regs:$a, imm:$b, Mode)>;
1539 def : Pat<(i32 (OpNode imm:$a, Int16Regs:$b)),
1540 (set_16ir imm:$a, Int16Regs:$b, Mode)>;
1542 def : Pat<(i32 (OpNode Int32Regs:$a, Int32Regs:$b)),
1543 (set_32rr Int32Regs:$a, Int32Regs:$b, Mode)>;
1544 def : Pat<(i32 (OpNode Int32Regs:$a, imm:$b)),
1545 (set_32ri Int32Regs:$a, imm:$b, Mode)>;
1546 def : Pat<(i32 (OpNode imm:$a, Int32Regs:$b)),
1547 (set_32ir imm:$a, Int32Regs:$b, Mode)>;
1549 def : Pat<(i32 (OpNode Int64Regs:$a, Int64Regs:$b)),
1550 (set_64rr Int64Regs:$a, Int64Regs:$b, Mode)>;
1551 def : Pat<(i32 (OpNode Int64Regs:$a, imm:$b)),
1552 (set_64ri Int64Regs:$a, imm:$b, Mode)>;
1553 def : Pat<(i32 (OpNode imm:$a, Int64Regs:$b)),
1554 (set_64ir imm:$a, Int64Regs:$b, Mode)>;
1557 multiclass ISET_FORMAT_SIGNED<PatFrag OpNode, PatLeaf Mode>
1558 : ISET_FORMAT<OpNode, Mode,
1559 SETP_s16rr, SETP_s16ri, SETP_s16ir,
1560 SETP_s32rr, SETP_s32ri, SETP_s32ir,
1561 SETP_s64rr, SETP_s64ri, SETP_s64ir,
1562 SET_s16rr, SET_s16ri, SET_s16ir,
1563 SET_s32rr, SET_s32ri, SET_s32ir,
1564 SET_s64rr, SET_s64ri, SET_s64ir> {
1565 // TableGen doesn't like empty multiclasses
1566 def : PatLeaf<(i32 0)>;
1569 multiclass ISET_FORMAT_UNSIGNED<PatFrag OpNode, PatLeaf Mode>
1570 : ISET_FORMAT<OpNode, Mode,
1571 SETP_u16rr, SETP_u16ri, SETP_u16ir,
1572 SETP_u32rr, SETP_u32ri, SETP_u32ir,
1573 SETP_u64rr, SETP_u64ri, SETP_u64ir,
1574 SET_u16rr, SET_u16ri, SET_u16ir,
1575 SET_u32rr, SET_u32ri, SET_u32ir,
1576 SET_u64rr, SET_u64ri, SET_u64ir> {
1577 // TableGen doesn't like empty multiclasses
1578 def : PatLeaf<(i32 0)>;
// Integer comparison patterns. Signed condition codes are routed to the
// SETP_s*/SET_s* instructions and unsigned ones to SETP_u*/SET_u*; the second
// argument is the compare-mode leaf passed as the instruction's $cmp operand.
// Signed and unsigned variants of a condition share the same mode leaf; the
// signedness comes from the instruction type suffix.
1581 defm : ISET_FORMAT_SIGNED<setgt, CmpGT>;
1582 defm : ISET_FORMAT_UNSIGNED<setugt, CmpGT>;
1583 defm : ISET_FORMAT_SIGNED<setlt, CmpLT>;
1584 defm : ISET_FORMAT_UNSIGNED<setult, CmpLT>;
1585 defm : ISET_FORMAT_SIGNED<setge, CmpGE>;
1586 defm : ISET_FORMAT_UNSIGNED<setuge, CmpGE>;
1587 defm : ISET_FORMAT_SIGNED<setle, CmpLE>;
1588 defm : ISET_FORMAT_UNSIGNED<setule, CmpLE>;
1589 defm : ISET_FORMAT_SIGNED<seteq, CmpEQ>;
1590 defm : ISET_FORMAT_UNSIGNED<setueq, CmpEQ>;
1591 defm : ISET_FORMAT_SIGNED<setne, CmpNE>;
1592 defm : ISET_FORMAT_UNSIGNED<setune, CmpNE>;
// Comparisons between two predicate registers are lowered to logic ops.
// Inequality (setne / setune) is a plain XOR.
1595 def : Pat<(setne Int1Regs:$a, Int1Regs:$b),
1596 (XORb1rr Int1Regs:$a, Int1Regs:$b)>;
1597 def : Pat<(setune Int1Regs:$a, Int1Regs:$b),
1598 (XORb1rr Int1Regs:$a, Int1Regs:$b)>;
// Equality (seteq / setueq) is the complement of that XOR.
1600 def : Pat<(seteq Int1Regs:$a, Int1Regs:$b),
1601 (NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
1602 def : Pat<(setueq Int1Regs:$a, Int1Regs:$b),
1603 (NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
1605 // i1 compare -> i32
// The XOR of the two predicates is true exactly when they differ. SELP_u32ii
// takes (value-if-true, value-if-false, predicate), so:
//   setne -> -1 when the XOR is true, 0 otherwise
//   seteq ->  0 when the XOR is true, -1 otherwise
// The second pattern previously matched setne again, which duplicated the
// first matcher with the opposite result and left seteq without an i32 form.
1606 def : Pat<(i32 (setne Int1Regs:$a, Int1Regs:$b)),
1607 (SELP_u32ii -1, 0, (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
1608 def : Pat<(i32 (seteq Int1Regs:$a, Int1Regs:$b)),
1609 (SELP_u32ii 0, -1, (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
1613 multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
1615 def : Pat<(i1 (OpNode Float32Regs:$a, Float32Regs:$b)),
1616 (SETP_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>,
1617 Requires<[doF32FTZ]>;
1618 def : Pat<(i1 (OpNode Float32Regs:$a, Float32Regs:$b)),
1619 (SETP_f32rr Float32Regs:$a, Float32Regs:$b, Mode)>;
1620 def : Pat<(i1 (OpNode Float32Regs:$a, fpimm:$b)),
1621 (SETP_f32ri Float32Regs:$a, fpimm:$b, ModeFTZ)>,
1622 Requires<[doF32FTZ]>;
1623 def : Pat<(i1 (OpNode Float32Regs:$a, fpimm:$b)),
1624 (SETP_f32ri Float32Regs:$a, fpimm:$b, Mode)>;
1625 def : Pat<(i1 (OpNode fpimm:$a, Float32Regs:$b)),
1626 (SETP_f32ir fpimm:$a, Float32Regs:$b, ModeFTZ)>,
1627 Requires<[doF32FTZ]>;
1628 def : Pat<(i1 (OpNode fpimm:$a, Float32Regs:$b)),
1629 (SETP_f32ir fpimm:$a, Float32Regs:$b, Mode)>;
1632 def : Pat<(i1 (OpNode Float64Regs:$a, Float64Regs:$b)),
1633 (SETP_f64rr Float64Regs:$a, Float64Regs:$b, Mode)>;
1634 def : Pat<(i1 (OpNode Float64Regs:$a, fpimm:$b)),
1635 (SETP_f64ri Float64Regs:$a, fpimm:$b, Mode)>;
1636 def : Pat<(i1 (OpNode fpimm:$a, Float64Regs:$b)),
1637 (SETP_f64ir fpimm:$a, Float64Regs:$b, Mode)>;
1640 def : Pat<(i32 (OpNode Float32Regs:$a, Float32Regs:$b)),
1641 (SET_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>,
1642 Requires<[doF32FTZ]>;
1643 def : Pat<(i32 (OpNode Float32Regs:$a, Float32Regs:$b)),
1644 (SET_f32rr Float32Regs:$a, Float32Regs:$b, Mode)>;
1645 def : Pat<(i32 (OpNode Float32Regs:$a, fpimm:$b)),
1646 (SET_f32ri Float32Regs:$a, fpimm:$b, ModeFTZ)>,
1647 Requires<[doF32FTZ]>;
1648 def : Pat<(i32 (OpNode Float32Regs:$a, fpimm:$b)),
1649 (SET_f32ri Float32Regs:$a, fpimm:$b, Mode)>;
1650 def : Pat<(i32 (OpNode fpimm:$a, Float32Regs:$b)),
1651 (SET_f32ir fpimm:$a, Float32Regs:$b, ModeFTZ)>,
1652 Requires<[doF32FTZ]>;
1653 def : Pat<(i32 (OpNode fpimm:$a, Float32Regs:$b)),
1654 (SET_f32ir fpimm:$a, Float32Regs:$b, Mode)>;
1657 def : Pat<(i32 (OpNode Float64Regs:$a, Float64Regs:$b)),
1658 (SET_f64rr Float64Regs:$a, Float64Regs:$b, Mode)>;
1659 def : Pat<(i32 (OpNode Float64Regs:$a, fpimm:$b)),
1660 (SET_f64ri Float64Regs:$a, fpimm:$b, Mode)>;
1661 def : Pat<(i32 (OpNode fpimm:$a, Float64Regs:$b)),
1662 (SET_f64ir fpimm:$a, Float64Regs:$b, Mode)>;
// Floating-point comparison patterns. Each defm passes the condition code,
// the plain compare-mode leaf and its _FTZ counterpart; FSET_FORMAT uses the
// _FTZ leaf for the f32 patterns gated on doF32FTZ and the plain leaf
// otherwise.
// Ordered comparisons.
1665 defm FSetGT : FSET_FORMAT<setogt, CmpGT, CmpGT_FTZ>;
1666 defm FSetLT : FSET_FORMAT<setolt, CmpLT, CmpLT_FTZ>;
1667 defm FSetGE : FSET_FORMAT<setoge, CmpGE, CmpGE_FTZ>;
1668 defm FSetLE : FSET_FORMAT<setole, CmpLE, CmpLE_FTZ>;
1669 defm FSetEQ : FSET_FORMAT<setoeq, CmpEQ, CmpEQ_FTZ>;
1670 defm FSetNE : FSET_FORMAT<setone, CmpNE, CmpNE_FTZ>;
// Unordered comparisons use the *U compare modes.
1672 defm FSetUGT : FSET_FORMAT<setugt, CmpGTU, CmpGTU_FTZ>;
1673 defm FSetULT : FSET_FORMAT<setult, CmpLTU, CmpLTU_FTZ>;
1674 defm FSetUGE : FSET_FORMAT<setuge, CmpGEU, CmpGEU_FTZ>;
1675 defm FSetULE : FSET_FORMAT<setule, CmpLEU, CmpLEU_FTZ>;
1676 defm FSetUEQ : FSET_FORMAT<setueq, CmpEQU, CmpEQU_FTZ>;
1677 defm FSetUNE : FSET_FORMAT<setune, CmpNEU, CmpNEU_FTZ>;
// seto / setuo map to the NUM / NAN compare modes respectively.
1679 defm FSetNUM : FSET_FORMAT<seto, CmpNUM, CmpNUM_FTZ>;
1680 defm FSetNAN : FSET_FORMAT<setuo, CmpNAN, CmpNAN_FTZ>;
1682 //def ld_param : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad,
1683 // [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
1685 def SDTDeclareParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
// Type profiles for the call / parameter / return-value DAG nodes declared
// further down. In each SDTypeProfile the first two template arguments are
// the number of results and the number of operands; the list holds the type
// constraints (indices count results first, then operands).
// Scalar parameter declaration: no results, three integer operands.
1687 def SDTDeclareScalarParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>,
1688 SDTCisInt<1>, SDTCisInt<2>]>;
// Parameter loads: 1, 2 or 4 results followed by two integer operands.
// Only the V2 profile additionally forces its two results to the same type.
1689 def SDTLoadParamProfile : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
1690 def SDTLoadParamV2Profile : SDTypeProfile<2, 2, [SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisInt<3>]>;
1691 def SDTLoadParamV4Profile : SDTypeProfile<4, 2, [SDTCisInt<4>, SDTCisInt<5>]>;
// Call printing: a single integer operand.
1692 def SDTPrintCallProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
1693 def SDTPrintCallUniProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
// Parameter stores: the first two operands are integers; the remaining
// 1, 2 or 4 operands are left unconstrained.
1694 def SDTStoreParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
1695 def SDTStoreParamV2Profile : SDTypeProfile<0, 4, [SDTCisInt<0>, SDTCisInt<1>]>;
1696 def SDTStoreParamV4Profile : SDTypeProfile<0, 6, [SDTCisInt<0>, SDTCisInt<1>]>;
1697 def SDTStoreParam32Profile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
// Call argument / call sequencing profiles.
1698 def SDTCallArgProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
1699 def SDTCallArgMarkProfile : SDTypeProfile<0, 0, []>;
1700 def SDTCallVoidProfile : SDTypeProfile<0, 1, []>;
1701 def SDTCallValProfile : SDTypeProfile<1, 0, []>;
1702 def SDTMoveParamProfile : SDTypeProfile<1, 1, []>;
// Return-value stores: an integer first operand followed by 1, 2 or 4
// unconstrained operands.
1703 def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
1704 def SDTStoreRetvalV2Profile : SDTypeProfile<0, 3, [SDTCisInt<0>]>;
1705 def SDTStoreRetvalV4Profile : SDTypeProfile<0, 5, [SDTCisInt<0>]>;
1706 def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>;
// Nodes that declare call parameters and return values. All are chained,
// glued on both sides and marked as having side effects.
def DeclareParam
  : SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareScalarParam
  : SDNode<"NVPTXISD::DeclareScalarParam", SDTDeclareScalarParamProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareRetParam
  : SDNode<"NVPTXISD::DeclareRetParam", SDTDeclareParamProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareRet
  : SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
// Parameter loads (scalar, 2-element and 4-element): chained, may load,
// glued on both sides.
def LoadParam
  : SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile,
           [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
def LoadParamV2
  : SDNode<"NVPTXISD::LoadParamV2", SDTLoadParamV2Profile,
           [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
def LoadParamV4
  : SDNode<"NVPTXISD::LoadParamV4", SDTLoadParamV4Profile,
           [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;

// Nodes selected to the "call" / "call.uni" instruction prefix.
def PrintCall
  : SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def PrintCallUni
  : SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallUniProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
// Parameter stores (scalar, vector, and the two 32-bit variants that share
// SDTStoreParam32Profile).
def StoreParam
  : SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamV2
  : SDNode<"NVPTXISD::StoreParamV2", SDTStoreParamV2Profile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamV4
  : SDNode<"NVPTXISD::StoreParamV4", SDTStoreParamV4Profile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamU32
  : SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamS32
  : SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
// Nodes that make up the textual pieces of a call: argument-list
// delimiters, individual arguments, the callee, the prototype and the
// call result.
def CallArgBegin
  : SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallArg
  : SDNode<"NVPTXISD::CallArg", SDTCallArgProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def LastCallArg
  : SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallArgEnd
  : SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallVoid
  : SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def Prototype
  : SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallVal
  : SDNode<"NVPTXISD::CallVal", SDTCallValProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
// One result, one operand (SDTMoveParamProfile).
// NOTE(review): the node property list was missing from this definition and
// is restored here as empty (no chain, no glue) -- confirm against the
// code that creates NVPTXISD::MoveParam nodes.
def MoveParam
  : SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, []>;
// Return-value stores: chained with side effects, but not glued.
def StoreRetval
  : SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile,
           [SDNPHasChain, SDNPSideEffect]>;
def StoreRetvalV2
  : SDNode<"NVPTXISD::StoreRetvalV2", SDTStoreRetvalV2Profile,
           [SDNPHasChain, SDNPSideEffect]>;
def StoreRetvalV4
  : SDNode<"NVPTXISD::StoreRetvalV4", SDTStoreRetvalV4Profile,
           [SDNPHasChain, SDNPSideEffect]>;

def PseudoUseParam
  : SDNode<"NVPTXISD::PseudoUseParam", SDTPseudoUseParamProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;

// RETURN takes no operands and produces no results.
def RETURNNode
  : SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
           [SDNPHasChain, SDNPSideEffect]>;
// Scalar load from the retval0 parameter:
//   ld.param<opstr>  $dst, [retval0+$b];
// $b is the immediate added to retval0 in the address. No selection pattern
// is attached to this class.
class LoadParamMemInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
              !strconcat("ld.param",
                         !strconcat(opstr, "\t$dst, [retval0+$b];")),
              []>;
// Register form of a parameter load:
//   mov<opstr>  $dst, retval$b;
// Selected for (LoadParam 0, imm:$b).
class LoadParamRegInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
              !strconcat("mov", !strconcat(opstr, "\t$dst, retval$b;")),
              [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
// Two-element vector load from the retval0 parameter:
//   ld.param.v2<opstr>  {$dst, $dst2}, [retval0+$b];
// No selection pattern is attached to this class.
class LoadParamV2MemInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins i32imm:$b),
              !strconcat("ld.param.v2",
                         !strconcat(opstr,
                                    "\t{{$dst, $dst2}}, [retval0+$b];")),
              []>;
// Four-element vector load from the retval0 parameter:
//   ld.param.v4<opstr>  {$dst, $dst2, $dst3, $dst4}, [retval0+$b];
// The output list names all four destinations used by the asm string and
// the input list supplies $b. No selection pattern is attached.
class LoadParamV4MemInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs regclass:$dst, regclass:$dst2, regclass:$dst3,
                    regclass:$dst4),
              (ins i32imm:$b),
              !strconcat("ld.param.v4",
                         !strconcat(opstr,
                           "\t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];")),
              []>;
// Scalar store into an outgoing call parameter:
//   st.param<opstr>  [param$a+$b], $val;
// $a is appended to "param" to form the parameter name and $b is added to
// it in the address. No selection pattern is attached to this class.
class StoreParamInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
              !strconcat("st.param",
                         !strconcat(opstr, "\t[param$a+$b], $val;")),
              []>;
// Two-element vector store into an outgoing call parameter:
//   st.param.v2<opstr>  [param$a+$b], {$val, $val2};
// No selection pattern is attached to this class.
class StoreParamV2Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs),
              (ins regclass:$val, regclass:$val2, i32imm:$a, i32imm:$b),
              !strconcat("st.param.v2",
                         !strconcat(opstr,
                                    "\t[param$a+$b], {{$val, $val2}};")),
              []>;
// Four-element vector store into an outgoing call parameter:
//   st.param.v4<opstr>  [param$a+$b], {$val, $val2, $val3, $val4};
// The value operands are named $val, $val2, $val3, $val4 so that every
// operand referenced by the asm string exists in the input list (the list
// previously declared $val1 and never declared $val4).
// No selection pattern is attached to this class.
class StoreParamV4Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs),
              (ins regclass:$val, regclass:$val2, regclass:$val3,
                   regclass:$val4, i32imm:$a, i32imm:$b),
              !strconcat("st.param.v4",
                         !strconcat(opstr,
                           "\t[param$a+$b], {{$val, $val2, $val3, $val4}};")),
              []>;
// Scalar store into the function's return-value parameter:
//   st.param<opstr>  [func_retval0+$a], $val;
// No selection pattern is attached to this class.
class StoreRetvalInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs), (ins regclass:$val, i32imm:$a),
              !strconcat("st.param",
                         !strconcat(opstr, "\t[func_retval0+$a], $val;")),
              []>;
// Two-element vector store into the function's return-value parameter:
//   st.param.v2<opstr>  [func_retval0+$a], {$val, $val2};
// No selection pattern is attached to this class.
class StoreRetvalV2Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, i32imm:$a),
              !strconcat("st.param.v2",
                         !strconcat(opstr,
                                    "\t[func_retval0+$a], {{$val, $val2}};")),
              []>;
// Four-element vector store into the function's return-value parameter:
//   st.param.v4<opstr>  [func_retval0+$a], {$val, $val2, $val3, $val4};
// No selection pattern is attached to this class.
class StoreRetvalV4Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs),
              (ins regclass:$val, regclass:$val2, regclass:$val3,
                   regclass:$val4, i32imm:$a),
              !strconcat("st.param.v4",
                         !strconcat(opstr,
                  "\t[func_retval0+$a], {{$val, $val2, $val3, $val4}};")),
              []>;
// Call prefix for a call with one return value; selected for (PrintCall 1).
// The asm string follows the same shape as PrintCallRetInst2..8.
def PrintCallRetInst1
  : NVPTXInst<(outs), (ins), "call (retval0), ",
              [(PrintCall (i32 1))]>;
// Call prefixes for calls with N return values; PrintCallRetInstN is
// selected for (PrintCall N) and prints "call (retval0, ..., retval<N-1>), ".
def PrintCallRetInst2
  : NVPTXInst<(outs), (ins), "call (retval0, retval1), ",
              [(PrintCall (i32 2))]>;
def PrintCallRetInst3
  : NVPTXInst<(outs), (ins), "call (retval0, retval1, retval2), ",
              [(PrintCall (i32 3))]>;
def PrintCallRetInst4
  : NVPTXInst<(outs), (ins), "call (retval0, retval1, retval2, retval3), ",
              [(PrintCall (i32 4))]>;
def PrintCallRetInst5
  : NVPTXInst<(outs), (ins),
              "call (retval0, retval1, retval2, retval3, retval4), ",
              [(PrintCall (i32 5))]>;
def PrintCallRetInst6
  : NVPTXInst<(outs), (ins),
              "call (retval0, retval1, retval2, retval3, retval4, retval5), ",
              [(PrintCall (i32 6))]>;
def PrintCallRetInst7
  : NVPTXInst<(outs), (ins),
"call (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
              [(PrintCall (i32 7))]>;
def PrintCallRetInst8
  : NVPTXInst<(outs), (ins),
              !strconcat("call (retval0, retval1, retval2, retval3, ",
                         "retval4, retval5, retval6, retval7), "),
              [(PrintCall (i32 8))]>;

// Call prefix for a call without return values.
def PrintCallNoRetInst
  : NVPTXInst<(outs), (ins), "call ",
              [(PrintCall (i32 0))]>;
// "call.uni" counterparts of the PrintCall instructions;
// PrintCallUniRetInstN is selected for (PrintCallUni N).
def PrintCallUniRetInst1
  : NVPTXInst<(outs), (ins), "call.uni (retval0), ",
              [(PrintCallUni (i32 1))]>;
def PrintCallUniRetInst2
  : NVPTXInst<(outs), (ins), "call.uni (retval0, retval1), ",
              [(PrintCallUni (i32 2))]>;
def PrintCallUniRetInst3
  : NVPTXInst<(outs), (ins), "call.uni (retval0, retval1, retval2), ",
              [(PrintCallUni (i32 3))]>;
def PrintCallUniRetInst4
  : NVPTXInst<(outs), (ins),
              "call.uni (retval0, retval1, retval2, retval3), ",
              [(PrintCallUni (i32 4))]>;
def PrintCallUniRetInst5
  : NVPTXInst<(outs), (ins),
              "call.uni (retval0, retval1, retval2, retval3, retval4), ",
              [(PrintCallUni (i32 5))]>;
def PrintCallUniRetInst6
  : NVPTXInst<(outs), (ins),
"call.uni (retval0, retval1, retval2, retval3, retval4, retval5), ",
              [(PrintCallUni (i32 6))]>;
def PrintCallUniRetInst7
  : NVPTXInst<(outs), (ins),
"call.uni (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
              [(PrintCallUni (i32 7))]>;
def PrintCallUniRetInst8
  : NVPTXInst<(outs), (ins),
              !strconcat("call.uni (retval0, retval1, retval2, retval3, ",
                         "retval4, retval5, retval6, retval7), "),
              [(PrintCallUni (i32 8))]>;

// "call.uni" prefix for a call without return values.
def PrintCallUniNoRetInst
  : NVPTXInst<(outs), (ins), "call.uni ",
              [(PrintCallUni (i32 0))]>;
// ld.param instantiations. The 8-bit variants use Int16Regs with a ".b8"
// type suffix.

// Integer, scalar.
def LoadParamMemI64   : LoadParamMemInst<Int64Regs, ".b64">;
def LoadParamMemI32   : LoadParamMemInst<Int32Regs, ".b32">;
def LoadParamMemI16   : LoadParamMemInst<Int16Regs, ".b16">;
def LoadParamMemI8    : LoadParamMemInst<Int16Regs, ".b8">;
// Integer, two elements.
def LoadParamMemV2I64 : LoadParamV2MemInst<Int64Regs, ".b64">;
def LoadParamMemV2I32 : LoadParamV2MemInst<Int32Regs, ".b32">;
def LoadParamMemV2I16 : LoadParamV2MemInst<Int16Regs, ".b16">;
def LoadParamMemV2I8  : LoadParamV2MemInst<Int16Regs, ".b8">;
// Integer, four elements.
def LoadParamMemV4I32 : LoadParamV4MemInst<Int32Regs, ".b32">;
def LoadParamMemV4I16 : LoadParamV4MemInst<Int16Regs, ".b16">;
def LoadParamMemV4I8  : LoadParamV4MemInst<Int16Regs, ".b8">;
// Floating point.
def LoadParamMemF32   : LoadParamMemInst<Float32Regs, ".f32">;
def LoadParamMemF64   : LoadParamMemInst<Float64Regs, ".f64">;
def LoadParamMemV2F32 : LoadParamV2MemInst<Float32Regs, ".f32">;
def LoadParamMemV2F64 : LoadParamV2MemInst<Float64Regs, ".f64">;
def LoadParamMemV4F32 : LoadParamV4MemInst<Float32Regs, ".f32">;
// st.param instantiations for integer scalars and two-element vectors.
// The 8-bit variants use Int16Regs with a ".b8" type suffix.
def StoreParamI64   : StoreParamInst<Int64Regs, ".b64">;
def StoreParamI32   : StoreParamInst<Int32Regs, ".b32">;

def StoreParamI16   : StoreParamInst<Int16Regs, ".b16">;
def StoreParamI8    : StoreParamInst<Int16Regs, ".b8">;

def StoreParamV2I64 : StoreParamV2Inst<Int64Regs, ".b64">;
def StoreParamV2I32 : StoreParamV2Inst<Int32Regs, ".b32">;
def StoreParamV2I16 : StoreParamV2Inst<Int16Regs, ".b16">;
def StoreParamV2I8  : StoreParamV2Inst<Int16Regs, ".b8">;
// FIXME: StoreParamV4Inst crashes llvm-tblgen :(
//def StoreParamV4I32    : StoreParamV4Inst<Int32Regs, ".b32">;
// Written out by hand instead. The mnemonic carries the ".v4" qualifier,
// matching the four-element operand list and the sibling
// StoreParamV4I16 / StoreParamV4I8 / StoreParamV4F32 definitions.
def StoreParamV4I32
  : NVPTXInst<(outs),
              (ins Int32Regs:$val, Int32Regs:$val2, Int32Regs:$val3,
                   Int32Regs:$val4, i32imm:$a, i32imm:$b),
              "st.param.v4.b32\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
              []>;
// Hand-written four-element st.param forms for 16-bit and 8-bit elements.
// Both take their values in Int16Regs; no selection pattern is attached.
def StoreParamV4I16
  : NVPTXInst<(outs),
              (ins Int16Regs:$val, Int16Regs:$val2, Int16Regs:$val3,
                   Int16Regs:$val4, i32imm:$a, i32imm:$b),
              "st.param.v4.b16\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
              []>;

def StoreParamV4I8
  : NVPTXInst<(outs),
              (ins Int16Regs:$val, Int16Regs:$val2, Int16Regs:$val3,
                   Int16Regs:$val4, i32imm:$a, i32imm:$b),
              "st.param.v4.b8\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
              []>;
// st.param instantiations for floating-point scalars and two-element
// vectors.
def StoreParamF32   : StoreParamInst<Float32Regs, ".f32">;
def StoreParamF64   : StoreParamInst<Float64Regs, ".f64">;
def StoreParamV2F32 : StoreParamV2Inst<Float32Regs, ".f32">;
def StoreParamV2F64 : StoreParamV2Inst<Float64Regs, ".f64">;
// FIXME: StoreParamV4Inst crashes llvm-tblgen :(
//def StoreParamV4F32    : StoreParamV4Inst<Float32Regs, ".f32">;
// Written out by hand instead; no selection pattern is attached.
def StoreParamV4F32
  : NVPTXInst<(outs),
              (ins Float32Regs:$val, Float32Regs:$val2, Float32Regs:$val3,
                   Float32Regs:$val4, i32imm:$a, i32imm:$b),
              "st.param.v4.f32\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
              []>;
// Return-value store instantiations. The 8-bit variants use Int16Regs with
// a ".b8" type suffix.

// Integer.
def StoreRetvalI64   : StoreRetvalInst<Int64Regs, ".b64">;
def StoreRetvalI32   : StoreRetvalInst<Int32Regs, ".b32">;
def StoreRetvalI16   : StoreRetvalInst<Int16Regs, ".b16">;
def StoreRetvalI8    : StoreRetvalInst<Int16Regs, ".b8">;
def StoreRetvalV2I64 : StoreRetvalV2Inst<Int64Regs, ".b64">;
def StoreRetvalV2I32 : StoreRetvalV2Inst<Int32Regs, ".b32">;
def StoreRetvalV2I16 : StoreRetvalV2Inst<Int16Regs, ".b16">;
def StoreRetvalV2I8  : StoreRetvalV2Inst<Int16Regs, ".b8">;
def StoreRetvalV4I32 : StoreRetvalV4Inst<Int32Regs, ".b32">;
def StoreRetvalV4I16 : StoreRetvalV4Inst<Int16Regs, ".b16">;
def StoreRetvalV4I8  : StoreRetvalV4Inst<Int16Regs, ".b8">;

// Floating point.
def StoreRetvalF64   : StoreRetvalInst<Float64Regs, ".f64">;
def StoreRetvalF32   : StoreRetvalInst<Float32Regs, ".f32">;
def StoreRetvalV2F64 : StoreRetvalV2Inst<Float64Regs, ".f64">;
def StoreRetvalV2F32 : StoreRetvalV2Inst<Float32Regs, ".f32">;
def StoreRetvalV4F32 : StoreRetvalV4Inst<Float32Regs, ".f32">;
// Argument-list delimiters. (CallArgEnd 1) prints ");" and (CallArgEnd 0)
// prints ")" without the terminating semicolon.
def CallArgBeginInst
  : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>;
def CallArgEndInst1
  : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>;
def CallArgEndInst0
  : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>;

// Function return.
def RETURNInst
  : NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>;
// A register call argument that is followed by more arguments: prints the
// register and a trailing ", ". Selected for (CallArg 0, reg).
class CallArgInst<NVPTXRegClass regclass>
  : NVPTXInst<(outs), (ins regclass:$a), "$a, ",
              [(CallArg (i32 0), regclass:$a)]>;
// The final register call argument: prints the register with no trailing
// separator. Selected for (LastCallArg 0, reg).
class LastCallArgInst<NVPTXRegClass regclass>
  : NVPTXInst<(outs), (ins regclass:$a), "$a",
              [(LastCallArg (i32 0), regclass:$a)]>;
// Register call-argument instructions, one per register class.
def CallArgI64     : CallArgInst<Int64Regs>;
def CallArgI32     : CallArgInst<Int32Regs>;
def CallArgI16     : CallArgInst<Int16Regs>;

def CallArgF64     : CallArgInst<Float64Regs>;
def CallArgF32     : CallArgInst<Float32Regs>;

// Same register classes for the final argument of a call.
def LastCallArgI64 : LastCallArgInst<Int64Regs>;
def LastCallArgI32 : LastCallArgInst<Int32Regs>;
def LastCallArgI16 : LastCallArgInst<Int16Regs>;

def LastCallArgF64 : LastCallArgInst<Float64Regs>;
def LastCallArgF32 : LastCallArgInst<Float32Regs>;
// Immediate call arguments: first CallArg operand is 0, the immediate is
// printed as-is.
def CallArgI32imm
  : NVPTXInst<(outs), (ins i32imm:$a), "$a, ",
              [(CallArg (i32 0), (i32 imm:$a))]>;
def LastCallArgI32imm
  : NVPTXInst<(outs), (ins i32imm:$a), "$a",
              [(LastCallArg (i32 0), (i32 imm:$a))]>;

// Parameter call arguments: first CallArg operand is 1, the immediate is
// printed as "param<N>".
def CallArgParam
  : NVPTXInst<(outs), (ins i32imm:$a), "param$a, ",
              [(CallArg (i32 1), (i32 imm:$a))]>;
def LastCallArgParam
  : NVPTXInst<(outs), (ins i32imm:$a), "param$a",
              [(LastCallArg (i32 1), (i32 imm:$a))]>;
// Call target. Each variant prints the callee followed by ", ".
// NOTE(review): the asm strings were missing from these three definitions
// and are restored as "$addr, " -- confirm against the emitted PTX.

// Callee is a global address.
def CallVoidInst
  : NVPTXInst<(outs), (ins imem:$addr), "$addr, ",
              [(CallVoid (Wrapper tglobaladdr:$addr))]>;
// Callee address held in an Int32Regs register.
def CallVoidInstReg
  : NVPTXInst<(outs), (ins Int32Regs:$addr), "$addr, ",
              [(CallVoid Int32Regs:$addr)]>;
// Callee address held in an Int64Regs register.
def CallVoidInstReg64
  : NVPTXInst<(outs), (ins Int64Regs:$addr), "$addr, ",
              [(CallVoid Int64Regs:$addr)]>;
// Prints ", prototype_<val>;" for (Prototype imm:$val).
def PrototypeInst
  : NVPTXInst<(outs), (ins i32imm:$val), ", prototype_$val;",
              [(Prototype (i32 imm:$val))]>;
// Declaration of a call's return value as an aligned .b8 array parameter.
def DeclareRetMemInst
  : NVPTXInst<(outs), (ins i32imm:$align, i32imm:$size, i32imm:$num),
              ".param .align $align .b8 retval$num[$size];",
              [(DeclareRetParam (i32 imm:$align), (i32 imm:$size),
                                (i32 imm:$num))]>;

// Scalar return-value declarations. The first DeclareRet operand selects
// the form: 1 prints a ".param" declaration, 2 prints a ".reg" declaration.
def DeclareRetScalarInst
  : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
              ".param .b$size retval$num;",
              [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>;
def DeclareRetRegInst
  : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
              ".reg .b$size retval$num;",
              [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>;
// Declaration of an outgoing call parameter as an aligned .b8 array.
def DeclareParamInst
  : NVPTXInst<(outs), (ins i32imm:$align, i32imm:$a, i32imm:$size),
              ".param .align $align .b8 param$a[$size];",
              [(DeclareParam (i32 imm:$align), (i32 imm:$a),
                             (i32 imm:$size))]>;

// Scalar parameter declarations. The last DeclareScalarParam operand
// selects the form: 0 prints a ".param" declaration, 1 prints a ".reg"
// declaration.
def DeclareScalarParamInst
  : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
              ".param .b$size param$a;",
              [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>;
def DeclareScalarRegInst
  : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
              ".reg .b$size param$a;",
              [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>;
// Selects MoveParam as a register-to-register move:
//   mov<asmstr>  $dst, $src;
class MoveParamInst<NVPTXRegClass regclass, string asmstr>
  : NVPTXInst<(outs regclass:$dst), (ins regclass:$src),
              !strconcat("mov", !strconcat(asmstr, "\t$dst, $src;")),
              [(set regclass:$dst, (MoveParam regclass:$src))]>;
def MoveParamI64 : MoveParamInst<Int64Regs, ".b64">;
def MoveParamI32 : MoveParamInst<Int32Regs, ".b32">;
// The 16-bit variant does not use MoveParamInst; it prints a cvt instead of
// a mov.
// NOTE(review): the mnemonic is cvt.u16.u32 although $src is declared in
// Int16Regs -- confirm this is intended.
def MoveParamI16
  : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
              "cvt.u16.u32\t$dst, $src;",
              [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>;
def MoveParamF64 : MoveParamInst<Float64Regs, ".f64">;
def MoveParamF32 : MoveParamInst<Float32Regs, ".f32">;
// Selects PseudoUseParam; the asm string is only a comment naming $src.
class PseudoUseParamInst<NVPTXRegClass regclass>
  : NVPTXInst<(outs), (ins regclass:$src), "// Pseudo use of $src",
              [(PseudoUseParam regclass:$src)]>;
// One pseudo-use instruction per register class.
def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs>;
def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs>;
def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs>;
def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>;
def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>;
2065 // Load / Store Handling
// Scalar load instructions. Each variant takes the same leading operands
// ($isVol, $addsp, $Vec, $Sign printed through LdStCode modifiers, then
// $fromWidth) and differs only in how the address is supplied:
//   _avar    : imem operand
//   _areg    : Int32Regs register
//   _areg_64 : Int64Regs register
//   _ari     : Int32Regs register + immediate offset
//   _ari_64  : Int64Regs register + immediate offset
//   _asi     : imem operand + immediate offset
// None of the variants carries a selection pattern.
multiclass LD<NVPTXRegClass regclass> {
  def _avar : NVPTXInst<(outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t$dst, [$addr];"), []>;
  def _areg : NVPTXInst<(outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t$dst, [$addr];"), []>;
  def _areg_64 : NVPTXInst<(outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t$dst, [$addr];"), []>;
  def _ari : NVPTXInst<(outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t$dst, [$addr+$offset];"), []>;
  def _ari_64 : NVPTXInst<(outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t$dst, [$addr+$offset];"), []>;
  def _asi : NVPTXInst<(outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t$dst, [$addr+$offset];"), []>;
}
// Scalar load instantiations; all are marked mayLoad with no other side
// effects. The i8 and i16 variants both use Int16Regs.
let mayLoad=1, neverHasSideEffects=1 in {
  defm LD_i8  : LD<Int16Regs>;
  defm LD_i16 : LD<Int16Regs>;
  defm LD_i32 : LD<Int32Regs>;
  defm LD_i64 : LD<Int64Regs>;
  defm LD_f32 : LD<Float32Regs>;
  defm LD_f64 : LD<Float64Regs>;
}
// Scalar store instructions. Each variant takes the stored value $src, the
// LdStCode operands ($isVol, $addsp, $Vec, $Sign) and $toWidth, and differs
// only in how the address is supplied:
//   _avar    : imem operand
//   _areg    : Int32Regs register
//   _areg_64 : Int64Regs register
//   _ari     : Int32Regs register + immediate offset
//   _ari_64  : Int64Regs register + immediate offset
//   _asi     : imem operand + immediate offset
// None of the variants carries a selection pattern.
multiclass ST<NVPTXRegClass regclass> {
  def _avar : NVPTXInst<(outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, imem:$addr),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$toWidth \t[$addr], $src;"), []>;
  def _areg : NVPTXInst<(outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$toWidth \t[$addr], $src;"), []>;
  def _areg_64 : NVPTXInst<(outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$toWidth \t[$addr], $src;"), []>;
  def _ari : NVPTXInst<(outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$toWidth \t[$addr+$offset], $src;"), []>;
  def _ari_64 : NVPTXInst<(outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$toWidth \t[$addr+$offset], $src;"), []>;
  def _asi : NVPTXInst<(outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$toWidth \t[$addr+$offset], $src;"), []>;
}
// Scalar store instantiations; all are marked mayStore with no other side
// effects. The i8 and i16 variants both use Int16Regs.
let mayStore=1, neverHasSideEffects=1 in {
  defm ST_i8  : ST<Int16Regs>;
  defm ST_i16 : ST<Int16Regs>;
  defm ST_i32 : ST<Int32Regs>;
  defm ST_i64 : ST<Int64Regs>;
  defm ST_f32 : ST<Float32Regs>;
  defm ST_f64 : ST<Float64Regs>;
}
// The following instructions are used only during and after vector
// elementization. Vector elementization happens at the machine-instruction
// level, so these instructions never appear in the DAG.
// Vector load instructions with two (_v2_*) or four (_v4_*) destination
// registers. The address forms mirror the scalar loads:
//   _avar / _asi        : imem operand (without / with immediate offset)
//   _areg / _ari        : Int32Regs register (without / with offset)
//   _areg_64 / _ari_64  : Int64Regs register (without / with offset)
// Every _v4_* variant lists all four destinations named in its asm string.
// None of the variants carries a selection pattern.
multiclass LD_VEC<NVPTXRegClass regclass> {
  // ---- two destinations ----
  def _v2_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
  def _v2_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
  def _v2_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
  def _v2_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
  def _v2_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
  def _v2_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;

  // ---- four destinations ----
  def _v4_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                 regclass:$dst3, regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
  def _v4_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                 regclass:$dst3, regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
  def _v4_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                    regclass:$dst3, regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
  def _v4_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
             "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
    []>;
  def _v4_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                   regclass:$dst3, regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
             "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
    []>;
  def _v4_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
             "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
    []>;
}
// Vector load instantiations; all are marked mayLoad with no other side
// effects. The i8 and i16 variants both use Int16Regs.
let mayLoad=1, neverHasSideEffects=1 in {
  defm LDV_i8  : LD_VEC<Int16Regs>;
  defm LDV_i16 : LD_VEC<Int16Regs>;
  defm LDV_i32 : LD_VEC<Int32Regs>;
  defm LDV_i64 : LD_VEC<Int64Regs>;
  defm LDV_f32 : LD_VEC<Float32Regs>;
  defm LDV_f64 : LD_VEC<Float64Regs>;
}
// Vector store instructions with two (_v2_*) or four (_v4_*) source
// registers. The address forms mirror the scalar stores:
//   _avar / _asi        : imem operand (without / with immediate offset)
//   _areg / _ari        : Int32Regs register (without / with offset)
//   _areg_64 / _ari_64  : Int64Regs register (without / with offset)
// Every variant whose asm string prints $offset declares it as the last
// input operand. None of the variants carries a selection pattern.
multiclass ST_VEC<NVPTXRegClass regclass> {
  // ---- two sources ----
  def _v2_avar : NVPTXInst<(outs),
    (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
         LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
  def _v2_areg : NVPTXInst<(outs),
    (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
         LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
  def _v2_areg_64 : NVPTXInst<(outs),
    (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
         LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
  def _v2_ari : NVPTXInst<(outs),
    (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
         LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr,
         i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
  def _v2_ari_64 : NVPTXInst<(outs),
    (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
         LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr,
         i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
  def _v2_asi : NVPTXInst<(outs),
    (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
         LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr,
         i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;

  // ---- four sources ----
  def _v4_avar : NVPTXInst<(outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
  def _v4_areg : NVPTXInst<(outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
  def _v4_areg_64 : NVPTXInst<(outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
  def _v4_ari : NVPTXInst<(outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
             "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
    []>;
  def _v4_ari_64 : NVPTXInst<(outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
             "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
    []>;
  def _v4_asi : NVPTXInst<(outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr, i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
             "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
    []>;
}
// Vector store instantiations; all are marked mayStore with no other side
// effects. The i8 and i16 variants both use Int16Regs.
let mayStore=1, neverHasSideEffects=1 in {
  defm STV_i8  : ST_VEC<Int16Regs>;
  defm STV_i16 : ST_VEC<Int16Regs>;
  defm STV_i32 : ST_VEC<Int32Regs>;
  defm STV_i64 : ST_VEC<Int64Regs>;
  defm STV_f32 : ST_VEC<Float32Regs>;
  defm STV_f64 : ST_VEC<Float64Regs>;
}
2319 //---- Conversion ----
// Selects a bitconvert between two register classes as
//   mov.b<SzStr>  $d, $a;
class F_BITCONVERT<string SzStr, NVPTXRegClass regclassIn,
                   NVPTXRegClass regclassOut>
  : NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a),
              !strconcat(!strconcat("mov.b", SzStr), " \t $d, $a;"),
              [(set regclassOut:$d, (bitconvert regclassIn:$a))]>;
// 32-bit and 64-bit bitconverts in both directions (integer <-> float).
def BITCONVERT_32_I2F : F_BITCONVERT<"32", Int32Regs,   Float32Regs>;
def BITCONVERT_32_F2I : F_BITCONVERT<"32", Float32Regs, Int32Regs>;
def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs,   Float64Regs>;
def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>;
2332 // NOTE: pred->fp are currently sub-optimal due to an issue in TableGen where
2333 // we cannot specify floating-point literals in isel patterns. Therefore, we
2334 // use an integer selp to select either 1 or 0 and then cvt to floating-point.
// sint -> f32. The predicate source is first turned into the integer 1 or 0
// with SELP_u32ii and then converted; all conversions use the CvtRN mode.
def : Pat<(f32 (sint_to_fp Int1Regs:$a)),
          (CVT_f32_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
def : Pat<(f32 (sint_to_fp Int16Regs:$a)), (CVT_f32_s16 Int16Regs:$a, CvtRN)>;
def : Pat<(f32 (sint_to_fp Int32Regs:$a)), (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(f32 (sint_to_fp Int64Regs:$a)), (CVT_f32_s64 Int64Regs:$a, CvtRN)>;

// uint -> f32.
def : Pat<(f32 (uint_to_fp Int1Regs:$a)),
          (CVT_f32_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
def : Pat<(f32 (uint_to_fp Int16Regs:$a)), (CVT_f32_u16 Int16Regs:$a, CvtRN)>;
def : Pat<(f32 (uint_to_fp Int32Regs:$a)), (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(f32 (uint_to_fp Int64Regs:$a)), (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
// sint -> f64
// An i1 'true' is -1 when read as a signed integer, so the predicate is
// materialized as -1/0 (not 1/0) before the signed conversion; this makes
// sint_to_fp of a true predicate produce -1.0. The -1/0 selp is the same one
// used by the i1 sign-extension patterns in this file.
def : Pat<(f64 (sint_to_fp Int1Regs:$a)),
          (CVT_f64_s32 (SELP_s32ii -1, 0, Int1Regs:$a), CvtRN)>;
// Wider integer sources convert directly, rounding to nearest (CvtRN).
def : Pat<(f64 (sint_to_fp Int16Regs:$a)),
          (CVT_f64_s16 Int16Regs:$a, CvtRN)>;
def : Pat<(f64 (sint_to_fp Int32Regs:$a)),
          (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(f64 (sint_to_fp Int64Regs:$a)),
          (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
// uint -> f64
// The i1 source is first materialized as the integer 1 or 0 with selp; the
// wider sources convert directly. Every conversion uses the CvtRN mode.
def : Pat<(f64 (uint_to_fp Int1Regs:$a)),
          (CVT_f64_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
def : Pat<(f64 (uint_to_fp Int16Regs:$a)), (CVT_f64_u16 Int16Regs:$a, CvtRN)>;
def : Pat<(f64 (uint_to_fp Int32Regs:$a)), (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(f64 (uint_to_fp Int64Regs:$a)), (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
// f32 -> sint
// The i1 result is bit 0 of the value truncated toward zero. Comparing the
// raw float bits against 0 would report 'true' for +0.0 and 'false' for -1.0,
// so instead convert to s32 with round-toward-zero and test the low bit, the
// same mask-and-compare used by the integer trunc-to-i1 patterns.
def : Pat<(i1 (fp_to_sint Float32Regs:$a)),
          (SETP_b32ri (ANDb32ri (CVT_s32_f32 Float32Regs:$a, CvtRZI), 1),
                      1, CmpEQ)>;
// Wider results: round toward zero. The _FTZ mode variant is listed first and
// is only available when doF32FTZ holds.
def : Pat<(i16 (fp_to_sint Float32Regs:$a)),
          (CVT_s16_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i16 (fp_to_sint Float32Regs:$a)),
          (CVT_s16_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_sint Float32Regs:$a)),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i32 (fp_to_sint Float32Regs:$a)),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_sint Float32Regs:$a)),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i64 (fp_to_sint Float32Regs:$a)),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
// f32 -> uint
// The i1 result is bit 0 of the value truncated toward zero. Comparing the
// raw float bits against 0 would report 'true' for +0.0 and 'false' for 1.0,
// so instead convert to u32 with round-toward-zero and test the low bit, the
// same mask-and-compare used by the integer trunc-to-i1 patterns.
def : Pat<(i1 (fp_to_uint Float32Regs:$a)),
          (SETP_b32ri (ANDb32ri (CVT_u32_f32 Float32Regs:$a, CvtRZI), 1),
                      1, CmpEQ)>;
// Wider results: round toward zero. The _FTZ mode variant is listed first and
// is only available when doF32FTZ holds.
def : Pat<(i16 (fp_to_uint Float32Regs:$a)),
          (CVT_u16_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i16 (fp_to_uint Float32Regs:$a)),
          (CVT_u16_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_uint Float32Regs:$a)),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i32 (fp_to_uint Float32Regs:$a)),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_uint Float32Regs:$a)),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i64 (fp_to_uint Float32Regs:$a)),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
// f64 -> sint
// The i1 result is bit 0 of the value truncated toward zero. Comparing the
// raw double bits against 0 would report 'true' for +0.0 and 'false' for
// -1.0, so instead convert to s32 with round-toward-zero and test the low
// bit, the same mask-and-compare used by the integer trunc-to-i1 patterns.
def : Pat<(i1 (fp_to_sint Float64Regs:$a)),
          (SETP_b32ri (ANDb32ri (CVT_s32_f64 Float64Regs:$a, CvtRZI), 1),
                      1, CmpEQ)>;
// Wider results: round toward zero.
def : Pat<(i16 (fp_to_sint Float64Regs:$a)),
          (CVT_s16_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_sint Float64Regs:$a)),
          (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_sint Float64Regs:$a)),
          (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
// f64 -> uint
// The i1 result is bit 0 of the value truncated toward zero. Comparing the
// raw double bits against 0 would report 'true' for +0.0 and 'false' for 1.0,
// so instead convert to u32 with round-toward-zero and test the low bit, the
// same mask-and-compare used by the integer trunc-to-i1 patterns.
def : Pat<(i1 (fp_to_uint Float64Regs:$a)),
          (SETP_b32ri (ANDb32ri (CVT_u32_f64 Float64Regs:$a, CvtRZI), 1),
                      1, CmpEQ)>;
// Wider results: round toward zero.
def : Pat<(i16 (fp_to_uint Float64Regs:$a)),
          (CVT_u16_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_uint Float64Regs:$a)),
          (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_uint Float64Regs:$a)),
          (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
// Extensions from i1. Each one is a selp between two immediates keyed on the
// predicate register.

// sext i1: true -> -1, false -> 0
def : Pat<(i16 (sext Int1Regs:$a)), (SELP_s16ii -1, 0, Int1Regs:$a)>;
def : Pat<(i32 (sext Int1Regs:$a)), (SELP_s32ii -1, 0, Int1Regs:$a)>;
def : Pat<(i64 (sext Int1Regs:$a)), (SELP_s64ii -1, 0, Int1Regs:$a)>;

// zext i1: true -> 1, false -> 0
def : Pat<(i16 (zext Int1Regs:$a)), (SELP_u16ii 1, 0, Int1Regs:$a)>;
def : Pat<(i32 (zext Int1Regs:$a)), (SELP_u32ii 1, 0, Int1Regs:$a)>;
def : Pat<(i64 (zext Int1Regs:$a)), (SELP_u64ii 1, 0, Int1Regs:$a)>;

// anyext i1: true -> -1, false -> 0
def : Pat<(i16 (anyext Int1Regs:$a)), (SELP_u16ii -1, 0, Int1Regs:$a)>;
def : Pat<(i32 (anyext Int1Regs:$a)), (SELP_u32ii -1, 0, Int1Regs:$a)>;
def : Pat<(i64 (anyext Int1Regs:$a)), (SELP_u64ii -1, 0, Int1Regs:$a)>;
// Extensions from i16, implemented as integer cvt with no conversion mode.

// sext i16: signed source and destination types
def : Pat<(i32 (sext Int16Regs:$a)), (CVT_s32_s16 Int16Regs:$a, CvtNONE)>;
def : Pat<(i64 (sext Int16Regs:$a)), (CVT_s64_s16 Int16Regs:$a, CvtNONE)>;

// zext i16: unsigned source and destination types
def : Pat<(i32 (zext Int16Regs:$a)), (CVT_u32_u16 Int16Regs:$a, CvtNONE)>;
def : Pat<(i64 (zext Int16Regs:$a)), (CVT_u64_u16 Int16Regs:$a, CvtNONE)>;

// anyext i16: uses the same unsigned cvt as zext
def : Pat<(i32 (anyext Int16Regs:$a)), (CVT_u32_u16 Int16Regs:$a, CvtNONE)>;
def : Pat<(i64 (anyext Int16Regs:$a)), (CVT_u64_u16 Int16Regs:$a, CvtNONE)>;
// Extensions from i32 to i64, implemented as integer cvt with no conversion
// mode. sext uses the signed cvt; zext and anyext share the unsigned cvt.
def : Pat<(i64 (sext Int32Regs:$a)),   (CVT_s64_s32 Int32Regs:$a, CvtNONE)>;
def : Pat<(i64 (zext Int32Regs:$a)),   (CVT_u64_u32 Int32Regs:$a, CvtNONE)>;
def : Pat<(i64 (anyext Int32Regs:$a)), (CVT_u64_u32 Int32Regs:$a, CvtNONE)>;
// Integer truncation. Narrowing to i16/i32 is an unsigned cvt with no
// conversion mode. Narrowing to i1 keeps only the low bit: mask the source
// with 1 and set the predicate when the masked value equals 1.

// trunc i64
def : Pat<(i32 (trunc Int64Regs:$a)), (CVT_u32_u64 Int64Regs:$a, CvtNONE)>;
def : Pat<(i16 (trunc Int64Regs:$a)), (CVT_u16_u64 Int64Regs:$a, CvtNONE)>;
def : Pat<(i1 (trunc Int64Regs:$a)),
          (SETP_b64ri (ANDb64ri Int64Regs:$a, 1), 1, CmpEQ)>;

// trunc i32
def : Pat<(i16 (trunc Int32Regs:$a)), (CVT_u16_u32 Int32Regs:$a, CvtNONE)>;
def : Pat<(i1 (trunc Int32Regs:$a)),
          (SETP_b32ri (ANDb32ri Int32Regs:$a, 1), 1, CmpEQ)>;

// trunc i16
def : Pat<(i1 (trunc Int16Regs:$a)),
          (SETP_b16ri (ANDb16ri Int16Regs:$a, 1), 1, CmpEQ)>;
// In-register sign extension: one CVT_INREG_s<dst>_s<src> instruction per
// (register width, extended-from width) pair.
def : Pat<(sext_inreg Int16Regs:$a, i8),
          (CVT_INREG_s16_s8 Int16Regs:$a)>;
def : Pat<(sext_inreg Int32Regs:$a, i8),
          (CVT_INREG_s32_s8 Int32Regs:$a)>;
def : Pat<(sext_inreg Int32Regs:$a, i16),
          (CVT_INREG_s32_s16 Int32Regs:$a)>;
def : Pat<(sext_inreg Int64Regs:$a, i8),
          (CVT_INREG_s64_s8 Int64Regs:$a)>;
def : Pat<(sext_inreg Int64Regs:$a, i16),
          (CVT_INREG_s64_s16 Int64Regs:$a)>;
def : Pat<(sext_inreg Int64Regs:$a, i32),
          (CVT_INREG_s64_s32 Int64Regs:$a)>;
// Select instructions with 32-bit predicates.
// The 32-bit condition is reduced to a predicate register first: its low bit
// is isolated with an AND against 1 and compared equal to 1. That predicate
// then drives the selp for the value type being selected.
def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b),
          (SELP_b16rr
            Int16Regs:$a, Int16Regs:$b,
            (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b),
          (SELP_b32rr
            Int32Regs:$a, Int32Regs:$b,
            (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b),
          (SELP_b64rr
            Int64Regs:$a, Int64Regs:$b,
            (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b),
          (SELP_f32rr
            Float32Regs:$a, Float32Regs:$b,
            (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b),
          (SELP_f64rr
            Float64Regs:$a, Float64Regs:$b,
            (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
2529 // pack a set of smaller int registers to a larger int register
// Each pack instruction is a single mov.b32/mov.b64 whose source is a
// brace-enclosed list of the narrower registers.
// NOTE(review): the pattern-list argument and terminator of every def below
// were missing; they are restored as an empty pattern list ([]), i.e. these
// instructions are not selected through a pattern attached to the def itself.
// Confirm against the reference revision of this file.
def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d),
                           (ins Int16Regs:$s1, Int16Regs:$s2,
                                Int16Regs:$s3, Int16Regs:$s4),
                           "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};",
                           []>;
def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d),
                           (ins Int16Regs:$s1, Int16Regs:$s2),
                           "mov.b32\t$d, {{$s1, $s2}};",
                           []>;
def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d),
                           (ins Int32Regs:$s1, Int32Regs:$s2),
                           "mov.b64\t$d, {{$s1, $s2}};",
                           []>;
def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d),
                           (ins Float32Regs:$s1, Float32Regs:$s2),
                           "mov.b64\t$d, {{$s1, $s2}};",
                           []>;
2548 // unpack a larger int register to a set of smaller int registers
// Each unpack instruction is a single mov.b32/mov.b64 whose destination is a
// brace-enclosed list of the narrower registers and whose source is $s.
// NOTE(review): the (ins ...) list of the three integer defs and the
// pattern-list argument/terminator of all four defs were missing. The source
// operand is restored from the "$s" reference and the mov width in each asm
// string (matching the intact F64toV2F32 operand list); the pattern list is
// restored as empty ([]). Confirm against the reference revision.
def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2,
                                 Int16Regs:$d3, Int16Regs:$d4),
                           (ins Int64Regs:$s),
                           "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;",
                           []>;
def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2),
                           (ins Int32Regs:$s),
                           "mov.b32\t{{$d1, $d2}}, $s;",
                           []>;
def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2),
                           (ins Int64Regs:$s),
                           "mov.b64\t{{$d1, $d2}}, $s;",
                           []>;
def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2),
                           (ins Float64Regs:$s),
                           "mov.b64\t{{$d1, $d2}}, $s;",
                           []>;
2567 // Count leading zeros
// Both variants write a 32-bit result register, including the 64-bit-input
// form.
// NOTE(review): the asm string and pattern list of both defs were missing.
// They are restored by analogy with the POPCr32/POPCr64 defs in this file
// ("popc.b32\t$d, $a;" with operand width matching the input register) and
// with an empty pattern list, since selection is done by the standalone ctlz
// patterns. Confirm against the reference revision.
def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
                       "clz.b32\t$d, $a;",
                       []>;
def CLZr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                       "clz.b64\t$d, $a;",
                       []>;
// ctlz and ctlz_zero_undef are selected to the same instruction sequences.

// 32-bit: maps directly onto CLZr32.
def : Pat<(ctlz Int32Regs:$a),            (CLZr32 Int32Regs:$a)>;
def : Pat<(ctlz_zero_undef Int32Regs:$a), (CLZr32 Int32Regs:$a)>;

// 64-bit: CLZr64 produces a 32-bit result, so it is zero-extended to 64 bits
// to match the LLVM semantics.
def : Pat<(ctlz Int64Regs:$a),
          (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
def : Pat<(ctlz_zero_undef Int64Regs:$a),
          (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
// For 16-bit, we zero-extend to 32-bit, then trunc the result back
// to 16-bits (ctlz of a 16-bit value is guaranteed to require less
// than 16 bits to store). We also need to subtract 16 because the
// high-order 16 zeros were counted.
// NOTE(review): the tail of both patterns was missing. The conversion mode of
// the truncating cvt (CvtNONE, as in every other integer cvt in this file)
// and the immediate 16 described in the comment above are restored here.
def : Pat<(ctlz Int16Regs:$a),
          (SUBi16ri (CVT_u16_u32 (CLZr32
                                   (CVT_u32_u16 Int16Regs:$a, CvtNONE)),
                                 CvtNONE), 16)>;
def : Pat<(ctlz_zero_undef Int16Regs:$a),
          (SUBi16ri (CVT_u16_u32 (CLZr32
                                   (CVT_u32_u16 Int16Regs:$a, CvtNONE)),
                                 CvtNONE), 16)>;
// Population count. Both variants write a 32-bit result register, including
// the 64-bit-input form.
// NOTE(review): the pattern-list argument and terminator of both defs were
// missing; they are restored as an empty pattern list ([]) because selection
// is done by the standalone ctpop patterns. Confirm against the reference
// revision.
def POPCr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
                        "popc.b32\t$d, $a;",
                        []>;
def POPCr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                        "popc.b64\t$d, $a;",
                        []>;
// 32-bit: maps directly onto POPCr32.
def : Pat<(ctpop Int32Regs:$a), (POPCr32 Int32Regs:$a)>;

// 64-bit: POPCr64 produces a 32-bit result, so it is zero-extended to 64 bits
// to match the LLVM semantics.
def : Pat<(ctpop Int64Regs:$a),
          (CVT_u64_u32 (POPCr64 Int64Regs:$a), CvtNONE)>;
// For 16-bit, we zero-extend to 32-bit, then trunc the result back
// to 16-bits (ctpop of a 16-bit value is guaranteed to require less
// than 16 bits to store)
// NOTE(review): the tail of this pattern was missing; the conversion mode of
// the truncating cvt is restored as CvtNONE, as in every other integer cvt in
// this file.
def : Pat<(ctpop Int16Regs:$a),
          (CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)),
                       CvtNONE)>;
// Floating-point width changes. In each pair the _FTZ mode variant is listed
// first and is only available when doF32FTZ holds.

// fround f64 -> f32 (CvtRN mode)
def : Pat<(f32 (fround Float64Regs:$a)),
          (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>,
      Requires<[doF32FTZ]>;
def : Pat<(f32 (fround Float64Regs:$a)),
          (CVT_f32_f64 Float64Regs:$a, CvtRN)>;

// fextend f32 -> f64 (no rounding mode)
def : Pat<(f64 (fextend Float32Regs:$a)),
          (CVT_f64_f32 Float32Regs:$a, CvtNONE_FTZ)>,
      Requires<[doF32FTZ]>;
def : Pat<(f64 (fextend Float32Regs:$a)),
          (CVT_f64_f32 Float32Regs:$a, CvtNONE)>;
// DAG node for NVPTXISD::RET_FLAG. It has no operands or results (SDTNone),
// carries a chain, and may take an incoming glue value.
def retflag
  : SDNode<"NVPTXISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInGlue]>;
2640 //-----------------------------------
2642 //-----------------------------------
// Terminator instructions: return, conditional branches and the
// unconditional branch.
// NOTE(review): several short lines of this block were missing, leaving
// CBranchOther and GOTO without a pattern list or terminator and the outer
// 'let' scope unclosed. Restored here, to be confirmed against the reference
// revision:
//   - 'let isBranch=1 in' on the two conditional branches;
//   - an empty pattern list on CBranchOther (it is selected by a standalone
//     brcond pattern elsewhere in this file);
//   - the '(br bb:$target)' pattern on GOTO;
//   - the closing brace of the isTerminator scope.
let isTerminator=1 in {
  let isReturn=1, isBarrier=1 in
    def Return : NVPTXInst<(outs), (ins), "ret;", [(retflag)]>;

  // Branch to $target when predicate $a is true.
  let isBranch=1 in
    def CBranch : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
                            "@$a bra \t$target;",
                            [(brcond Int1Regs:$a, bb:$target)]>;

  // Branch to $target when predicate $a is false ("@!$a").
  let isBranch=1 in
    def CBranchOther : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
                                 "@!$a bra \t$target;",
                                 []>;

  // Unconditional branch.
  let isBranch=1, isBarrier=1 in
    def GOTO : NVPTXInst<(outs), (ins brtarget:$target),
                         "bra.uni \t$target;",
                         [(br bb:$target)]>;
}
// Conditional branch on a 32-bit value: compare it against zero (not-equal)
// to produce the predicate that CBranch consumes.
def : Pat<(brcond Int32Regs:$a, bb:$target),
          (CBranch
            (SETP_u32ri Int32Regs:$a, 0, CmpNE),
            bb:$target)>;
// SelectionDAGBuilder::visitSwitchCase() will invert the condition of a
// conditional branch if the target block is the next block, so that the code
// can fall through to the next block.
// The inversion is done by 'xor condition, 1', which will be translated to
// (setne condition, -1).
// Since ptx supports '@!pred bra target', we should use it.
// A branch on (setne $a, -1) over an i1 is a branch on the negated predicate,
// so it is selected to CBranchOther, which prints "@!$a bra".
def : Pat<(brcond (i1 (setne Int1Regs:$a, -1)), bb:$target),
          (CBranchOther
            Int1Regs:$a,
            bb:$target)>;
// Type profiles for the call-sequence markers: the start marker constrains
// operand 0 to i32; the end marker constrains operands 0 and 1 to i32.
def SDT_NVPTXCallSeqStart
  : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
def SDT_NVPTXCallSeqEnd
  : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>;
// DAG nodes for ISD::CALLSEQ_START / ISD::CALLSEQ_END.
// NOTE(review): the last line of callseq_end's property list was missing,
// leaving the def unterminated. It is restored with SDNPSideEffect, mirroring
// callseq_start; confirm against the reference revision.
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_NVPTXCallSeqStart,
                           [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
def callseq_end   : SDNode<"ISD::CALLSEQ_END", SDT_NVPTXCallSeqEnd,
                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                            SDNPSideEffect]>;
// NVPTXISD::CALL: no results and one operand, constrained to i32. The node is
// chained, may take incoming glue, and produces glue.
def SDT_NVPTXCall
  : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
def call
  : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall,
           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;

// Operand type used for the callee of CALL.
def calltarget : Operand<i32>;
// Call instruction. It carries no pattern of its own; the two patterns below
// select it for a callee given as a target global address or as a target
// external symbol.
def CALL
  : NVPTXInst<(outs), (ins calltarget:$dst), "call \t$dst, (1);", []>;

def : Pat<(call tglobaladdr:$dst),  (CALL tglobaladdr:$dst)>;
def : Pat<(call texternalsym:$dst), (CALL texternalsym:$dst)>;
// Pseudo instructions.
// Pseudo forwards its four arguments unchanged to NVPTXInst.
class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
  : NVPTXInst<outs,
              ins,
              asmstr,
              pattern>;
// Call-sequence markers. Callseq_Start prints a marker comment, an opening
// brace and the declaration of temp_param_reg; Callseq_End prints the closing
// brace and a marker comment.
// @TODO: We use some tricks here to emit curly braces. Can we clean this up
// a bit without TableGen modifications?
def Callseq_Start
  : NVPTXInst<(outs),
              (ins i32imm:$amt),
              "// Callseq Start $amt\n\t{{\n\t.reg .b32 temp_param_reg;\n\t// <end>}}",
              [(callseq_start timm:$amt)]>;
def Callseq_End
  : NVPTXInst<(outs),
              (ins i32imm:$amt1, i32imm:$amt2),
              "\n\t//{{\n\t}}// Callseq End $amt1",
              [(callseq_end timm:$amt1, timm:$amt2)]>;
// Trap instruction.
// NOTE(review): only the first line of this def was present. The asm string
// ("trap;") and the pattern selecting it for the generic 'trap' node are
// restored from the def's name and are assumptions; confirm against the
// reference revision.
def trapinst : NVPTXInst<(outs), (ins),
                         "trap;",
                         [(trap)]>;
// Call prototype wrapper
// NOTE(review): three short lines of this block were missing: the 'def' lines
// that introduce the SDNode and the instruction, and the closing brace of
// ProtoIdent. The SDNode name CallPrototype is fixed by its use in the
// instruction's pattern; the instruction name CALL_PROTOTYPE is an
// assumption; confirm against the reference revision.

// One integer operand, no results.
def SDTCallPrototype : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def CallPrototype
  : SDNode<"NVPTXISD::CallPrototype", SDTCallPrototype,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;

// Operand printed through the printProtoIdent method.
def ProtoIdent : Operand<i32> {
  let PrintMethod = "printProtoIdent";
}

// Prints the prototype identifier on its own line.
def CALL_PROTOTYPE
  : NVPTXInst<(outs), (ins ProtoIdent:$ident),
              "$ident", [(CallPrototype (i32 texternalsym:$ident))]>;
2734 include "NVPTXIntrinsics.td"
2737 //-----------------------------------
2739 //-----------------------------------
2740 // BSWAP is currently expanded. The following is a more efficient
2741 // - for < sm_20, use vector scalar mov, as tesla support native 16-bit register
// - for sm_20, use prmt (use vector scalar mov to get the pack and
2743 // unpack). sm_20 supports native 32-bit register, but not native 16-bit