1 //===- NVPTXInstrInfo.td - NVPTX Instruction defs -------------*- tblgen-*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the PTX instructions in TableGen format.
12 //===----------------------------------------------------------------------===//
14 include "NVPTXInstrFormats.td"
// Instruction record with no outputs, no inputs, an empty assembly string and
// no selection pattern.
17 def NOP : NVPTXInst<(outs), (ins), "", []>;
19 // List of vector specific properties
// Each record below is a distinct VecInstTypeEnum value. The values 1-7 are
// contiguous; isVecOther is set apart at 15.
// NOTE(review): VecInstTypeEnum is not defined in this file; presumably it
// comes from the included NVPTXInstrFormats.td -- confirm.
20 def isVecLD : VecInstTypeEnum<1>;
21 def isVecST : VecInstTypeEnum<2>;
22 def isVecBuild : VecInstTypeEnum<3>;
23 def isVecShuffle : VecInstTypeEnum<4>;
24 def isVecExtract : VecInstTypeEnum<5>;
25 def isVecInsert : VecInstTypeEnum<6>;
26 def isVecDest : VecInstTypeEnum<7>;
27 def isVecOther : VecInstTypeEnum<15>;
29 //===----------------------------------------------------------------------===//
30 // NVPTX Operand Definitions.
31 //===----------------------------------------------------------------------===//
// Operand record declared with value type OtherVT.
// NOTE(review): by its name this is presumably the label operand of branch
// instructions; no use is visible in this part of the file -- confirm.
33 def brtarget : Operand<OtherVT>;
35 //===----------------------------------------------------------------------===//
36 // NVPTX Instruction Predicate Definitions
37 //===----------------------------------------------------------------------===//
// Predicates whose condition strings query the Subtarget object. Each record
// simply forwards to the Subtarget method of the same name, except the two
// useAtomRed* records, which combine two queries.
40 def hasAtomRedG32 : Predicate<"Subtarget.hasAtomRedG32()">;
41 def hasAtomRedS32 : Predicate<"Subtarget.hasAtomRedS32()">;
42 def hasAtomRedGen32 : Predicate<"Subtarget.hasAtomRedGen32()">;
// Holds only when hasAtomRedGen32() is false and hasAtomRedG32() is true.
43 def useAtomRedG32forGen32 :
44 Predicate<"!Subtarget.hasAtomRedGen32() && Subtarget.hasAtomRedG32()">;
45 def hasBrkPt : Predicate<"Subtarget.hasBrkPt()">;
46 def hasAtomRedG64 : Predicate<"Subtarget.hasAtomRedG64()">;
47 def hasAtomRedS64 : Predicate<"Subtarget.hasAtomRedS64()">;
48 def hasAtomRedGen64 : Predicate<"Subtarget.hasAtomRedGen64()">;
// 64-bit counterpart: hasAtomRedGen64() is false and hasAtomRedG64() is true.
49 def useAtomRedG64forGen64 :
50 Predicate<"!Subtarget.hasAtomRedGen64() && Subtarget.hasAtomRedG64()">;
51 def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">;
52 def hasVote : Predicate<"Subtarget.hasVote()">;
53 def hasDouble : Predicate<"Subtarget.hasDouble()">;
54 def reqPTX20 : Predicate<"Subtarget.reqPTX20()">;
55 def hasLDG : Predicate<"Subtarget.hasLDG()">;
56 def hasLDU : Predicate<"Subtarget.hasLDU()">;
57 def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
// Predicates whose condition strings test plain C++ names (UseF32FTZ,
// doFMAF32, allowFMA, ...) rather than Subtarget methods.
// NOTE(review): those names are not declared in this file; presumably they are
// members of the instruction selector -- confirm.

// Holds when UseF32FTZ is set; the *_ftz predicates below AND their base
// condition with the same flag.
59 def doF32FTZ : Predicate<"UseF32FTZ">;
61 def doFMAF32 : Predicate<"doFMAF32">;
62 def doFMAF32_ftz : Predicate<"(doFMAF32 && UseF32FTZ)">;
63 def doFMAF32AGG : Predicate<"doFMAF32AGG">;
64 def doFMAF32AGG_ftz : Predicate<"(doFMAF32AGG && UseF32FTZ)">;
65 def doFMAF64 : Predicate<"doFMAF64">;
66 def doFMAF64AGG : Predicate<"doFMAF64AGG">;
67 def doFMADF32 : Predicate<"doFMADF32">;
68 def doFMADF32_ftz : Predicate<"(doFMADF32 && UseF32FTZ)">;
70 def doMulWide : Predicate<"doMulWide">;
72 def allowFMA : Predicate<"allowFMA">;
73 def allowFMA_ftz : Predicate<"(allowFMA && UseF32FTZ)">;
// do_DIVF32_PREC selects the f32 division flavour: 0 -> approximate, 1 -> full.
// The records guarded by these predicates emit div.approx / div.full.
75 def do_DIVF32_APPROX : Predicate<"do_DIVF32_PREC==0">;
76 def do_DIVF32_FULL : Predicate<"do_DIVF32_PREC==1">;
// do_SQRTF32_PREC: 0 -> approximate, 1 -> round-to-nearest (per the names).
78 def do_SQRTF32_APPROX : Predicate<"do_SQRTF32_PREC==0">;
79 def do_SQRTF32_RN : Predicate<"do_SQRTF32_PREC==1">;
81 def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
// Condition string is the constant "1", i.e. always satisfied.
83 def true : Predicate<"1">;
86 //===----------------------------------------------------------------------===//
87 // Some Common Instruction Class Templates
88 //===----------------------------------------------------------------------===//
90 multiclass I3<string OpcStr, SDNode OpNode> {
91 def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
92 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
93 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
95 def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
96 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
97 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
98 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
99 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
100 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
102 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
103 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
104 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
105 def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
106 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
107 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
109 def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
110 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
111 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
114 multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> {
115 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
117 !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
118 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
120 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
121 !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
122 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
// F3: two-source floating-point instruction template.
//   OpcStr - opcode mnemonic; the type suffix (".f64", ".f32", ".ftz.f32") is
//            appended here, with no rounding-mode modifier.
//   OpNode - SelectionDAG node matched by every record.
// Produces f64rr/f64ri, f32rr_ftz/f32ri_ftz and f32rr/f32ri ("rr" = both
// sources in registers, "ri" = second source is an fpimm). Every record is
// gated: the _ftz forms on allowFMA_ftz, all others on allowFMA.
125 multiclass F3<string OpcStr, SDNode OpNode> {
126 def f64rr : NVPTXInst<(outs Float64Regs:$dst),
127 (ins Float64Regs:$a, Float64Regs:$b),
128 !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
129 [(set Float64Regs:$dst,
130 (OpNode Float64Regs:$a, Float64Regs:$b))]>,
131 Requires<[allowFMA]>;
132 def f64ri : NVPTXInst<(outs Float64Regs:$dst),
133 (ins Float64Regs:$a, f64imm:$b),
134 !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
135 [(set Float64Regs:$dst,
136 (OpNode Float64Regs:$a, fpimm:$b))]>,
137 Requires<[allowFMA]>;
// Flush-to-zero f32 forms; listed before the plain f32 forms.
138 def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
139 (ins Float32Regs:$a, Float32Regs:$b),
140 !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
141 [(set Float32Regs:$dst,
142 (OpNode Float32Regs:$a, Float32Regs:$b))]>,
143 Requires<[allowFMA_ftz]>;
144 def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
145 (ins Float32Regs:$a, f32imm:$b),
146 !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
147 [(set Float32Regs:$dst,
148 (OpNode Float32Regs:$a, fpimm:$b))]>,
149 Requires<[allowFMA_ftz]>;
150 def f32rr : NVPTXInst<(outs Float32Regs:$dst),
151 (ins Float32Regs:$a, Float32Regs:$b),
152 !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
153 [(set Float32Regs:$dst,
154 (OpNode Float32Regs:$a, Float32Regs:$b))]>,
155 Requires<[allowFMA]>;
156 def f32ri : NVPTXInst<(outs Float32Regs:$dst),
157 (ins Float32Regs:$a, f32imm:$b),
158 !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
159 [(set Float32Regs:$dst,
160 (OpNode Float32Regs:$a, fpimm:$b))]>,
161 Requires<[allowFMA]>;
// F3_rn: same record set as F3 (f64rr/f64ri, f32rr_ftz/f32ri_ftz, f32rr/f32ri)
// but every assembly string carries the ".rn" modifier.
//   OpcStr - opcode mnemonic; ".rn" plus the type suffix is appended here.
//   OpNode - SelectionDAG node matched by every record.
// Only the _ftz records are gated (on doF32FTZ); the f64 and plain f32 records
// have no Requires clause.
164 multiclass F3_rn<string OpcStr, SDNode OpNode> {
165 def f64rr : NVPTXInst<(outs Float64Regs:$dst),
166 (ins Float64Regs:$a, Float64Regs:$b),
167 !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
168 [(set Float64Regs:$dst,
169 (OpNode Float64Regs:$a, Float64Regs:$b))]>;
170 def f64ri : NVPTXInst<(outs Float64Regs:$dst),
171 (ins Float64Regs:$a, f64imm:$b),
172 !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
173 [(set Float64Regs:$dst,
174 (OpNode Float64Regs:$a, fpimm:$b))]>;
175 def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
176 (ins Float32Regs:$a, Float32Regs:$b),
177 !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
178 [(set Float32Regs:$dst,
179 (OpNode Float32Regs:$a, Float32Regs:$b))]>,
180 Requires<[doF32FTZ]>;
181 def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
182 (ins Float32Regs:$a, f32imm:$b),
183 !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
184 [(set Float32Regs:$dst,
185 (OpNode Float32Regs:$a, fpimm:$b))]>,
186 Requires<[doF32FTZ]>;
187 def f32rr : NVPTXInst<(outs Float32Regs:$dst),
188 (ins Float32Regs:$a, Float32Regs:$b),
189 !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
190 [(set Float32Regs:$dst,
191 (OpNode Float32Regs:$a, Float32Regs:$b))]>;
192 def f32ri : NVPTXInst<(outs Float32Regs:$dst),
193 (ins Float32Regs:$a, f32imm:$b),
194 !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
195 [(set Float32Regs:$dst,
196 (OpNode Float32Regs:$a, fpimm:$b))]>;
// F2: single-source floating-point instruction template.
//   OpcStr - opcode mnemonic; the type suffix is appended here.
//   OpNode - unary SelectionDAG node matched by every record.
// Produces f64, f32_ftz (gated on doF32FTZ) and f32; register operands only.
199 multiclass F2<string OpcStr, SDNode OpNode> {
200 def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a),
201 !strconcat(OpcStr, ".f64 \t$dst, $a;"),
202 [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>;
203 def f32_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
204 !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"),
205 [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>,
206 Requires<[doF32FTZ]>;
207 def f32 : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
208 !strconcat(OpcStr, ".f32 \t$dst, $a;"),
209 [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>;
212 //===----------------------------------------------------------------------===//
213 // NVPTX Instructions.
214 //===----------------------------------------------------------------------===//
216 //-----------------------------------
217 // Integer Arithmetic
218 //-----------------------------------
// ADD_SUB_i1: 1-bit (predicate register) add/sub template.
//   OpNode - the node to match; it only affects the pattern, not the output.
// Both records print "xor.pred" regardless of OpNode: on a single bit,
// addition and subtraction modulo 2 both reduce to exclusive-or.
220 multiclass ADD_SUB_i1<SDNode OpNode> {
221 def _rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
222 "xor.pred \t$dst, $a, $b;",
223 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
224 def _ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
225 "xor.pred \t$dst, $a, $b;",
226 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>;
// i1 add and sub both instantiate ADD_SUB_i1, whose records print xor.pred.
229 defm ADD_i1 : ADD_SUB_i1<add>;
230 defm SUB_i1 : ADD_SUB_i1<sub>;
// 16/32/64-bit add and sub; I3 appends the bit width to the "add.s"/"sub.s"
// prefix.
233 defm ADD : I3<"add.s", add>;
234 defm SUB : I3<"sub.s", sub>;
// 32-bit forms matched from the addc/subc nodes; ADD_SUB_INT_32 appends ".s32",
// giving "add.cc.s32"/"sub.cc.s32".
236 defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>;
237 defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>;
// 32-bit forms matched from the adde/sube nodes, printed as
// "addc.cc.s32"/"subc.cc.s32".
239 defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>;
240 defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>;
242 //mul.wide PTX instruction
243 def SInt32Const : PatLeaf<(imm), [{
244 const APInt &v = N->getAPIntValue();
245 if (v.isSignedIntN(32))
250 def UInt32Const : PatLeaf<(imm), [{
251 const APInt &v = N->getAPIntValue();
257 def SInt16Const : PatLeaf<(imm), [{
258 const APInt &v = N->getAPIntValue();
259 if (v.isSignedIntN(16))
264 def UInt16Const : PatLeaf<(imm), [{
265 const APInt &v = N->getAPIntValue();
271 def Int5Const : PatLeaf<(imm), [{
272 const APInt &v = N->getAPIntValue();
273 // Check if 0 <= v < 32
274 // Only then the result from (x << v) will be i32
275 if (v.sge(0) && v.slt(32))
280 def Int4Const : PatLeaf<(imm), [{
281 const APInt &v = N->getAPIntValue();
282 // Check if 0 <= v < 16
283 // Only then the result from (x << v) will be i16
284 if (v.sge(0) && v.slt(16))
289 def SHL2MUL32 : SDNodeXForm<imm, [{
290 const APInt &v = N->getAPIntValue();
292 return CurDAG->getTargetConstant(temp.shl(v), MVT::i32);
295 def SHL2MUL16 : SDNodeXForm<imm, [{
296 const APInt &v = N->getAPIntValue();
298 return CurDAG->getTargetConstant(temp.shl(v), MVT::i16);
// mul.wide instruction records: the destination register class is twice as
// wide as the register source(s). All of them have an empty pattern list ([]),
// so they are only selected through the standalone Pat records that name them.
// Naming: S/U = signed/unsigned mnemonic, 64/32 = destination width,
// "Imm" = second source is an immediate of the destination width.

// 32 x 32 -> 64, signed.
301 def MULWIDES64 : NVPTXInst<(outs Int64Regs:$dst),
302 (ins Int32Regs:$a, Int32Regs:$b),
303 "mul.wide.s32 \t$dst, $a, $b;", []>;
304 def MULWIDES64Imm : NVPTXInst<(outs Int64Regs:$dst),
305 (ins Int32Regs:$a, i64imm:$b),
306 "mul.wide.s32 \t$dst, $a, $b;", []>;
// 32 x 32 -> 64, unsigned.
308 def MULWIDEU64 : NVPTXInst<(outs Int64Regs:$dst),
309 (ins Int32Regs:$a, Int32Regs:$b),
310 "mul.wide.u32 \t$dst, $a, $b;", []>;
311 def MULWIDEU64Imm : NVPTXInst<(outs Int64Regs:$dst),
312 (ins Int32Regs:$a, i64imm:$b),
313 "mul.wide.u32 \t$dst, $a, $b;", []>;
// 16 x 16 -> 32, signed.
315 def MULWIDES32 : NVPTXInst<(outs Int32Regs:$dst),
316 (ins Int16Regs:$a, Int16Regs:$b),
317 "mul.wide.s16 \t$dst, $a, $b;", []>;
318 def MULWIDES32Imm : NVPTXInst<(outs Int32Regs:$dst),
319 (ins Int16Regs:$a, i32imm:$b),
320 "mul.wide.s16 \t$dst, $a, $b;", []>;
// 16 x 16 -> 32, unsigned.
322 def MULWIDEU32 : NVPTXInst<(outs Int32Regs:$dst),
323 (ins Int16Regs:$a, Int16Regs:$b),
324 "mul.wide.u16 \t$dst, $a, $b;", []>;
325 def MULWIDEU32Imm : NVPTXInst<(outs Int32Regs:$dst),
326 (ins Int16Regs:$a, i32imm:$b),
327 "mul.wide.u16 \t$dst, $a, $b;", []>;
// Shift-left of an extended narrow value by a small constant is selected as a
// mul.wide by an immediate: the shift amount must satisfy Int5Const (32-bit
// source) or Int4Const (16-bit source), and SHL2MUL32/SHL2MUL16 rewrites it
// into the multiplier operand. All four patterns are gated on doMulWide.
// NOTE(review): SHL2MUL32/SHL2MUL16 return temp.shl(v) as an i32/i16 constant.
// If temp is a 1 of that same width, the largest accepted shift (31 resp. 15)
// yields a value with the sign bit set, which the signed mul.wide.s32/.s16
// forms would read as negative -- confirm the sext patterns for that amount.
329 def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)),
330 (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
331 Requires<[doMulWide]>;
332 def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)),
333 (MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
334 Requires<[doMulWide]>;
336 def : Pat<(shl (sext Int16Regs:$a), (i16 Int4Const:$b)),
337 (MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
338 Requires<[doMulWide]>;
339 def : Pat<(shl (zext Int16Regs:$a), (i16 Int4Const:$b)),
340 (MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
341 Requires<[doMulWide]>;
// A multiply whose operands are both extensions of narrow registers (or one
// extension and a constant accepted by the matching S/UInt{32,16}Const leaf) is
// selected as a single mul.wide on the narrow registers. sext operands map to
// the signed MULWIDES* records, zext operands to the unsigned MULWIDEU*
// records. All patterns are gated on doMulWide.

// sext(i32) * sext(i32) / constant -> mul.wide.s32
343 def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)),
344 (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
345 Requires<[doMulWide]>;
346 def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)),
347 (MULWIDES64Imm Int32Regs:$a, (i64 SInt32Const:$b))>,
348 Requires<[doMulWide]>;
// zext(i32) * zext(i32) / constant -> mul.wide.u32
350 def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)),
351 (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, Requires<[doMulWide]>;
352 def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)),
353 (MULWIDEU64Imm Int32Regs:$a, (i64 UInt32Const:$b))>,
354 Requires<[doMulWide]>;
// sext(i16) * sext(i16) / constant -> mul.wide.s16
356 def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)),
357 (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>;
358 def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)),
359 (MULWIDES32Imm Int16Regs:$a, (i32 SInt16Const:$b))>,
360 Requires<[doMulWide]>;
// zext(i16) * zext(i16) / constant -> mul.wide.u16
362 def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)),
363 (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>;
364 def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)),
365 (MULWIDEU32Imm Int16Regs:$a, (i32 UInt16Const:$b))>,
366 Requires<[doMulWide]>;
// Integer multiply, multiply-high, divide and remainder, all instantiated from
// I3; I3 appends the bit width (16/32/64) to the prefix given here.
// mul matches to "mul.lo.s<width>".
368 defm MULT : I3<"mul.lo.s", mul>;
// mulhs / mulhu select the signed / unsigned "mul.hi" forms.
370 defm MULTHS : I3<"mul.hi.s", mulhs>;
371 defm MULTHU : I3<"mul.hi.u", mulhu>;
373 defm SDIV : I3<"div.s", sdiv>;
374 defm UDIV : I3<"div.u", udiv>;
376 defm SREM : I3<"rem.s", srem>;
377 // The ri version will not be selected as DAGCombiner::visitSREM will lower it.
378 defm UREM : I3<"rem.u", urem>;
379 // The ri version will not be selected as DAGCombiner::visitUREM will lower it.
// 16-bit multiply-add: matches (add (mul a, b), c) and prints "mad.lo.s16".
// The three-letter suffix gives the kind of $a, $b, $c in order
// (r = Int16Regs register, i = i16imm immediate).
381 def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst),
382 (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c),
383 "mad.lo.s16 \t$dst, $a, $b, $c;",
384 [(set Int16Regs:$dst, (add
385 (mul Int16Regs:$a, Int16Regs:$b), Int16Regs:$c))]>;
386 def MAD16rri : NVPTXInst<(outs Int16Regs:$dst),
387 (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c),
388 "mad.lo.s16 \t$dst, $a, $b, $c;",
389 [(set Int16Regs:$dst, (add
390 (mul Int16Regs:$a, Int16Regs:$b), imm:$c))]>;
391 def MAD16rir : NVPTXInst<(outs Int16Regs:$dst),
392 (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c),
393 "mad.lo.s16 \t$dst, $a, $b, $c;",
394 [(set Int16Regs:$dst, (add
395 (mul Int16Regs:$a, imm:$b), Int16Regs:$c))]>;
396 def MAD16rii : NVPTXInst<(outs Int16Regs:$dst),
397 (ins Int16Regs:$a, i16imm:$b, i16imm:$c),
398 "mad.lo.s16 \t$dst, $a, $b, $c;",
399 [(set Int16Regs:$dst, (add (mul Int16Regs:$a, imm:$b),
// 32-bit multiply-add: matches (add (mul a, b), c) and prints "mad.lo.s32".
// The three-letter suffix gives the kind of $a, $b, $c in order
// (r = Int32Regs register, i = i32imm immediate). $a is always a register.
402 def MAD32rrr : NVPTXInst<(outs Int32Regs:$dst),
403 (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),
404 "mad.lo.s32 \t$dst, $a, $b, $c;",
405 [(set Int32Regs:$dst, (add
406 (mul Int32Regs:$a, Int32Regs:$b), Int32Regs:$c))]>;
407 def MAD32rri : NVPTXInst<(outs Int32Regs:$dst),
408 (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c),
409 "mad.lo.s32 \t$dst, $a, $b, $c;",
410 [(set Int32Regs:$dst, (add
411 (mul Int32Regs:$a, Int32Regs:$b), imm:$c))]>;
412 def MAD32rir : NVPTXInst<(outs Int32Regs:$dst),
413 (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c),
414 "mad.lo.s32 \t$dst, $a, $b, $c;",
415 [(set Int32Regs:$dst, (add
416 (mul Int32Regs:$a, imm:$b), Int32Regs:$c))]>;
417 def MAD32rii : NVPTXInst<(outs Int32Regs:$dst),
418 (ins Int32Regs:$a, i32imm:$b, i32imm:$c),
419 "mad.lo.s32 \t$dst, $a, $b, $c;",
420 [(set Int32Regs:$dst, (add
421 (mul Int32Regs:$a, imm:$b), imm:$c))]>;
// 64-bit multiply-add: matches (add (mul a, b), c) and prints "mad.lo.s64".
// The three-letter suffix gives the kind of $a, $b, $c in order
// (r = Int64Regs register, i = i64imm immediate). $a is always a register.
423 def MAD64rrr : NVPTXInst<(outs Int64Regs:$dst),
424 (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
425 "mad.lo.s64 \t$dst, $a, $b, $c;",
426 [(set Int64Regs:$dst, (add
427 (mul Int64Regs:$a, Int64Regs:$b), Int64Regs:$c))]>;
428 def MAD64rri : NVPTXInst<(outs Int64Regs:$dst),
429 (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c),
430 "mad.lo.s64 \t$dst, $a, $b, $c;",
431 [(set Int64Regs:$dst, (add
432 (mul Int64Regs:$a, Int64Regs:$b), imm:$c))]>;
433 def MAD64rir : NVPTXInst<(outs Int64Regs:$dst),
434 (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c),
435 "mad.lo.s64 \t$dst, $a, $b, $c;",
436 [(set Int64Regs:$dst, (add
437 (mul Int64Regs:$a, imm:$b), Int64Regs:$c))]>;
438 def MAD64rii : NVPTXInst<(outs Int64Regs:$dst),
439 (ins Int64Regs:$a, i64imm:$b, i64imm:$c),
440 "mad.lo.s64 \t$dst, $a, $b, $c;",
441 [(set Int64Regs:$dst, (add
442 (mul Int64Regs:$a, imm:$b), imm:$c))]>;
// Integer negation for 16-, 32- and 64-bit registers: matches the ineg
// fragment and prints "neg.s<width>". Register source only.
445 def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
446 "neg.s16 \t$dst, $src;",
447 [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>;
448 def INEG32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
449 "neg.s32 \t$dst, $src;",
450 [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>;
451 def INEG64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
452 "neg.s64 \t$dst, $src;",
453 [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>;
455 //-----------------------------------
456 // Floating Point Arithmetic
457 //-----------------------------------
460 def FloatConst1 : PatLeaf<(fpimm), [{
461 if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEsingle)
463 float f = (float)N->getValueAPF().convertToFloat();
466 // Constant (double)1.0
467 def DoubleConst1 : PatLeaf<(fpimm), [{
468 if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEdouble)
470 double d = (double)N->getValueAPF().convertToDouble();
// fadd/fsub/fmul are instantiated twice over the same nodes:
//  - through F3: no rounding modifier in the assembly string, every record
//    gated on allowFMA / allowFMA_ftz;
//  - through F3_rn: ".rn" modifier, only the _ftz records gated (on doF32FTZ).
474 defm FADD : F3<"add", fadd>;
475 defm FSUB : F3<"sub", fsub>;
476 defm FMUL : F3<"mul", fmul>;
478 defm FADD_rn : F3_rn<"add", fadd>;
479 defm FSUB_rn : F3_rn<"sub", fsub>;
480 defm FMUL_rn : F3_rn<"mul", fmul>;
// Unary operations via F2. FSQRT passes "sqrt.rn" as the mnemonic, so its
// records print sqrt.rn.f64 / sqrt.rn.ftz.f32 / sqrt.rn.f32.
482 defm FABS : F2<"abs", fabs>;
483 defm FNEG : F2<"neg", fneg>;
484 defm FSQRT : F2<"sqrt.rn", fsqrt>;
// f64 division. None of these records carries a Requires clause.
// FDIV641r: a dividend matched by DoubleConst1 turns the division into
// "rcp.rn.f64"; $a is part of the operand list but is not printed.
489 def FDIV641r : NVPTXInst<(outs Float64Regs:$dst),
490 (ins f64imm:$a, Float64Regs:$b),
491 "rcp.rn.f64 \t$dst, $b;",
492 [(set Float64Regs:$dst,
493 (fdiv DoubleConst1:$a, Float64Regs:$b))]>;
// General register / register and register / immediate forms: "div.rn.f64".
494 def FDIV64rr : NVPTXInst<(outs Float64Regs:$dst),
495 (ins Float64Regs:$a, Float64Regs:$b),
496 "div.rn.f64 \t$dst, $a, $b;",
497 [(set Float64Regs:$dst,
498 (fdiv Float64Regs:$a, Float64Regs:$b))]>;
499 def FDIV64ri : NVPTXInst<(outs Float64Regs:$dst),
500 (ins Float64Regs:$a, f64imm:$b),
501 "div.rn.f64 \t$dst, $a, $b;",
502 [(set Float64Regs:$dst,
503 (fdiv Float64Regs:$a, fpimm:$b))]>;
506 // F32 Approximate reciprocal
// 1.0f / b when approximate division is selected (do_DIVF32_APPROX): printed
// as rcp.approx[.ftz].f32. The dividend must match FloatConst1 and is not
// printed. The _ftz record additionally requires doF32FTZ.
508 def FDIV321r_ftz : NVPTXInst<(outs Float32Regs:$dst),
509 (ins f32imm:$a, Float32Regs:$b),
510 "rcp.approx.ftz.f32 \t$dst, $b;",
511 [(set Float32Regs:$dst,
512 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
513 Requires<[do_DIVF32_APPROX, doF32FTZ]>;
514 def FDIV321r : NVPTXInst<(outs Float32Regs:$dst),
515 (ins f32imm:$a, Float32Regs:$b),
516 "rcp.approx.f32 \t$dst, $b;",
517 [(set Float32Regs:$dst,
518 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
519 Requires<[do_DIVF32_APPROX]>;
521 // F32 Approximate division
// a / b with both operands in registers when approximate division is selected
// (do_DIVF32_APPROX): printed as div.approx[.ftz].f32. There is no
// register / immediate form in this group. The _ftz record additionally
// requires doF32FTZ.
523 def FDIV32approxrr_ftz : NVPTXInst<(outs Float32Regs:$dst),
524 (ins Float32Regs:$a, Float32Regs:$b),
525 "div.approx.ftz.f32 \t$dst, $a, $b;",
526 [(set Float32Regs:$dst,
527 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
528 Requires<[do_DIVF32_APPROX, doF32FTZ]>;
529 def FDIV32approxrr : NVPTXInst<(outs Float32Regs:$dst),
530 (ins Float32Regs:$a, Float32Regs:$b),
531 "div.approx.f32 \t$dst, $a, $b;",
532 [(set Float32Regs:$dst,
533 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
534 Requires<[do_DIVF32_APPROX]>;
536 // F32 Semi-accurate reciprocal
538 // rcp.approx gives the same result as div.full(1.0f, a) and is faster.
// 1.0f / b when "full" division is selected (do_DIVF32_FULL). These print the
// same rcp.approx[.ftz].f32 strings as FDIV321r_ftz / FDIV321r; only the
// gating predicate differs. The dividend must match FloatConst1 and is not
// printed.
540 def FDIV321r_approx_ftz : NVPTXInst<(outs Float32Regs:$dst),
541 (ins f32imm:$a, Float32Regs:$b),
542 "rcp.approx.ftz.f32 \t$dst, $b;",
543 [(set Float32Regs:$dst,
544 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
545 Requires<[do_DIVF32_FULL, doF32FTZ]>;
546 def FDIV321r_approx : NVPTXInst<(outs Float32Regs:$dst),
547 (ins f32imm:$a, Float32Regs:$b),
548 "rcp.approx.f32 \t$dst, $b;",
549 [(set Float32Regs:$dst,
550 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
551 Requires<[do_DIVF32_FULL]>;
553 // F32 Semi-accurate division
// a / b when "full" division is selected (do_DIVF32_FULL): printed as
// div.full[.ftz].f32, in register / register (rr) and register / immediate
// (ri) forms. The _ftz records additionally require doF32FTZ.
555 def FDIV32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
556 (ins Float32Regs:$a, Float32Regs:$b),
557 "div.full.ftz.f32 \t$dst, $a, $b;",
558 [(set Float32Regs:$dst,
559 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
560 Requires<[do_DIVF32_FULL, doF32FTZ]>;
561 def FDIV32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
562 (ins Float32Regs:$a, f32imm:$b),
563 "div.full.ftz.f32 \t$dst, $a, $b;",
564 [(set Float32Regs:$dst,
565 (fdiv Float32Regs:$a, fpimm:$b))]>,
566 Requires<[do_DIVF32_FULL, doF32FTZ]>;
567 def FDIV32rr : NVPTXInst<(outs Float32Regs:$dst),
568 (ins Float32Regs:$a, Float32Regs:$b),
569 "div.full.f32 \t$dst, $a, $b;",
570 [(set Float32Regs:$dst,
571 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
572 Requires<[do_DIVF32_FULL]>;
573 def FDIV32ri : NVPTXInst<(outs Float32Regs:$dst),
574 (ins Float32Regs:$a, f32imm:$b),
575 "div.full.f32 \t$dst, $a, $b;",
576 [(set Float32Regs:$dst,
577 (fdiv Float32Regs:$a, fpimm:$b))]>,
578 Requires<[do_DIVF32_FULL]>;
580 // F32 Accurate reciprocal
// 1.0f / b printed as rcp.rn[.ftz].f32. These records list no do_DIVF32_*
// predicate; they are gated on reqPTX20 (plus doF32FTZ for the _ftz record).
// The dividend must match FloatConst1 and is not printed.
582 def FDIV321r_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
583 (ins f32imm:$a, Float32Regs:$b),
584 "rcp.rn.ftz.f32 \t$dst, $b;",
585 [(set Float32Regs:$dst,
586 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
587 Requires<[reqPTX20, doF32FTZ]>;
588 def FDIV321r_prec : NVPTXInst<(outs Float32Regs:$dst),
589 (ins f32imm:$a, Float32Regs:$b),
590 "rcp.rn.f32 \t$dst, $b;",
591 [(set Float32Regs:$dst,
592 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
593 Requires<[reqPTX20]>;
595 // F32 Accurate division
// a / b printed as div.rn[.ftz].f32, in register / register (rr) and
// register / immediate (ri) forms. These records list no do_DIVF32_* predicate;
// they are gated on reqPTX20 (plus doF32FTZ for the _ftz records).
597 def FDIV32rr_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
598 (ins Float32Regs:$a, Float32Regs:$b),
599 "div.rn.ftz.f32 \t$dst, $a, $b;",
600 [(set Float32Regs:$dst,
601 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
602 Requires<[doF32FTZ, reqPTX20]>;
603 def FDIV32ri_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
604 (ins Float32Regs:$a, f32imm:$b),
605 "div.rn.ftz.f32 \t$dst, $a, $b;",
606 [(set Float32Regs:$dst,
607 (fdiv Float32Regs:$a, fpimm:$b))]>,
608 Requires<[doF32FTZ, reqPTX20]>;
609 def FDIV32rr_prec : NVPTXInst<(outs Float32Regs:$dst),
610 (ins Float32Regs:$a, Float32Regs:$b),
611 "div.rn.f32 \t$dst, $a, $b;",
612 [(set Float32Regs:$dst,
613 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
614 Requires<[reqPTX20]>;
615 def FDIV32ri_prec : NVPTXInst<(outs Float32Regs:$dst),
616 (ins Float32Regs:$a, f32imm:$b),
617 "div.rn.f32 \t$dst, $a, $b;",
618 [(set Float32Regs:$dst,
619 (fdiv Float32Regs:$a, fpimm:$b))]>,
620 Requires<[reqPTX20]>;
623 multiclass FPCONTRACT32<string OpcStr, Predicate Pred> {
624 def rrr : NVPTXInst<(outs Float32Regs:$dst),
625 (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
626 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
627 [(set Float32Regs:$dst, (fadd
628 (fmul Float32Regs:$a, Float32Regs:$b),
629 Float32Regs:$c))]>, Requires<[Pred]>;
630 // This is to WAR a weird bug in Tablegen that does not automatically
631 // generate the following permuted rule rrr2 from the above rrr.
632 // So we explicitly add it here. This happens to FMA32 only.
633 // See the comments at FMAD32 and FMA32 for more information.
634 def rrr2 : NVPTXInst<(outs Float32Regs:$dst),
635 (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
636 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
637 [(set Float32Regs:$dst, (fadd Float32Regs:$c,
638 (fmul Float32Regs:$a, Float32Regs:$b)))]>,
640 def rri : NVPTXInst<(outs Float32Regs:$dst),
641 (ins Float32Regs:$a, Float32Regs:$b, f32imm:$c),
642 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
643 [(set Float32Regs:$dst, (fadd
644 (fmul Float32Regs:$a, Float32Regs:$b), fpimm:$c))]>,
646 def rir : NVPTXInst<(outs Float32Regs:$dst),
647 (ins Float32Regs:$a, f32imm:$b, Float32Regs:$c),
648 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
649 [(set Float32Regs:$dst, (fadd
650 (fmul Float32Regs:$a, fpimm:$b), Float32Regs:$c))]>,
652 def rii : NVPTXInst<(outs Float32Regs:$dst),
653 (ins Float32Regs:$a, f32imm:$b, f32imm:$c),
654 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
655 [(set Float32Regs:$dst, (fadd
656 (fmul Float32Regs:$a, fpimm:$b), fpimm:$c))]>,
660 multiclass FPCONTRACT64<string OpcStr, Predicate Pred> {
661 def rrr : NVPTXInst<(outs Float64Regs:$dst),
662 (ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c),
663 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
664 [(set Float64Regs:$dst, (fadd
665 (fmul Float64Regs:$a, Float64Regs:$b),
666 Float64Regs:$c))]>, Requires<[Pred]>;
667 def rri : NVPTXInst<(outs Float64Regs:$dst),
668 (ins Float64Regs:$a, Float64Regs:$b, f64imm:$c),
669 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
670 [(set Float64Regs:$dst, (fadd (fmul Float64Regs:$a,
671 Float64Regs:$b), fpimm:$c))]>, Requires<[Pred]>;
672 def rir : NVPTXInst<(outs Float64Regs:$dst),
673 (ins Float64Regs:$a, f64imm:$b, Float64Regs:$c),
674 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
675 [(set Float64Regs:$dst, (fadd
676 (fmul Float64Regs:$a, fpimm:$b), Float64Regs:$c))]>,
678 def rii : NVPTXInst<(outs Float64Regs:$dst),
679 (ins Float64Regs:$a, f64imm:$b, f64imm:$c),
680 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
681 [(set Float64Regs:$dst, (fadd
682 (fmul Float64Regs:$a, fpimm:$b), fpimm:$c))]>,
686 // Due to an unknown reason (most likely a bug in tablegen), tablegen does not
687 // automatically generate the rrr2 rule from
688 // the rrr rule (see FPCONTRACT32) for FMA32, though it does for FMAD32.
689 // If we reverse the order of the following two lines, then rrr2 rule will be
690 // generated for FMA32, but not for rrr.
691 // Therefore, we manually write the rrr2 rule in FPCONTRACT32.
// Fused/combined multiply-add instantiations. The first argument is the full
// mnemonic printed by every record of the multiclass; the second is the
// predicate gating them. Record names are the defm name followed by the operand
// suffix from the multiclass (e.g. FMA32_ftz + rrr -> FMA32_ftzrrr).
// mad[.ftz].f32, gated on doFMADF32[_ftz].
692 defm FMAD32_ftz : FPCONTRACT32<"mad.ftz.f32", doFMADF32_ftz>;
693 defm FMAD32 : FPCONTRACT32<"mad.f32", doFMADF32>;
// fma.rn[.ftz].f32, gated on doFMAF32[_ftz].
694 defm FMA32_ftz : FPCONTRACT32<"fma.rn.ftz.f32", doFMAF32_ftz>;
695 defm FMA32 : FPCONTRACT32<"fma.rn.f32", doFMAF32>;
// fma.rn.f64, gated on doFMAF64.
696 defm FMA64 : FPCONTRACT64<"fma.rn.f64", doFMAF64>;
698 // b*c-a => fmad(b, c, -a)
699 multiclass FPCONTRACT32_SUB_PAT_MAD<NVPTXInst Inst, Predicate Pred> {
700 def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
701 (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
705 // a-b*c => fmad(-b,c, a)
706 // - legal because a-b*c <=> a+(-b*c) <=> a+(-b)*c
707 // b*c-a => fmad(b, c, -a)
708 // - legal because b*c-a <=> b*c+(-a)
709 multiclass FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
710 def : Pat<(fsub Float32Regs:$a, (fmul Float32Regs:$b, Float32Regs:$c)),
711 (Inst (FNEGf32 Float32Regs:$b), Float32Regs:$c, Float32Regs:$a)>,
713 def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
714 (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
718 // a-b*c => fmad(-b,c, a)
719 // b*c-a => fmad(b, c, -a)
720 multiclass FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
721 def : Pat<(fsub Float64Regs:$a, (fmul Float64Regs:$b, Float64Regs:$c)),
722 (Inst (FNEGf64 Float64Regs:$b), Float64Regs:$c, Float64Regs:$a)>,
725 def : Pat<(fsub (fmul Float64Regs:$b, Float64Regs:$c), Float64Regs:$a),
726 (Inst Float64Regs:$b, Float64Regs:$c, (FNEGf64 Float64Regs:$a))>,
// Subtraction patterns layered on the rrr records defined by the FMA32*,
// FMAD32* and FMA64 instantiations. The first argument is the instruction the
// pattern emits, the second the predicate gating the pattern.
// The FMA variants use FPCONTRACT{32,64}_SUB_PAT and the "AGG" predicates;
// the FMAD variants use FPCONTRACT32_SUB_PAT_MAD with the same predicates as
// the FMAD32 records themselves.
730 defm FMAF32ext_ftz : FPCONTRACT32_SUB_PAT<FMA32_ftzrrr, doFMAF32AGG_ftz>;
731 defm FMAF32ext : FPCONTRACT32_SUB_PAT<FMA32rrr, doFMAF32AGG>;
732 defm FMADF32ext_ftz : FPCONTRACT32_SUB_PAT_MAD<FMAD32_ftzrrr, doFMADF32_ftz>;
733 defm FMADF32ext : FPCONTRACT32_SUB_PAT_MAD<FMAD32rrr, doFMADF32>;
734 defm FMAF64ext : FPCONTRACT64_SUB_PAT<FMA64rrr, doFMAF64AGG>;
// f32 sine and cosine: fsin / fcos on a Float32Regs value are printed as
// sin.approx.f32 / cos.approx.f32. Neither record has a Requires clause and
// no f64 or .ftz variant is defined here.
736 def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
737 "sin.approx.f32 \t$dst, $src;",
738 [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>;
739 def COSF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
740 "cos.approx.f32 \t$dst, $src;",
741 [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>;
743 //-----------------------------------
744 // Logical Arithmetic
745 //-----------------------------------
747 multiclass LOG_FORMAT<string OpcStr, SDNode OpNode> {
748 def b1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
749 !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
750 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
751 def b1ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
752 !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
753 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>;
754 def b16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
755 !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
756 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
758 def b16ri: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
759 !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
760 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
761 def b32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
762 !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
763 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
765 def b32ri: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
766 !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
767 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
768 def b64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
769 !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
770 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
772 def b64ri: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
773 !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
774 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
// Bitwise or/and/xor for predicate, 16-, 32- and 64-bit registers. LOG_FORMAT
// appends the ".pred"/".b16"/".b32"/".b64" type suffix to the mnemonic.
777 defm OR : LOG_FORMAT<"or", or>;
778 defm AND : LOG_FORMAT<"and", and>;
779 defm XOR : LOG_FORMAT<"xor", xor>;
// Bitwise complement: matches the `not` fragment on a register of each width.
// The 1-bit form prints "not.pred"; the others print "not.b<width>".
781 def NOT1: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src),
782 "not.pred \t$dst, $src;",
783 [(set Int1Regs:$dst, (not Int1Regs:$src))]>;
784 def NOT16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
785 "not.b16 \t$dst, $src;",
786 [(set Int16Regs:$dst, (not Int16Regs:$src))]>;
787 def NOT32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
788 "not.b32 \t$dst, $src;",
789 [(set Int32Regs:$dst, (not Int32Regs:$src))]>;
790 def NOT64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
791 "not.b64 \t$dst, $src;",
792 [(set Int64Regs:$dst, (not Int64Regs:$src))]>;
794 // For shifts, the second src operand must be 32-bit value
795 multiclass LSHIFT_FORMAT<string OpcStr, SDNode OpNode> {
796 def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
798 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
799 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
801 def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
802 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
803 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
805 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
807 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
808 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
810 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
811 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
812 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
814 def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
815 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
816 [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
818 def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
820 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
821 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
823 def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
824 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
825 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
829 defm SHL : LSHIFT_FORMAT<"shl.b", shl>;
831 // For shifts, the second src operand must be 32-bit value
832 // Need to add cvt for the 8-bits.
// RSHIFT_FORMAT: right-shift counterpart of LSHIFT_FORMAT with the same set
// of defs (i64rr/ri, i32rr/ri/ii, i16rr/ri) and the same asm string layout.
//   OpcStr - mnemonic prefix, completed with the width and operand list.
//   OpNode - SDNode matched by each def's selection pattern.
//   CVTStr - NOTE(review): not referenced by any def shown here; presumably
//            kept for the 8-bit case mentioned above — confirm it is needed.
833 multiclass RSHIFT_FORMAT<string OpcStr, SDNode OpNode, string CVTStr> {
// 64-bit data.
834 def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
836 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
837 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
839 def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
840 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
841 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
// 32-bit data.
843 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
845 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
846 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
848 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
849 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
850 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
// Both the shifted value and the amount are immediates.
852 def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
853 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
854 [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
// 16-bit data.
856 def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
858 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
859 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
861 def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
862 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
863 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
// Arithmetic right shift uses the signed mnemonic, logical the unsigned one.
867 defm SRA : RSHIFT_FORMAT<"shr.s", sra, "cvt.s16.s8">;
868 defm SRL : RSHIFT_FORMAT<"shr.u", srl, "cvt.u16.u8">;
// ROT32imm_sw: 32-bit rotate done in software with two immediate amounts.
// The emitted snippet declares scratch registers %lhs and %rhs, shifts $src
// left by $amt1 and right by $amt2, then adds the two parts into $dst.
// The caller supplies both amounts (see the rotl/rotr patterns that use it).
871 def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst),
872 (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2),
874 !strconcat(".reg .b32 %lhs;\n\t",
875 !strconcat(".reg .b32 %rhs;\n\t",
876 !strconcat("shl.b32 \t%lhs, $src, $amt1;\n\t",
877 !strconcat("shr.b32 \t%rhs, $src, $amt2;\n\t",
878 !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
879 !strconcat("}}", ""))))))),
// SUB_FRM_32: immediate transform that yields the i32 target constant
// (32 - N), i.e. the complementary shift amount for a 32-bit rotate.
882 def SUB_FRM_32 : SDNodeXForm<imm, [{
883 return CurDAG->getTargetConstant(32-N->getZExtValue(), MVT::i32);
// rotl by immediate: left amount = amt, right amount = 32 - amt.
886 def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)),
887 (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>;
// rotr by immediate: same instruction with the two amounts swapped.
888 def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)),
889 (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>;
// ROTL32reg_sw: 32-bit rotate-left by a register amount, done in software.
// The snippet computes %amt2 = 32 - $amt, shifts $src left by $amt and right
// by %amt2, and adds the two parts into $dst. Selected for
// (rotl Int32Regs:$src, Int32Regs:$amt).
891 def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
894 !strconcat(".reg .b32 %lhs;\n\t",
895 !strconcat(".reg .b32 %rhs;\n\t",
896 !strconcat(".reg .b32 %amt2;\n\t",
897 !strconcat("shl.b32 \t%lhs, $src, $amt;\n\t",
898 !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
899 !strconcat("shr.b32 \t%rhs, $src, %amt2;\n\t",
900 !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
901 !strconcat("}}", ""))))))))),
902 [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>;
// ROTR32reg_sw: 32-bit rotate-right by a register amount, done in software.
// Mirror image of the rotate-left form: $src is shifted right by $amt and
// left by %amt2 = 32 - $amt, and the two parts are added into $dst.
// Selected for (rotr Int32Regs:$src, Int32Regs:$amt).
904 def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
907 !strconcat(".reg .b32 %lhs;\n\t",
908 !strconcat(".reg .b32 %rhs;\n\t",
909 !strconcat(".reg .b32 %amt2;\n\t",
910 !strconcat("shr.b32 \t%lhs, $src, $amt;\n\t",
911 !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
912 !strconcat("shl.b32 \t%rhs, $src, %amt2;\n\t",
913 !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
914 !strconcat("}}", ""))))))))),
915 [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>;
// ROT64imm_sw: 64-bit rotate done in software with two immediate amounts.
// Shifts $src left by $amt1 and right by $amt2 using 64-bit scratch
// registers %lhs/%rhs, then adds the two parts into $dst. Both amounts are
// i32imm even though the data is 64-bit.
918 def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
919 i32imm:$amt1, i32imm:$amt2),
921 !strconcat(".reg .b64 %lhs;\n\t",
922 !strconcat(".reg .b64 %rhs;\n\t",
923 !strconcat("shl.b64 \t%lhs, $src, $amt1;\n\t",
924 !strconcat("shr.b64 \t%rhs, $src, $amt2;\n\t",
925 !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
926 !strconcat("}}", ""))))))),
// SUB_FRM_64: immediate transform that yields the i32 target constant
// (64 - N), i.e. the complementary shift amount for a 64-bit rotate.
929 def SUB_FRM_64 : SDNodeXForm<imm, [{
930 return CurDAG->getTargetConstant(64-N->getZExtValue(), MVT::i32);
// rotl by immediate: left amount = amt, right amount = 64 - amt.
933 def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)),
934 (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>;
// rotr by immediate: same instruction with the two amounts swapped.
935 def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)),
936 (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>;
// ROTL64reg_sw: 64-bit rotate-left by a 32-bit register amount, in software.
// The snippet computes the 32-bit scratch %amt2 = 64 - $amt, shifts $src
// left by $amt and right by %amt2, and adds the two parts into $dst.
// Selected for (rotl Int64Regs:$src, Int32Regs:$amt).
938 def ROTL64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
941 !strconcat(".reg .b64 %lhs;\n\t",
942 !strconcat(".reg .b64 %rhs;\n\t",
943 !strconcat(".reg .u32 %amt2;\n\t",
944 !strconcat("shl.b64 \t%lhs, $src, $amt;\n\t",
945 !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
946 !strconcat("shr.b64 \t%rhs, $src, %amt2;\n\t",
947 !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
948 !strconcat("}}", ""))))))))),
949 [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>;
// ROTR64reg_sw: 64-bit rotate-right by a 32-bit register amount, in software.
// Mirror image of the rotate-left form: $src is shifted right by $amt and
// left by %amt2 = 64 - $amt, and the two parts are added into $dst.
// Selected for (rotr Int64Regs:$src, Int32Regs:$amt).
951 def ROTR64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
954 !strconcat(".reg .b64 %lhs;\n\t",
955 !strconcat(".reg .b64 %rhs;\n\t",
956 !strconcat(".reg .u32 %amt2;\n\t",
957 !strconcat("shr.b64 \t%lhs, $src, $amt;\n\t",
958 !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
959 !strconcat("shl.b64 \t%rhs, $src, %amt2;\n\t",
960 !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
961 !strconcat("}}", ""))))))))),
962 [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>;
965 //-----------------------------------
966 // Data Movement (Load / Store, Move)
967 //-----------------------------------
// Complex addressing patterns: each produces 2 operands and is matched by
// the named C++ selector (SelectADDRri for i32 addresses, SelectADDRri64 for
// i64 addresses), with frameindex as a root node.
969 def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex],
971 def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri64", [frameindex],
// Memory operand made of a 32-bit register plus a 32-bit immediate; printed
// by printMemOperand.
974 def MEMri : Operand<i32> {
975 let PrintMethod = "printMemOperand";
976 let MIOperandInfo = (ops Int32Regs, i32imm);
// 64-bit variant: 64-bit register plus 64-bit immediate.
978 def MEMri64 : Operand<i64> {
979 let PrintMethod = "printMemOperand";
980 let MIOperandInfo = (ops Int64Regs, i64imm);
// Pointer-typed operand printed with the generic printOperand.
983 def imem : Operand<iPTR> {
984 let PrintMethod = "printOperand";
// Same as imem but typed iPTRAny.
987 def imemAny : Operand<iPTRAny> {
988 let PrintMethod = "printOperand";
// i32 operand rendered by printLdStCode.
991 def LdStCode : Operand<i32> {
992 let PrintMethod = "printLdStCode";
// Wrapper node: one result, one operand, both of the same pointer type.
995 def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
996 def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
// Materialize a wrapped global address into a 32-bit register.
998 def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a),
999 "mov.u32 \t$dst, $a;",
1000 [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>;
// Materialize a wrapped global address into a 64-bit register.
1002 def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
1003 "mov.u64 \t$dst, $a;",
1004 [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>;
1006 // copyPhysreg is hard-coded in NVPTXInstrInfo.cpp
// Register-to-register moves, one per register class. All are flagged
// IsSimpleMove and carry no selection pattern ([]).
1007 let IsSimpleMove=1 in {
// Integer / predicate classes.
1008 def IMOV1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
1009 "mov.pred \t$dst, $sss;", []>;
1010 def IMOV16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
1011 "mov.u16 \t$dst, $sss;", []>;
1012 def IMOV32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
1013 "mov.u32 \t$dst, $sss;", []>;
1014 def IMOV64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
1015 "mov.u64 \t$dst, $sss;", []>;
// Floating-point classes.
1017 def FMOV32rr: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
1018 "mov.f32 \t$dst, $src;", []>;
1019 def FMOV64rr: NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
1020 "mov.f64 \t$dst, $src;", []>;
// Immediate-to-register moves. Integer forms match `imm`, floating-point
// forms match `fpimm`; each loads the constant into the matching class.
1022 def IMOV1ri: NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
1023 "mov.pred \t$dst, $src;",
1024 [(set Int1Regs:$dst, imm:$src)]>;
1025 def IMOV16ri: NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
1026 "mov.u16 \t$dst, $src;",
1027 [(set Int16Regs:$dst, imm:$src)]>;
1028 def IMOV32ri: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
1029 "mov.u32 \t$dst, $src;",
1030 [(set Int32Regs:$dst, imm:$src)]>;
// Note the 64-bit integer form is named IMOV64i (no trailing "r").
1031 def IMOV64i: NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
1032 "mov.u64 \t$dst, $src;",
1033 [(set Int64Regs:$dst, imm:$src)]>;
1035 def FMOV32ri: NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
1036 "mov.f32 \t$dst, $src;",
1037 [(set Float32Regs:$dst, fpimm:$src)]>;
1038 def FMOV64ri: NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
1039 "mov.f64 \t$dst, $src;",
1040 [(set Float64Regs:$dst, fpimm:$src)]>;
// A wrapped external symbol of type i32 is loaded with the 32-bit
// immediate move.
1042 def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;
1044 //---- Copy Frame Index ----
// Turn an ADDRri / ADDRri64 address into a register value with an add;
// the operand is printed through the "add" modifier of the memory operand.
1045 def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
1046 "add.u32 \t$dst, ${addr:add};",
1047 [(set Int32Regs:$dst, ADDRri:$addr)]>;
1048 def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr),
1049 "add.u64 \t$dst, ${addr:add};",
1050 [(set Int64Regs:$dst, ADDRri64:$addr)]>;
1052 //-----------------------------------
1053 // Comparison and Selection
1054 //-----------------------------------
1056 // Generate string block like
1059 // setp.gt.s16 p, %a, %b;
1060 // selp.s16 %dst, -1, 0, p;
1062 // when OpcStr=setp.gt.s sz1=16 sz2=16 d=%dst a=%a b=%b
// Set_Str assembles that text step by step in fields t1..t11; the finished
// string is field `s`, which users read as Set_Str<...>.s.
//   OpcStr + sz1 - compare mnemonic and its type suffix
//   sz2          - bit width appended to "selp.s"
//   d, a, b      - destination and the two source operand names
1063 class Set_Str<string OpcStr, string sz1, string sz2, string d, string a,
// Open the block and declare the scratch predicate p.
1065 string t1 = "{{\n\t.reg .pred p;\n\t";
// Compare: <OpcStr><sz1> p, a, b;
1066 string t2 = !strconcat(t1 , OpcStr);
1067 string t3 = !strconcat(t2 , sz1);
1068 string t4 = !strconcat(t3 , " \tp, ");
1069 string t5 = !strconcat(t4 , a);
1070 string t6 = !strconcat(t5 , ", ");
1071 string t7 = !strconcat(t6 , b);
// Select: selp.s<sz2> d, -1, 0, p; then close the block.
1072 string t8 = !strconcat(t7 , ";\n\tselp.s");
1073 string t9 = !strconcat(t8 , sz2);
1074 string t10 = !strconcat(t9, " \t");
1075 string t11 = !strconcat(t10, d);
1076 string s = !strconcat(t11, ", -1, 0, p;\n\t}}");
// ISET_FORMAT: integer comparison instructions for 16/32/64-bit operands.
//   OpcStr     - mnemonic prefix for the predicate-producing forms and for
//                the Set_Str compare+select sequences.
//   OpcStr_u32 - mnemonic prefix for the forms that write an Int32Regs result.
//   OpNode     - comparison PatFrag matched by the selection patterns.
//   TypeStr, CVTStr - NOTE(review): not referenced by any def shown here;
//                confirm whether they are still needed.
// Def name suffixes: _toiNN = Set_Str sequence writing an NN-bit integer,
// _p = Int1Regs (predicate) result, _u32 = Int32Regs result;
// rr / ri / ir give the register/immediate kinds of $a and $b.
1079 multiclass ISET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode,
1080 string TypeStr, string CVTStr> {
// Compare-and-select sequences built by Set_Str; result width equals the
// operand width.
1081 def i16rr_toi16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
1083 Set_Str<OpcStr, "16", "16", "$dst", "$a", "$b">.s,
1085 def i32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
1087 Set_Str<OpcStr, "32", "32", "$dst", "$a", "$b">.s,
1089 def i64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
1091 Set_Str<OpcStr, "64", "64", "$dst", "$a", "$b">.s,
// Predicate-producing compares (result in Int1Regs).
1094 def i16rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
1095 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1096 [(set Int1Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
1097 def i16ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
1098 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1099 [(set Int1Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
1100 def i16ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i16imm:$a, Int16Regs:$b),
1101 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1102 [(set Int1Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>;
1103 def i32rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
1104 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1105 [(set Int1Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
1106 def i32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1107 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1108 [(set Int1Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
1109 def i32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i32imm:$a, Int32Regs:$b),
1110 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1111 [(set Int1Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>;
1112 def i64rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
1113 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1114 [(set Int1Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
1115 def i64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
1116 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1117 [(set Int1Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
1118 def i64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i64imm:$a, Int64Regs:$b),
1119 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1120 [(set Int1Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>;
// Compares that write their result to a 32-bit register.
1122 def i16rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a,
1124 !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
1125 [(set Int32Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
1126 def i16ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
1127 !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
1128 [(set Int32Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
1129 def i16ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i16imm:$a, Int16Regs:$b),
1130 !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
1131 [(set Int32Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>;
1132 def i32rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
1134 !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
1135 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
1136 def i32ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1137 !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
1138 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
1139 def i32ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, Int32Regs:$b),
1140 !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
1141 [(set Int32Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>;
1142 def i64rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a,
1144 !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
1145 [(set Int32Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
1146 def i64ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
1147 !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
1148 [(set Int32Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
1149 def i64ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i64imm:$a, Int64Regs:$b),
1150 !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
1151 [(set Int32Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>;
// FSET_FORMAT: floating-point comparison instructions for f32 and f64.
//   OpcStr     - mnemonic prefix for the predicate-producing forms and for
//                the Set_Str compare+select sequences.
//   OpcStr_u32 - mnemonic prefix for the forms that write an Int32Regs result.
//   OpNode     - comparison PatFrag matched by the selection patterns.
// Def name suffixes: _toiNN = Set_Str sequence writing an NN-bit integer,
// _p = Int1Regs (predicate) result, _u32 = Int32Regs result;
// rr / ri / ir give the register/immediate kinds of $a and $b.
// Every *_ftz def emits the "ftz.f32" form and is gated on doF32FTZ, so the
// flush-to-zero encoding is only selectable when that predicate holds; the
// un-gated non-ftz def with the same pattern covers the other case.
1154 multiclass FSET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode> {
// Compare-and-select sequences built by Set_Str.
1155 def f32rr_toi32_ftz: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a,
1157 Set_Str<OpcStr, "ftz.f32", "32", "$dst", "$a", "$b">.s,
1158 []>, Requires<[doF32FTZ]>;
1159 def f32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a,
1161 Set_Str<OpcStr, "f32", "32", "$dst", "$a", "$b">.s,
1163 def f64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Float64Regs:$a,
1165 Set_Str<OpcStr, "f64", "64", "$dst", "$a", "$b">.s,
1167 def f64rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float64Regs:$a,
1169 Set_Str<OpcStr, "f64", "32", "$dst", "$a", "$b">.s,
// Predicate-producing compares (result in Int1Regs).
1172 def f32rr_p_ftz: NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a
1174 !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
1175 [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>
1176 , Requires<[doF32FTZ]>;
1177 def f32rr_p: NVPTXInst<(outs Int1Regs:$dst),
1178 (ins Float32Regs:$a, Float32Regs:$b),
1179 !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
1180 [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>;
1181 def f32ri_p_ftz: NVPTXInst<(outs Int1Regs:$dst),
1182 (ins Float32Regs:$a, f32imm:$b),
1183 !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
1184 [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
1185 Requires<[doF32FTZ]>;
1186 def f32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a, f32imm:$b),
1187 !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
1188 [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
1189 def f32ir_p_ftz: NVPTXInst<(outs Int1Regs:$dst),
1190 (ins f32imm:$a, Float32Regs:$b),
1191 !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
1192 [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>,
1193 Requires<[doF32FTZ]>;
1194 def f32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f32imm:$a, Float32Regs:$b),
1195 !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
1196 [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>;
1197 def f64rr_p: NVPTXInst<(outs Int1Regs:$dst),
1198 (ins Float64Regs:$a, Float64Regs:$b),
1199 !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
1200 [(set Int1Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>;
1201 def f64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float64Regs:$a, f64imm:$b),
1202 !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
1203 [(set Int1Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>;
1204 def f64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f64imm:$a, Float64Regs:$b),
1205 !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
1206 [(set Int1Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>;
// Compares that write their result to a 32-bit register. The ftz forms are
// gated on doF32FTZ exactly like the predicate-producing ftz forms above.
1208 def f32rr_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
1209 (ins Float32Regs:$a, Float32Regs:$b),
1210 !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
1211 [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>, Requires<[doF32FTZ]>;
1212 def f32rr_u32: NVPTXInst<(outs Int32Regs:$dst),
1213 (ins Float32Regs:$a, Float32Regs:$b),
1214 !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
1215 [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>;
1216 def f32ri_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
1217 (ins Float32Regs:$a, f32imm:$b),
1218 !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
1219 [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, Requires<[doF32FTZ]>;
1220 def f32ri_u32: NVPTXInst<(outs Int32Regs:$dst),
1221 (ins Float32Regs:$a, f32imm:$b),
1222 !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
1223 [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
1224 def f32ir_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
1225 (ins f32imm:$a, Float32Regs:$b),
1226 !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
1227 [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>, Requires<[doF32FTZ]>;
1228 def f32ir_u32: NVPTXInst<(outs Int32Regs:$dst),
1229 (ins f32imm:$a, Float32Regs:$b),
1230 !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
1231 [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>;
1232 def f64rr_u32: NVPTXInst<(outs Int32Regs:$dst),
1233 (ins Float64Regs:$a, Float64Regs:$b),
1234 !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
1235 [(set Int32Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>;
1236 def f64ri_u32: NVPTXInst<(outs Int32Regs:$dst),
1237 (ins Float64Regs:$a, f64imm:$b),
1238 !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
1239 [(set Int32Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>;
1240 def f64ir_u32: NVPTXInst<(outs Int32Regs:$dst),
1241 (ins f64imm:$a, Float64Regs:$b),
1242 !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
1243 [(set Int32Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>;
// Instantiations of ISET_FORMAT, one per integer condition. Arguments are:
// predicate-form mnemonic prefix, u32-result mnemonic prefix, comparison
// PatFrag, type string, and conversion string. Signed conditions use the
// ".s" mnemonics with "s16"/"cvt.s16.s8"; unsigned conditions use ".u" with
// "u16"/"cvt.u16.u8". Order below: gt, lt, ge, le, eq, ne (signed, then
// unsigned, for each).
1247 : ISET_FORMAT<"setp.gt.s", "set.gt.u32.s", setgt, "s16", "cvt.s16.s8">;
1249 : ISET_FORMAT<"setp.gt.u", "set.gt.u32.u", setugt, "u16", "cvt.u16.u8">;
1251 : ISET_FORMAT<"setp.lt.s", "set.lt.u32.s", setlt, "s16", "cvt.s16.s8">;
1253 : ISET_FORMAT<"setp.lt.u", "set.lt.u32.u", setult, "u16", "cvt.u16.u8">;
1255 : ISET_FORMAT<"setp.ge.s", "set.ge.u32.s", setge, "s16", "cvt.s16.s8">;
1257 : ISET_FORMAT<"setp.ge.u", "set.ge.u32.u", setuge, "u16", "cvt.u16.u8">;
1259 : ISET_FORMAT<"setp.le.s", "set.le.u32.s", setle, "s16", "cvt.s16.s8">;
1261 : ISET_FORMAT<"setp.le.u", "set.le.u32.u", setule, "u16", "cvt.u16.u8">;
1263 : ISET_FORMAT<"setp.eq.s", "set.eq.u32.s", seteq, "s16", "cvt.s16.s8">;
1265 : ISET_FORMAT<"setp.eq.u", "set.eq.u32.u", setueq, "u16", "cvt.u16.u8">;
1267 : ISET_FORMAT<"setp.ne.s", "set.ne.u32.s", setne, "s16", "cvt.s16.s8">;
1269 : ISET_FORMAT<"setp.ne.u", "set.ne.u32.u", setune, "u16", "cvt.u16.u8">;
// Comparisons of two predicates (Int1Regs) that produce a predicate.
// Inequality is a single xor.pred; setne and setune map to the same text.
1271 def ISetSNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
1272 (ins Int1Regs:$a, Int1Regs:$b),
1273 "xor.pred \t$dst, $a, $b;",
1274 [(set Int1Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>;
1275 def ISetUNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
1276 (ins Int1Regs:$a, Int1Regs:$b),
1277 "xor.pred \t$dst, $a, $b;",
1278 [(set Int1Regs:$dst, (setune Int1Regs:$a, Int1Regs:$b))]>;
// Equality is xor.pred into a scratch predicate followed by not.pred;
// seteq and setueq map to the same text.
1279 def ISetSEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
1280 (ins Int1Regs:$a, Int1Regs:$b),
1281 !strconcat("{{\n\t",
1282 !strconcat(".reg .pred temp;\n\t",
1283 !strconcat("xor.pred \ttemp, $a, $b;\n\t",
1284 !strconcat("not.pred \t$dst, temp;\n\t}}","")))),
1285 [(set Int1Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>;
1286 def ISetUEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
1287 (ins Int1Regs:$a, Int1Regs:$b),
1288 !strconcat("{{\n\t",
1289 !strconcat(".reg .pred temp;\n\t",
1290 !strconcat("xor.pred \ttemp, $a, $b;\n\t",
1291 !strconcat("not.pred \t$dst, temp;\n\t}}","")))),
1292 [(set Int1Regs:$dst, (setueq Int1Regs:$a, Int1Regs:$b))]>;
1294 // Compare 2 i1's and produce a u32
// Both forms xor the inputs into a scratch predicate and then use selp.u32
// to turn it into -1 / 0. For setne the xor result selects -1; for seteq
// the selp operands are swapped (0, -1) so a set xor result yields 0.
1295 def ISETSNEi1rr_u32 : NVPTXInst<(outs Int32Regs:$dst),
1296 (ins Int1Regs:$a, Int1Regs:$b),
1297 !strconcat("{{\n\t",
1298 !strconcat(".reg .pred temp;\n\t",
1299 !strconcat("xor.pred \ttemp, $a, $b;\n\t",
1300 !strconcat("selp.u32 \t$dst, -1, 0, temp;", "\n\t}}")))),
1301 [(set Int32Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>;
1302 def ISETSEQi1rr_u32 : NVPTXInst<(outs Int32Regs:$dst),
1303 (ins Int1Regs:$a, Int1Regs:$b),
1304 !strconcat("{{\n\t",
1305 !strconcat(".reg .pred temp;\n\t",
1306 !strconcat("xor.pred \ttemp, $a, $b;\n\t",
1307 !strconcat("selp.u32 \t$dst, 0, -1, temp;", "\n\t}}")))),
1308 [(set Int32Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>;
// Instantiations of FSET_FORMAT. Arguments: predicate-form mnemonic prefix,
// u32-result mnemonic prefix, comparison PatFrag.
// Ordered comparisons (seto* PatFrags).
1310 defm FSetGT : FSET_FORMAT<"setp.gt.", "set.gt.u32.", setogt>;
1311 defm FSetLT : FSET_FORMAT<"setp.lt.", "set.lt.u32.", setolt>;
1312 defm FSetGE : FSET_FORMAT<"setp.ge.", "set.ge.u32.", setoge>;
1313 defm FSetLE : FSET_FORMAT<"setp.le.", "set.le.u32.", setole>;
1314 defm FSetEQ : FSET_FORMAT<"setp.eq.", "set.eq.u32.", setoeq>;
1315 defm FSetNE : FSET_FORMAT<"setp.ne.", "set.ne.u32.", setone>;
// Unordered comparisons (setu* PatFrags) use the mnemonics with a "u" suffix.
1317 defm FSetUGT : FSET_FORMAT<"setp.gtu.", "set.gtu.u32.", setugt>;
1318 defm FSetULT : FSET_FORMAT<"setp.ltu.", "set.ltu.u32.",setult>;
1319 defm FSetUGE : FSET_FORMAT<"setp.geu.", "set.geu.u32.",setuge>;
1320 defm FSetULE : FSET_FORMAT<"setp.leu.", "set.leu.u32.",setule>;
1321 defm FSetUEQ : FSET_FORMAT<"setp.equ.", "set.equ.u32.",setueq>;
1322 defm FSetUNE : FSET_FORMAT<"setp.neu.", "set.neu.u32.",setune>;
// seto / setuo map to the "num" / "nan" tests.
1324 defm FSetNUM : FSET_FORMAT<"setp.num.", "set.num.u32.",seto>;
1325 defm FSetNAN : FSET_FORMAT<"setp.nan.", "set.nan.u32.",setuo>;
// Select between two predicates without a selp: the result is
// (p AND a) OR ((NOT p) AND b), built from ANDb1rr / ORb1rr / NOT1.
1327 def SELECTi1rr : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)),
1328 (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a),
1329 (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>;
// 16-bit select via selp.b16, matching (select $p, $a, $b). The predicate is
// the last instruction operand. Variants cover each register/immediate
// combination of $a and $b: rr, ri, ir, ii.
1331 def SELECTi16rr : NVPTXInst<(outs Int16Regs:$dst),
1332 (ins Int16Regs:$a, Int16Regs:$b, Int1Regs:$p),
1333 "selp.b16 \t$dst, $a, $b, $p;",
1334 [(set Int16Regs:$dst, (select Int1Regs:$p, Int16Regs:$a, Int16Regs:$b))]>;
1335 def SELECTi16ri : NVPTXInst<(outs Int16Regs:$dst),
1336 (ins Int16Regs:$a, i16imm:$b, Int1Regs:$p),
1337 "selp.b16 \t$dst, $a, $b, $p;",
1338 [(set Int16Regs:$dst, (select Int1Regs:$p, Int16Regs:$a, imm:$b))]>;
1339 def SELECTi16ir : NVPTXInst<(outs Int16Regs:$dst),
1340 (ins i16imm:$a, Int16Regs:$b, Int1Regs:$p),
1341 "selp.b16 \t$dst, $a, $b, $p;",
1342 [(set Int16Regs:$dst, (select Int1Regs:$p, imm:$a, Int16Regs:$b))]>;
1343 def SELECTi16ii : NVPTXInst<(outs Int16Regs:$dst),
1344 (ins i16imm:$a, i16imm:$b, Int1Regs:$p),
1345 "selp.b16 \t$dst, $a, $b, $p;",
1346 [(set Int16Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
// 32-bit select via selp.b32, matching (select $p, $a, $b). Variants cover
// each register/immediate combination of $a and $b: rr, ri, ir, ii.
1348 def SELECTi32rr : NVPTXInst<(outs Int32Regs:$dst),
1349 (ins Int32Regs:$a, Int32Regs:$b, Int1Regs:$p),
1350 "selp.b32 \t$dst, $a, $b, $p;",
1351 [(set Int32Regs:$dst, (select Int1Regs:$p, Int32Regs:$a, Int32Regs:$b))]>;
1352 def SELECTi32ri : NVPTXInst<(outs Int32Regs:$dst),
1353 (ins Int32Regs:$a, i32imm:$b, Int1Regs:$p),
1354 "selp.b32 \t$dst, $a, $b, $p;",
1355 [(set Int32Regs:$dst, (select Int1Regs:$p, Int32Regs:$a, imm:$b))]>;
1356 def SELECTi32ir : NVPTXInst<(outs Int32Regs:$dst),
1357 (ins i32imm:$a, Int32Regs:$b, Int1Regs:$p),
1358 "selp.b32 \t$dst, $a, $b, $p;",
1359 [(set Int32Regs:$dst, (select Int1Regs:$p, imm:$a, Int32Regs:$b))]>;
1360 def SELECTi32ii : NVPTXInst<(outs Int32Regs:$dst),
1361 (ins i32imm:$a, i32imm:$b, Int1Regs:$p),
1362 "selp.b32 \t$dst, $a, $b, $p;",
1363 [(set Int32Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
// 64-bit select via selp.b64, matching (select $p, $a, $b). Variants cover
// each register/immediate combination of $a and $b: rr, ri, ir, ii.
1365 def SELECTi64rr : NVPTXInst<(outs Int64Regs:$dst),
1366 (ins Int64Regs:$a, Int64Regs:$b, Int1Regs:$p),
1367 "selp.b64 \t$dst, $a, $b, $p;",
1368 [(set Int64Regs:$dst, (select Int1Regs:$p, Int64Regs:$a, Int64Regs:$b))]>;
1369 def SELECTi64ri : NVPTXInst<(outs Int64Regs:$dst),
1370 (ins Int64Regs:$a, i64imm:$b, Int1Regs:$p),
1371 "selp.b64 \t$dst, $a, $b, $p;",
1372 [(set Int64Regs:$dst, (select Int1Regs:$p, Int64Regs:$a, imm:$b))]>;
1373 def SELECTi64ir : NVPTXInst<(outs Int64Regs:$dst),
1374 (ins i64imm:$a, Int64Regs:$b, Int1Regs:$p),
1375 "selp.b64 \t$dst, $a, $b, $p;",
1376 [(set Int64Regs:$dst, (select Int1Regs:$p, imm:$a, Int64Regs:$b))]>;
1377 def SELECTi64ii : NVPTXInst<(outs Int64Regs:$dst),
1378 (ins i64imm:$a, i64imm:$b, Int1Regs:$p),
1379 "selp.b64 \t$dst, $a, $b, $p;",
1380 [(set Int64Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
// f32 select via selp.f32, matching (select $p, $a, $b). Immediate operands
// match fpimm. Variants: rr, ri, ir, ii.
1382 def SELECTf32rr : NVPTXInst<(outs Float32Regs:$dst),
1383 (ins Float32Regs:$a, Float32Regs:$b, Int1Regs:$p),
1384 "selp.f32 \t$dst, $a, $b, $p;",
1385 [(set Float32Regs:$dst,
1386 (select Int1Regs:$p, Float32Regs:$a, Float32Regs:$b))]>;
1387 def SELECTf32ri : NVPTXInst<(outs Float32Regs:$dst),
1388 (ins Float32Regs:$a, f32imm:$b, Int1Regs:$p),
1389 "selp.f32 \t$dst, $a, $b, $p;",
1390 [(set Float32Regs:$dst, (select Int1Regs:$p, Float32Regs:$a, fpimm:$b))]>;
1391 def SELECTf32ir : NVPTXInst<(outs Float32Regs:$dst),
1392 (ins f32imm:$a, Float32Regs:$b, Int1Regs:$p),
1393 "selp.f32 \t$dst, $a, $b, $p;",
1394 [(set Float32Regs:$dst, (select Int1Regs:$p, fpimm:$a, Float32Regs:$b))]>;
1395 def SELECTf32ii : NVPTXInst<(outs Float32Regs:$dst),
1396 (ins f32imm:$a, f32imm:$b, Int1Regs:$p),
1397 "selp.f32 \t$dst, $a, $b, $p;",
1398 [(set Float32Regs:$dst, (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>;
// f64 select via selp.f64, matching (select $p, $a, $b). Immediate operands
// match fpimm. Variants: rr, ri, ir, ii.
1400 def SELECTf64rr : NVPTXInst<(outs Float64Regs:$dst),
1401 (ins Float64Regs:$a, Float64Regs:$b, Int1Regs:$p),
1402 "selp.f64 \t$dst, $a, $b, $p;",
1403 [(set Float64Regs:$dst,
1404 (select Int1Regs:$p, Float64Regs:$a, Float64Regs:$b))]>;
1405 def SELECTf64ri : NVPTXInst<(outs Float64Regs:$dst),
1406 (ins Float64Regs:$a, f64imm:$b, Int1Regs:$p),
1407 "selp.f64 \t$dst, $a, $b, $p;",
1408 [(set Float64Regs:$dst, (select Int1Regs:$p, Float64Regs:$a, fpimm:$b))]>;
1409 def SELECTf64ir : NVPTXInst<(outs Float64Regs:$dst),
1410 (ins f64imm:$a, Float64Regs:$b, Int1Regs:$p),
1411 "selp.f64 \t$dst, $a, $b, $p;",
1412 [(set Float64Regs:$dst, (select Int1Regs:$p, fpimm:$a, Float64Regs:$b))]>;
// NOTE(review): the asm string below has a space after the tab, unlike every
// other select above; looks unintentional, but it only affects whitespace in
// the printed text — confirm before normalizing.
1413 def SELECTf64ii : NVPTXInst<(outs Float64Regs:$dst),
1414 (ins f64imm:$a, f64imm:$b, Int1Regs:$p),
1415 "selp.f64 \t $dst, $a, $b, $p;",
1416 [(set Float64Regs:$dst, (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>;
1418 //def ld_param : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad,
1419 // [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
// Type profiles for the NVPTX-specific parameter-passing and call nodes.
// SDTypeProfile arguments are <number of results, number of operands,
// constraint list>; SDTCisInt<N> constrains value N to an integer type.
// Declarations of parameters / return slots.
1421 def SDTDeclareParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
1423 def SDTDeclareScalarParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>,
1424 SDTCisInt<1>, SDTCisInt<2>]>;
// Parameter loads: 1, 2 or 4 results followed by two integer operands.
1425 def SDTLoadParamProfile : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
1426 def SDTLoadParamV2Profile : SDTypeProfile<2, 2, [SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisInt<3>]>;
1427 def SDTLoadParamV4Profile : SDTypeProfile<4, 2, [SDTCisInt<4>, SDTCisInt<5>]>;
1428 def SDTPrintCallProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
1429 def SDTPrintCallUniProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
// Parameter stores: two leading integer operands, then 1, 2 or 4 further
// operands whose types are unconstrained here.
1430 def SDTStoreParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
1431 def SDTStoreParamV2Profile : SDTypeProfile<0, 4, [SDTCisInt<0>, SDTCisInt<1>]>;
1432 def SDTStoreParamV4Profile : SDTypeProfile<0, 6, [SDTCisInt<0>, SDTCisInt<1>]>;
1433 def SDTStoreParam32Profile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
// Call-sequence markers and argument nodes.
1434 def SDTCallArgProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
1435 def SDTCallArgMarkProfile : SDTypeProfile<0, 0, []>;
1436 def SDTCallVoidProfile : SDTypeProfile<0, 1, []>;
1437 def SDTCallValProfile : SDTypeProfile<1, 0, []>;
1438 def SDTMoveParamProfile : SDTypeProfile<1, 1, []>;
// Return-value moves and stores: one leading integer operand, then 1, 2 or 4
// further operands for the store forms.
1439 def SDTMoveRetvalProfile : SDTypeProfile<0, 1, []>;
1440 def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
1441 def SDTStoreRetvalV2Profile : SDTypeProfile<0, 3, [SDTCisInt<0>]>;
1442 def SDTStoreRetvalV4Profile : SDTypeProfile<0, 5, [SDTCisInt<0>]>;
1443 def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>;
// SelectionDAG node declarations binding each NVPTXISD opcode to one of the
// type profiles defined earlier in this file, together with its node
// properties. Nodes in the call sequence carry a chain plus in/out glue and
// are marked as having side effects; the parameter loads are chained, glued
// and marked SDNPMayLoad; return-value nodes are chained with side effects
// but no glue.
// Parameter / return-slot declarations.
1445 def DeclareParam : SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile,
1446 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1447 def DeclareScalarParam : SDNode<"NVPTXISD::DeclareScalarParam",
1448 SDTDeclareScalarParamProfile,
1449 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1450 def DeclareRetParam : SDNode<"NVPTXISD::DeclareRetParam",
1451 SDTDeclareParamProfile,
1452 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1453 def DeclareRet : SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile,
1454 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
// Parameter loads (scalar, 2-element, 4-element).
1455 def LoadParam : SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile,
1456 [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
1457 def LoadParamV2 : SDNode<"NVPTXISD::LoadParamV2", SDTLoadParamV2Profile,
1458 [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
1459 def LoadParamV4 : SDNode<"NVPTXISD::LoadParamV4", SDTLoadParamV4Profile,
1460 [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
1461 def PrintCall : SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile,
1462 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1463 def PrintCallUni : SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallUniProfile,
1464 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
// Parameter stores (scalar, 2-element, 4-element, and the U32/S32 forms).
1465 def StoreParam : SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile,
1466 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1467 def StoreParamV2 : SDNode<"NVPTXISD::StoreParamV2", SDTStoreParamV2Profile,
1468 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1469 def StoreParamV4 : SDNode<"NVPTXISD::StoreParamV4", SDTStoreParamV4Profile,
1470 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1471 def StoreParamU32 : SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile,
1472 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1473 def StoreParamS32 : SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile,
1474 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1475 def MoveToParam : SDNode<"NVPTXISD::MoveToParam", SDTStoreParamProfile,
1476 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
// Call argument list and call emission.
1477 def CallArgBegin : SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile,
1478 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1479 def CallArg : SDNode<"NVPTXISD::CallArg", SDTCallArgProfile,
1480 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1481 def LastCallArg : SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile,
1482 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1483 def CallArgEnd : SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile,
1484 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1485 def CallVoid : SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile,
1486 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1487 def Prototype : SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile,
1488 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1489 def CallVal : SDNode<"NVPTXISD::CallVal", SDTCallValProfile,
1490 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1491 def MoveParam : SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile,
// Return-value handling.
1493 def MoveRetval : SDNode<"NVPTXISD::MoveRetval", SDTMoveRetvalProfile,
1494 [SDNPHasChain, SDNPSideEffect]>;
1495 def StoreRetval : SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile,
1496 [SDNPHasChain, SDNPSideEffect]>;
1497 def StoreRetvalV2 : SDNode<"NVPTXISD::StoreRetvalV2", SDTStoreRetvalV2Profile,
1498 [SDNPHasChain, SDNPSideEffect]>;
1499 def StoreRetvalV4 : SDNode<"NVPTXISD::StoreRetvalV4", SDTStoreRetvalV4Profile,
1500 [SDNPHasChain, SDNPSideEffect]>;
1501 def MoveToRetval : SDNode<"NVPTXISD::MoveToRetval", SDTStoreRetvalProfile,
1502 [SDNPHasChain, SDNPSideEffect]>;
1503 def PseudoUseParam : SDNode<"NVPTXISD::PseudoUseParam",
1504 SDTPseudoUseParamProfile,
1505 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1506 def RETURNNode : SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
1507 [SDNPHasChain, SDNPSideEffect]>;
// Instruction templates that read a callee's returned value.
//   regclass - register class of the destination register(s)
//   opstr    - type suffix appended to the mnemonic
// Memory form: ld.param<opstr> from [retval0 + $b], $b being a byte-offset
// style immediate added to retval0.
1509 class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
1510 NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
1511 !strconcat(!strconcat("ld.param", opstr),
1512 "\t$dst, [retval0+$b];"),
// Register form: mov<opstr> from the symbol "retval" with $b appended to its
// name. Its pattern matches LoadParam with a first operand of constant 0.
1515 class LoadParamRegInst<NVPTXRegClass regclass, string opstr> :
1516 NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
1517 !strconcat(!strconcat("mov", opstr),
1518 "\t$dst, retval$b;"),
1519 [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
// Two-element vector load; no selection pattern ([]).
1521 class LoadParamV2MemInst<NVPTXRegClass regclass, string opstr> :
1522 NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins i32imm:$b),
1523 !strconcat(!strconcat("ld.param.v2", opstr),
1524 "\t{{$dst, $dst2}}, [retval0+$b];"), []>;
// Four-element vector load; no selection pattern ([]).
1526 class LoadParamV4MemInst<NVPTXRegClass regclass, string opstr> :
1527 NVPTXInst<(outs regclass:$dst, regclass:$dst2, regclass:$dst3,
1530 !strconcat(!strconcat("ld.param.v4", opstr),
1531 "\t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];"), []>;
// Instruction templates that write an outgoing call parameter.
//   regclass - register class of the stored value(s)
//   opstr    - type suffix appended to the mnemonic
// $a is appended to the symbol name "param"; $b is the offset inside it.
// Scalar store: st.param<opstr> [param$a+$b], $val.
1533 class StoreParamInst<NVPTXRegClass regclass, string opstr> :
1534 NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
1535 !strconcat(!strconcat("st.param", opstr),
1536 "\t[param$a+$b], $val;"),
// Two-element vector store: st.param.v2<opstr> [param$a+$b], {$val, $val2}.
1539 class StoreParamV2Inst<NVPTXRegClass regclass, string opstr> :
1540 NVPTXInst<(outs), (ins regclass:$val, regclass:$val2,
1541 i32imm:$a, i32imm:$b),
1542 !strconcat(!strconcat("st.param.v2", opstr),
1543 "\t[param$a+$b], {{$val, $val2}};"),
// Four-element vector store into an outgoing call parameter. Prints
//   st.param.v4<opstr>  [param$a+$b], {$val, $val2, $val3, $val4};
// The input operands must be named exactly as the asm string references them
// ($val, $val2, $val3, $val4); a mismatch leaves $val4 unbound when the asm
// string is processed. No selection pattern is attached.
class StoreParamV4Inst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, regclass:$val3,
                             regclass:$val4, i32imm:$a, i32imm:$b),
                !strconcat(!strconcat("st.param.v4", opstr),
                           "\t[param$a+$b], {{$val, $val2, $val3, $val4}};"),
                []>;
// Register move into an outgoing parameter. Prints
//   mov<opstr>  param$a, $val;
// $b is part of the matched (MoveToParam imm:$a, imm:$b, $val) node but is
// not printed.
class MoveToParamInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs),
              (ins regclass:$val, i32imm:$a, i32imm:$b),
              !strconcat("mov", !strconcat(opstr, "\tparam$a, $val;")),
              [(MoveToParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>;
// Scalar store into the function's return-value parameter. Prints
//   st.param<opstr>  [func_retval0+$a], $val;
// No selection pattern is attached (empty pattern list).
class StoreRetvalInst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs), (ins regclass:$val, i32imm:$a),
                !strconcat(!strconcat("st.param", opstr),
                           "\t[func_retval0+$a], $val;"),
                []>;
// Two-element vector store into the function's return-value parameter. Prints
//   st.param.v2<opstr>  [func_retval0+$a], {$val, $val2};
// No selection pattern is attached (empty pattern list).
class StoreRetvalV2Inst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, i32imm:$a),
                !strconcat(!strconcat("st.param.v2", opstr),
                           "\t[func_retval0+$a], {{$val, $val2}};"),
                []>;
// Four-element vector store into the function's return-value parameter.
// Prints
//   st.param.v4<opstr>  [func_retval0+$a], {$val, $val2, $val3, $val4};
// No selection pattern is attached (empty pattern list).
class StoreRetvalV4Inst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs),
                (ins regclass:$val, regclass:$val2, regclass:$val3,
                     regclass:$val4, i32imm:$a),
                !strconcat(!strconcat("st.param.v4", opstr),
                           "\t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"),
                []>;
// Register move into a numbered return value. Prints
//   mov<opstr>  func_retval$num, $val;
// and is selected for (MoveToRetval imm:$num, $val).
class MoveToRetvalInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs),
              (ins i32imm:$num, regclass:$val),
              !strconcat("mov", !strconcat(opstr, "\tfunc_retval$num, $val;")),
              [(MoveToRetval (i32 imm:$num), regclass:$val)]>;
// Register move into return value 0. Prints
//   mov<opstr>  func_retval0, $val;
// and is selected for (MoveRetval $val).
class MoveRetvalInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs),
              (ins regclass:$val),
              !strconcat("mov", !strconcat(opstr, "\tfunc_retval0, $val;")),
              [(MoveRetval regclass:$val)]>;
// Prints the head of a call that has exactly one return value; selected for
// (PrintCall 1). The text mirrors the call.uni variant for one return value.
def PrintCallRetInst1 : NVPTXInst<(outs), (ins),
                                  "call (retval0), ",
                                  [(PrintCall (i32 1))]>;
// Call heads for 2..8 return values: (PrintCall N) prints "call (" followed
// by retval0..retval<N-1> and "), ".
def PrintCallRetInst2 :
  NVPTXInst<(outs), (ins),
            "call (retval0, retval1), ",
            [(PrintCall (i32 2))]>;
def PrintCallRetInst3 :
  NVPTXInst<(outs), (ins),
            "call (retval0, retval1, retval2), ",
            [(PrintCall (i32 3))]>;
def PrintCallRetInst4 :
  NVPTXInst<(outs), (ins),
            "call (retval0, retval1, retval2, retval3), ",
            [(PrintCall (i32 4))]>;
def PrintCallRetInst5 :
  NVPTXInst<(outs), (ins),
            "call (retval0, retval1, retval2, retval3, retval4), ",
            [(PrintCall (i32 5))]>;
def PrintCallRetInst6 :
  NVPTXInst<(outs), (ins),
            "call (retval0, retval1, retval2, retval3, retval4, retval5), ",
            [(PrintCall (i32 6))]>;
def PrintCallRetInst7 :
  NVPTXInst<(outs), (ins),
            "call (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
            [(PrintCall (i32 7))]>;
def PrintCallRetInst8 :
  NVPTXInst<(outs), (ins),
            "call (retval0, retval1, retval2, retval3, retval4, retval5, retval6, retval7), ",
            [(PrintCall (i32 8))]>;

// Call head for a call without return values: (PrintCall 0).
def PrintCallNoRetInst :
  NVPTXInst<(outs), (ins),
            "call ",
            [(PrintCall (i32 0))]>;
// call.uni heads: (PrintCallUni N) prints "call.uni (" followed by
// retval0..retval<N-1> and "), " for N = 1..8.
def PrintCallUniRetInst1 :
  NVPTXInst<(outs), (ins),
            "call.uni (retval0), ",
            [(PrintCallUni (i32 1))]>;
def PrintCallUniRetInst2 :
  NVPTXInst<(outs), (ins),
            "call.uni (retval0, retval1), ",
            [(PrintCallUni (i32 2))]>;
def PrintCallUniRetInst3 :
  NVPTXInst<(outs), (ins),
            "call.uni (retval0, retval1, retval2), ",
            [(PrintCallUni (i32 3))]>;
def PrintCallUniRetInst4 :
  NVPTXInst<(outs), (ins),
            "call.uni (retval0, retval1, retval2, retval3), ",
            [(PrintCallUni (i32 4))]>;
def PrintCallUniRetInst5 :
  NVPTXInst<(outs), (ins),
            "call.uni (retval0, retval1, retval2, retval3, retval4), ",
            [(PrintCallUni (i32 5))]>;
def PrintCallUniRetInst6 :
  NVPTXInst<(outs), (ins),
            "call.uni (retval0, retval1, retval2, retval3, retval4, retval5), ",
            [(PrintCallUni (i32 6))]>;
def PrintCallUniRetInst7 :
  NVPTXInst<(outs), (ins),
            "call.uni (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
            [(PrintCallUni (i32 7))]>;
def PrintCallUniRetInst8 :
  NVPTXInst<(outs), (ins),
            "call.uni (retval0, retval1, retval2, retval3, retval4, retval5, retval6, retval7), ",
            [(PrintCallUni (i32 8))]>;

// call.uni head for a call without return values: (PrintCallUni 0).
def PrintCallUniNoRetInst :
  NVPTXInst<(outs), (ins),
            "call.uni ",
            [(PrintCallUni (i32 0))]>;
// ld.param instantiations for reading call results out of retval0.
// The .b8 and .b16 forms both use Int16Regs as their register class.

// Integer: scalar, v2, v4.
def LoadParamMemI64   : LoadParamMemInst<Int64Regs, ".b64">;
def LoadParamMemI32   : LoadParamMemInst<Int32Regs, ".b32">;
def LoadParamMemI16   : LoadParamMemInst<Int16Regs, ".b16">;
def LoadParamMemI8    : LoadParamMemInst<Int16Regs, ".b8">;
def LoadParamMemV2I64 : LoadParamV2MemInst<Int64Regs, ".b64">;
def LoadParamMemV2I32 : LoadParamV2MemInst<Int32Regs, ".b32">;
def LoadParamMemV2I16 : LoadParamV2MemInst<Int16Regs, ".b16">;
def LoadParamMemV2I8  : LoadParamV2MemInst<Int16Regs, ".b8">;
def LoadParamMemV4I32 : LoadParamV4MemInst<Int32Regs, ".b32">;
def LoadParamMemV4I16 : LoadParamV4MemInst<Int16Regs, ".b16">;
def LoadParamMemV4I8  : LoadParamV4MemInst<Int16Regs, ".b8">;
// Floating point: scalar, v2, v4.
def LoadParamMemF32   : LoadParamMemInst<Float32Regs, ".f32">;
def LoadParamMemF64   : LoadParamMemInst<Float64Regs, ".f64">;
def LoadParamMemV2F32 : LoadParamV2MemInst<Float32Regs, ".f32">;
def LoadParamMemV2F64 : LoadParamV2MemInst<Float64Regs, ".f64">;
def LoadParamMemV4F32 : LoadParamV4MemInst<Float32Regs, ".f32">;
// Register-form return-value reads, selected for (LoadParam 0, imm:$b).
def LoadParamRegI64 : LoadParamRegInst<Int64Regs, ".b64">;
def LoadParamRegI32 : LoadParamRegInst<Int32Regs, ".b32">;
// The 16-bit variant is spelled out because it prints a cvt.u16.u32 instead
// of a mov.
def LoadParamRegI16 :
  NVPTXInst<(outs Int16Regs:$dst),
            (ins i32imm:$b),
            "cvt.u16.u32\t$dst, retval$b;",
            [(set Int16Regs:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;

def LoadParamRegF32 : LoadParamRegInst<Float32Regs, ".f32">;
def LoadParamRegF64 : LoadParamRegInst<Float64Regs, ".f64">;
// Integer st.param instantiations (scalar and two-element vector).
// The .b8 and .b16 forms both use Int16Regs as their register class.
def StoreParamI64   : StoreParamInst<Int64Regs, ".b64">;
def StoreParamI32   : StoreParamInst<Int32Regs, ".b32">;

def StoreParamI16   : StoreParamInst<Int16Regs, ".b16">;
def StoreParamI8    : StoreParamInst<Int16Regs, ".b8">;
def StoreParamV2I64 : StoreParamV2Inst<Int64Regs, ".b64">;
def StoreParamV2I32 : StoreParamV2Inst<Int32Regs, ".b32">;
def StoreParamV2I16 : StoreParamV2Inst<Int16Regs, ".b16">;
def StoreParamV2I8  : StoreParamV2Inst<Int16Regs, ".b8">;
// FIXME: StoreParamV4Inst crashes llvm-tblgen :(
//def StoreParamV4I32    : StoreParamV4Inst<Int32Regs, ".b32">;
// Four-element 32-bit vector store into an outgoing parameter. The mnemonic
// carries the .v4 qualifier because the source operand is a four-element
// vector, matching the 16-bit, 8-bit and f32 V4 definitions in this file.
def StoreParamV4I32 : NVPTXInst<(outs), (ins Int32Regs:$val, Int32Regs:$val2,
                                              Int32Regs:$val3, Int32Regs:$val4,
                                              i32imm:$a, i32imm:$b),
        "st.param.v4.b32\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
        []>;
// Four-element 16-bit vector store into an outgoing parameter.
// No selection pattern is attached (empty pattern list).
def StoreParamV4I16 : NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2,
                                              Int16Regs:$val3, Int16Regs:$val4,
                                              i32imm:$a, i32imm:$b),
        "st.param.v4.b16\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
        []>;

// Four-element 8-bit vector store; the elements are held in Int16Regs.
// No selection pattern is attached (empty pattern list).
def StoreParamV4I8 : NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2,
                                             Int16Regs:$val3, Int16Regs:$val4,
                                             i32imm:$a, i32imm:$b),
        "st.param.v4.b8\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
        []>;
// Widening parameter stores. Each prints two PTX statements: a cvt of $val
// into temp_param_reg, then a 32-bit st.param of temp_param_reg. The S32/U32
// variants differ only in the cvt type pair they print.
// None of them has a selection pattern (empty pattern list).
def StoreParamS32I16 : NVPTXInst<(outs),
        (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
        !strconcat("cvt.s32.s16\ttemp_param_reg, $val;\n\t",
                   "st.param.b32\t[param$a+$b], temp_param_reg;"),
        []>;
def StoreParamU32I16 : NVPTXInst<(outs),
        (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
        !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
                   "st.param.b32\t[param$a+$b], temp_param_reg;"),
        []>;

def StoreParamU32I8 : NVPTXInst<(outs),
        (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
        !strconcat("cvt.u32.u8\ttemp_param_reg, $val;\n\t",
                   "st.param.b32\t[param$a+$b], temp_param_reg;"),
        []>;
def StoreParamS32I8 : NVPTXInst<(outs),
        (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
        !strconcat("cvt.s32.s8\ttemp_param_reg, $val;\n\t",
                   "st.param.b32\t[param$a+$b], temp_param_reg;"),
        []>;
// Floating-point st.param instantiations (scalar and two-element vector).
def StoreParamF32   : StoreParamInst  <Float32Regs, ".f32">;
def StoreParamF64   : StoreParamInst  <Float64Regs, ".f64">;
def StoreParamV2F32 : StoreParamV2Inst<Float32Regs, ".f32">;
def StoreParamV2F64 : StoreParamV2Inst<Float64Regs, ".f64">;
// FIXME: StoreParamV4Inst crashes llvm-tblgen :(
//def StoreParamV4F32    : StoreParamV4Inst<Float32Regs, ".f32">;
// Four-element f32 vector store into an outgoing parameter, written out by
// hand instead of through StoreParamV4Inst (see FIXME above).
// No selection pattern is attached (empty pattern list).
def StoreParamV4F32 : NVPTXInst<(outs),
        (ins Float32Regs:$val, Float32Regs:$val2,
             Float32Regs:$val3, Float32Regs:$val4,
             i32imm:$a, i32imm:$b),
        "st.param.v4.f32\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
        []>;
// Register moves into outgoing parameters, selected for MoveToParam.
def MoveToParamI64 : MoveToParamInst<Int64Regs,   ".b64">;
def MoveToParamI32 : MoveToParamInst<Int32Regs,   ".b32">;
def MoveToParamF64 : MoveToParamInst<Float64Regs, ".f64">;
def MoveToParamF32 : MoveToParamInst<Float32Regs, ".f32">;
// The 16-bit source is first converted to 32 bits in temp_param_reg, and
// that register is then moved into the parameter with mov.b32.
def MoveToParamI16 :
  NVPTXInst<(outs),
            (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
            !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
                       "mov.b32\tparam$a, temp_param_reg;"),
            [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
// st.param instantiations that write to func_retval0.
// The .b8 and .b16 forms both use Int16Regs as their register class.

// Integer: scalar, v2, v4.
def StoreRetvalI64   : StoreRetvalInst<Int64Regs, ".b64">;
def StoreRetvalI32   : StoreRetvalInst<Int32Regs, ".b32">;
def StoreRetvalI16   : StoreRetvalInst<Int16Regs, ".b16">;
def StoreRetvalI8    : StoreRetvalInst<Int16Regs, ".b8">;
def StoreRetvalV2I64 : StoreRetvalV2Inst<Int64Regs, ".b64">;
def StoreRetvalV2I32 : StoreRetvalV2Inst<Int32Regs, ".b32">;
def StoreRetvalV2I16 : StoreRetvalV2Inst<Int16Regs, ".b16">;
def StoreRetvalV2I8  : StoreRetvalV2Inst<Int16Regs, ".b8">;
def StoreRetvalV4I32 : StoreRetvalV4Inst<Int32Regs, ".b32">;
def StoreRetvalV4I16 : StoreRetvalV4Inst<Int16Regs, ".b16">;
def StoreRetvalV4I8  : StoreRetvalV4Inst<Int16Regs, ".b8">;

// Floating point: scalar, v2, v4.
def StoreRetvalF64   : StoreRetvalInst<Float64Regs, ".f64">;
def StoreRetvalF32   : StoreRetvalInst<Float32Regs, ".f32">;
def StoreRetvalV2F64 : StoreRetvalV2Inst<Float64Regs, ".f64">;
def StoreRetvalV2F32 : StoreRetvalV2Inst<Float32Regs, ".f32">;
def StoreRetvalV4F32 : StoreRetvalV4Inst<Float32Regs, ".f32">;
// Register moves into func_retval0, selected for MoveRetval.
// NOTE(review): the I8 and I16 variants share Int16Regs and therefore the
// same pattern; confirm whether the I8 form is still needed.
def MoveRetvalI64 : MoveRetvalInst<Int64Regs,   ".b64">;
def MoveRetvalI32 : MoveRetvalInst<Int32Regs,   ".b32">;
def MoveRetvalI16 : MoveRetvalInst<Int16Regs,   ".b16">;
def MoveRetvalI8  : MoveRetvalInst<Int16Regs,   ".b8">;
def MoveRetvalF64 : MoveRetvalInst<Float64Regs, ".f64">;
def MoveRetvalF32 : MoveRetvalInst<Float32Regs, ".f32">;
// Register moves into a numbered return value, selected for MoveToRetval.
def MoveToRetvalI64 : MoveToRetvalInst<Int64Regs,   ".b64">;
def MoveToRetvalI32 : MoveToRetvalInst<Int32Regs,   ".b32">;
def MoveToRetvalF64 : MoveToRetvalInst<Float64Regs, ".f64">;
def MoveToRetvalF32 : MoveToRetvalInst<Float32Regs, ".f32">;
// The 16-bit form prints a cvt.u32.u16 into func_retval$num instead of a mov.
def MoveToRetvalI16 :
  NVPTXInst<(outs),
            (ins i32imm:$num, Int16Regs:$val),
            "cvt.u32.u16\tfunc_retval$num, $val;",
            [(MoveToRetval (i32 imm:$num), Int16Regs:$val)]>;
// Punctuation around a call's argument list, plus the return instruction.
// CallArgEnd 1 prints ");" while CallArgEnd 0 prints ")" without the
// semicolon.
def CallArgBeginInst :
  NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>;
def CallArgEndInst1 :
  NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>;
def CallArgEndInst0 :
  NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>;
def RETURNInst :
  NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>;
// A register call argument that is not the last one: prints "$a, ".
// Selected for (CallArg 0, $a).
class CallArgInst<NVPTXRegClass regclass>
  : NVPTXInst<(outs),
              (ins regclass:$a),
              "$a, ",
              [(CallArg (i32 0), regclass:$a)]>;
// The final register call argument: prints "$a" with no trailing separator.
// Selected for (LastCallArg 0, $a).
class LastCallArgInst<NVPTXRegClass regclass>
  : NVPTXInst<(outs),
              (ins regclass:$a),
              "$a",
              [(LastCallArg (i32 0), regclass:$a)]>;
// Per-register-class call argument printers (non-last arguments).
def CallArgI64     : CallArgInst<Int64Regs>;
def CallArgI32     : CallArgInst<Int32Regs>;
def CallArgI16     : CallArgInst<Int16Regs>;

def CallArgF64     : CallArgInst<Float64Regs>;
def CallArgF32     : CallArgInst<Float32Regs>;

// Per-register-class printers for the last call argument.
def LastCallArgI64 : LastCallArgInst<Int64Regs>;
def LastCallArgI32 : LastCallArgInst<Int32Regs>;
def LastCallArgI16 : LastCallArgInst<Int16Regs>;

def LastCallArgF64 : LastCallArgInst<Float64Regs>;
def LastCallArgF32 : LastCallArgInst<Float32Regs>;
// Immediate call arguments: the first pattern operand is 0 and the immediate
// is printed as-is.
def CallArgI32imm :
  NVPTXInst<(outs), (ins i32imm:$a),
            "$a, ",
            [(CallArg (i32 0), (i32 imm:$a))]>;
def LastCallArgI32imm :
  NVPTXInst<(outs), (ins i32imm:$a),
            "$a",
            [(LastCallArg (i32 0), (i32 imm:$a))]>;

// Parameter call arguments: the first pattern operand is 1 and the immediate
// is printed as the suffix of a "param" name.
def CallArgParam :
  NVPTXInst<(outs), (ins i32imm:$a),
            "param$a, ",
            [(CallArg (i32 1), (i32 imm:$a))]>;
def LastCallArgParam :
  NVPTXInst<(outs), (ins i32imm:$a),
            "param$a",
            [(LastCallArg (i32 1), (i32 imm:$a))]>;
// Prints the callee of a call, followed by ", " so that the argument list
// can follow. Three forms: a global address, a 32-bit register and a 64-bit
// register holding the callee address.
// NOTE(review): the asm string "$addr, " was missing from each definition and
// has been restored; confirm against the asm printer's expected output.
def CallVoidInst : NVPTXInst<(outs), (ins imem:$addr),
                             "$addr, ",
                             [(CallVoid (Wrapper tglobaladdr:$addr))]>;
def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr),
                                "$addr, ",
                                [(CallVoid Int32Regs:$addr)]>;
def CallVoidInstReg64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
                                  "$addr, ",
                                  [(CallVoid Int64Regs:$addr)]>;
// Prints the prototype reference of a call: ", prototype_<val>;".
// Selected for (Prototype imm:$val).
def PrototypeInst :
  NVPTXInst<(outs),
            (ins i32imm:$val),
            ", prototype_$val;",
            [(Prototype (i32 imm:$val))]>;
// Declarations of the temporaries that receive a call's return values.

// Aligned byte array: ".param .align A .b8 retvalN[S];"
def DeclareRetMemInst :
  NVPTXInst<(outs),
            (ins i32imm:$align, i32imm:$size, i32imm:$num),
            ".param .align $align .b8 retval$num[$size];",
            [(DeclareRetParam (i32 imm:$align), (i32 imm:$size),
                              (i32 imm:$num))]>;
// Scalar in .param space: DeclareRet with first operand 1.
def DeclareRetScalarInst :
  NVPTXInst<(outs),
            (ins i32imm:$size, i32imm:$num),
            ".param .b$size retval$num;",
            [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>;
// Scalar in a register: DeclareRet with first operand 2.
def DeclareRetRegInst :
  NVPTXInst<(outs),
            (ins i32imm:$size, i32imm:$num),
            ".reg .b$size retval$num;",
            [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>;
// Declarations of the outgoing parameters of a call.

// Aligned byte array: ".param .align A .b8 paramN[S];"
def DeclareParamInst :
  NVPTXInst<(outs),
            (ins i32imm:$align, i32imm:$a, i32imm:$size),
            ".param .align $align .b8 param$a[$size];",
            [(DeclareParam (i32 imm:$align), (i32 imm:$a),
                           (i32 imm:$size))]>;
// Scalar in .param space: DeclareScalarParam with last operand 0.
def DeclareScalarParamInst :
  NVPTXInst<(outs),
            (ins i32imm:$a, i32imm:$size),
            ".param .b$size param$a;",
            [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>;
// Scalar in a register: DeclareScalarParam with last operand 1.
def DeclareScalarRegInst :
  NVPTXInst<(outs),
            (ins i32imm:$a, i32imm:$size),
            ".reg .b$size param$a;",
            [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>;
// Register-to-register move selected for MoveParam. Prints
//   mov<asmstr>  $dst, $src;
class MoveParamInst<NVPTXRegClass regclass, string asmstr>
  : NVPTXInst<(outs regclass:$dst),
              (ins regclass:$src),
              !strconcat("mov", !strconcat(asmstr, "\t$dst, $src;")),
              [(set regclass:$dst, (MoveParam regclass:$src))]>;
// MoveParam instantiations per register class.
def MoveParamI64 : MoveParamInst<Int64Regs, ".b64">;
def MoveParamI32 : MoveParamInst<Int32Regs, ".b32">;
// The 16-bit form prints a cvt.u16.u32 instead of a mov.
def MoveParamI16 :
  NVPTXInst<(outs Int16Regs:$dst),
            (ins Int16Regs:$src),
            "cvt.u16.u32\t$dst, $src;",
            [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>;
def MoveParamF64 : MoveParamInst<Float64Regs, ".f64">;
def MoveParamF32 : MoveParamInst<Float32Regs, ".f32">;
// Pseudo instruction selected for PseudoUseParam. Its asm text is only a
// comment ("// Pseudo use of $src").
class PseudoUseParamInst<NVPTXRegClass regclass>
  : NVPTXInst<(outs),
              (ins regclass:$src),
              "// Pseudo use of $src",
              [(PseudoUseParam regclass:$src)]>;
// PseudoUseParam instantiations per register class.
def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs>;
def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs>;
def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs>;
def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>;
def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>;
1882 // Load / Store Handling
// Scalar loads. One definition per addressing form:
//   _avar     symbol                  [$addr]
//   _areg     32-bit register         [$addr]
//   _areg_64  64-bit register         [$addr]
//   _ari      32-bit register + imm   [$addr+$offset]
//   _ari_64   64-bit register + imm   [$addr+$offset]
//   _asi      symbol + imm            [$addr+$offset]
// The LdStCode operands are printed through the volatile/addsp/vec/sign
// operand modifiers and $fromWidth is printed right after the sign code.
// No selection patterns are attached (empty pattern lists).
multiclass LD<NVPTXRegClass regclass> {
  def _avar : NVPTXInst<(outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t$dst, [$addr];"), []>;
  def _areg : NVPTXInst<(outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t$dst, [$addr];"), []>;
  def _areg_64 : NVPTXInst<(outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t$dst, [$addr];"), []>;
  def _ari : NVPTXInst<(outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t$dst, [$addr+$offset];"), []>;
  def _ari_64 : NVPTXInst<(outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t$dst, [$addr+$offset];"), []>;
  def _asi : NVPTXInst<(outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t$dst, [$addr+$offset];"), []>;
}
// Scalar load instantiations. All are marked mayLoad and free of other side
// effects. The i8 and i16 variants both use Int16Regs.
let mayLoad=1, neverHasSideEffects=1 in {
  defm LD_i8  : LD<Int16Regs>;
  defm LD_i16 : LD<Int16Regs>;
  defm LD_i32 : LD<Int32Regs>;
  defm LD_i64 : LD<Int64Regs>;
  defm LD_f32 : LD<Float32Regs>;
  defm LD_f64 : LD<Float64Regs>;
}
// Scalar stores. One definition per addressing form:
//   _avar     symbol                  [$addr]
//   _areg     32-bit register         [$addr]
//   _areg_64  64-bit register         [$addr]
//   _ari      32-bit register + imm   [$addr+$offset]
//   _ari_64   64-bit register + imm   [$addr+$offset]
//   _asi      symbol + imm            [$addr+$offset]
// The LdStCode operands are printed through the volatile/addsp/vec/sign
// operand modifiers and $toWidth is printed right after the sign code.
// No selection patterns are attached (empty pattern lists).
multiclass ST<NVPTXRegClass regclass> {
  def _avar : NVPTXInst<(outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, imem:$addr),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$toWidth \t[$addr], $src;"), []>;
  def _areg : NVPTXInst<(outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$toWidth \t[$addr], $src;"), []>;
  def _areg_64 : NVPTXInst<(outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$toWidth \t[$addr], $src;"), []>;
  def _ari : NVPTXInst<(outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$toWidth \t[$addr+$offset], $src;"), []>;
  def _ari_64 : NVPTXInst<(outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$toWidth \t[$addr+$offset], $src;"), []>;
  def _asi : NVPTXInst<(outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$toWidth \t[$addr+$offset], $src;"), []>;
}
// Scalar store instantiations. All are marked mayStore and free of other
// side effects. The i8 and i16 variants both use Int16Regs.
let mayStore=1, neverHasSideEffects=1 in {
  defm ST_i8  : ST<Int16Regs>;
  defm ST_i16 : ST<Int16Regs>;
  defm ST_i32 : ST<Int32Regs>;
  defm ST_i64 : ST<Int64Regs>;
  defm ST_f32 : ST<Float32Regs>;
  defm ST_f64 : ST<Float64Regs>;
}
// The following instructions are used only during and after vector
// elementization. Vector elementization happens at the machine instruction
// level, so these instructions never appear in the DAG.
// Vector loads with two (_v2_*) or four (_v4_*) destination registers.
// Address-form suffixes:
//   avar     symbol                  [$addr]
//   areg     32-bit register         [$addr]
//   areg_64  64-bit register         [$addr]
//   ari      32-bit register + imm   [$addr+$offset]
//   ari_64   64-bit register + imm   [$addr+$offset]
//   asi      symbol + imm            [$addr+$offset]
// Every _v4_ form declares all four outputs referenced by its asm string.
// No selection patterns are attached (empty pattern lists).
multiclass LD_VEC<NVPTXRegClass regclass> {
  def _v2_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
  def _v2_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
  def _v2_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
  def _v2_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
  def _v2_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
  def _v2_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
  def _v4_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                 regclass:$dst3, regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
  def _v4_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                                 regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
  def _v4_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                    regclass:$dst3, regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
  def _v4_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                                regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
    []>;
  def _v4_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                   regclass:$dst3, regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
    []>;
  def _v4_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                                regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
    []>;
}
// Vector load instantiations. All are marked mayLoad and free of other side
// effects. The i8 and i16 variants both use Int16Regs.
let mayLoad=1, neverHasSideEffects=1 in {
  defm LDV_i8  : LD_VEC<Int16Regs>;
  defm LDV_i16 : LD_VEC<Int16Regs>;
  defm LDV_i32 : LD_VEC<Int32Regs>;
  defm LDV_i64 : LD_VEC<Int64Regs>;
  defm LDV_f32 : LD_VEC<Float32Regs>;
  defm LDV_f64 : LD_VEC<Float64Regs>;
}
// Vector stores with two (_v2_*) or four (_v4_*) source registers.
// Address-form suffixes:
//   avar     symbol                  [$addr]
//   areg     32-bit register         [$addr]
//   areg_64  64-bit register         [$addr]
//   ari      32-bit register + imm   [$addr+$offset]
//   ari_64   64-bit register + imm   [$addr+$offset]
//   asi      symbol + imm            [$addr+$offset]
// Every form that prints $offset declares it as an input operand.
// No selection patterns are attached (empty pattern lists).
multiclass ST_VEC<NVPTXRegClass regclass> {
  def _v2_avar : NVPTXInst<(outs),
    (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
         LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
  def _v2_areg : NVPTXInst<(outs),
    (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
         LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
  def _v2_areg_64 : NVPTXInst<(outs),
    (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
         LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
  def _v2_ari : NVPTXInst<(outs),
    (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
         LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr,
         i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
  def _v2_ari_64 : NVPTXInst<(outs),
    (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
         LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr,
         i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
  def _v2_asi : NVPTXInst<(outs),
    (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
         LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr,
         i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
  def _v4_avar : NVPTXInst<(outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
  def _v4_areg : NVPTXInst<(outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
  def _v4_areg_64 : NVPTXInst<(outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
  def _v4_ari : NVPTXInst<(outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
    []>;
  def _v4_ari_64 : NVPTXInst<(outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
    []>;
  def _v4_asi : NVPTXInst<(outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr, i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
    []>;
}
// Vector store instantiations. All are marked mayStore and free of other
// side effects. The i8 and i16 variants both use Int16Regs.
let mayStore=1, neverHasSideEffects=1 in {
  defm STV_i8  : ST_VEC<Int16Regs>;
  defm STV_i16 : ST_VEC<Int16Regs>;
  defm STV_i32 : ST_VEC<Int32Regs>;
  defm STV_i64 : ST_VEC<Int64Regs>;
  defm STV_f32 : ST_VEC<Float32Regs>;
  defm STV_f64 : ST_VEC<Float64Regs>;
}
2136 //---- Conversion ----
// Integer -> floating-point conversions for OpNode. OpStr is the integer
// signedness letter spliced into the cvt source type. i1 sources are lowered
// with selp (1.0 when the predicate is set, 0.0 otherwise); all other widths
// use cvt.rn.
// NOTE(review): the `def` header lines were missing from this copy; the
// names below follow the CVT<dst><src> scheme and must be confirmed against
// any code that refers to these records by name.
multiclass CVT_INT_TO_FP <string OpStr, SDNode OpNode> {
// FIXME: need to add f16 support
//  def CVTf16i16 :
//    NVPTXInst<(outs Float16Regs:$d), (ins Int16Regs:$a),
//              !strconcat(!strconcat("cvt.rn.f16.", OpStr), "16 \t$d, $a;"),
//              [(set Float16Regs:$d, (OpNode Int16Regs:$a))]>;
//  def CVTf16i32 :
//    NVPTXInst<(outs Float16Regs:$d), (ins Int32Regs:$a),
//              !strconcat(!strconcat("cvt.rn.f16.", OpStr), "32 \t$d, $a;"),
//              [(set Float16Regs:$d, (OpNode Int32Regs:$a))]>;
//  def CVTf16i64 :
//    NVPTXInst<(outs Float16Regs:$d), (ins Int64Regs:$a),
//              !strconcat(!strconcat("cvt.rn.f16.", OpStr), "64 \t$d, $a;"),
//              [(set Float16Regs:$d, (OpNode Int64Regs:$a))]>;

  def CVTf32i1 :
    NVPTXInst<(outs Float32Regs:$d), (ins Int1Regs:$a),
              "selp.f32 \t$d, 1.0, 0.0, $a;",
              [(set Float32Regs:$d, (OpNode Int1Regs:$a))]>;
  def CVTf32i16 :
    NVPTXInst<(outs Float32Regs:$d), (ins Int16Regs:$a),
              !strconcat(!strconcat("cvt.rn.f32.", OpStr), "16 \t$d, $a;"),
              [(set Float32Regs:$d, (OpNode Int16Regs:$a))]>;
  def CVTf32i32 :
    NVPTXInst<(outs Float32Regs:$d), (ins Int32Regs:$a),
              !strconcat(!strconcat("cvt.rn.f32.", OpStr), "32 \t$d, $a;"),
              [(set Float32Regs:$d, (OpNode Int32Regs:$a))]>;
  def CVTf32i64 :
    NVPTXInst<(outs Float32Regs:$d), (ins Int64Regs:$a),
              !strconcat(!strconcat("cvt.rn.f32.", OpStr), "64 \t$d, $a;"),
              [(set Float32Regs:$d, (OpNode Int64Regs:$a))]>;

  def CVTf64i1 :
    NVPTXInst<(outs Float64Regs:$d), (ins Int1Regs:$a),
              "selp.f64 \t$d, 1.0, 0.0, $a;",
              [(set Float64Regs:$d, (OpNode Int1Regs:$a))]>;
  def CVTf64i16 :
    NVPTXInst<(outs Float64Regs:$d), (ins Int16Regs:$a),
              !strconcat(!strconcat("cvt.rn.f64.", OpStr), "16 \t$d, $a;"),
              [(set Float64Regs:$d, (OpNode Int16Regs:$a))]>;
  def CVTf64i32 :
    NVPTXInst<(outs Float64Regs:$d), (ins Int32Regs:$a),
              !strconcat(!strconcat("cvt.rn.f64.", OpStr), "32 \t$d, $a;"),
              [(set Float64Regs:$d, (OpNode Int32Regs:$a))]>;
  def CVTf64i64 :
    NVPTXInst<(outs Float64Regs:$d), (ins Int64Regs:$a),
              !strconcat(!strconcat("cvt.rn.f64.", OpStr), "64 \t$d, $a;"),
              [(set Float64Regs:$d, (OpNode Int64Regs:$a))]>;
}
// Signed ("s") and unsigned ("u") integer -> floating-point conversions.
defm Sint_to_fp : CVT_INT_TO_FP<"s", sint_to_fp>;
defm Uint_to_fp : CVT_INT_TO_FP<"u", uint_to_fp>;
// Floating-point -> integer conversions for OpNode, printed as cvt.rzi.
// OpStr is the integer signedness letter spliced into the destination type.
// Each f32 source has two variants: an .ftz form guarded by doF32FTZ, listed
// first, and an unguarded form.
// NOTE(review): the `def` header lines were missing from this copy; the
// names below follow the CVT<dst><src>[_ftz] scheme and must be confirmed
// against any code that refers to these records by name.
multiclass CVT_FP_TO_INT <string OpStr, SDNode OpNode> {
// FIXME: need to add f16 support
//  def CVTi16f16:
//    NVPTXInst<(outs Int16Regs:$d), (ins Float16Regs:$a),
//              !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f16 \t$d, $a;"),
//              [(set Int16Regs:$d, (OpNode Float16Regs:$a))]>;
  def CVTi16f32_ftz:
    NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a),
              !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"),
              [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
  def CVTi16f32:
    NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a),
              !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"),
              [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>;
  def CVTi16f64:
    NVPTXInst<(outs Int16Regs:$d), (ins Float64Regs:$a),
              !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"),
              [(set Int16Regs:$d, (OpNode Float64Regs:$a))]>;

// FIXME: need to add f16 support
//  def CVTi32f16:
//    NVPTXInst<(outs Int32Regs:$d), (ins Float16Regs:$a),
//              !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f16 \t$d, $a;"),
//              [(set Int32Regs:$d, (OpNode Float16Regs:$a))]>;
  def CVTi32f32_ftz:
    NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a),
              !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "32.f32 \t$d, $a;"),
              [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
  def CVTi32f32:
    NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a),
              !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f32 \t$d, $a;"),
              [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>;
  def CVTi32f64:
    NVPTXInst<(outs Int32Regs:$d), (ins Float64Regs:$a),
              !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f64 \t$d, $a;"),
              [(set Int32Regs:$d, (OpNode Float64Regs:$a))]>;

// FIXME: need to add f16 support
//  def CVTi64f16:
//    NVPTXInst<(outs Int64Regs:$d), (ins Float16Regs:$a),
//              !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f16 \t$d, $a;"),
//              [(set Int64Regs:$d, (OpNode Float16Regs:$a))]>;
  def CVTi64f32_ftz:
    NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a),
              !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "64.f32 \t$d, $a;"),
              [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
  def CVTi64f32:
    NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a),
              !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f32 \t$d, $a;"),
              [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>;
  def CVTi64f64:
    NVPTXInst<(outs Int64Regs:$d), (ins Float64Regs:$a),
              !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f64 \t$d, $a;"),
              [(set Int64Regs:$d, (OpNode Float64Regs:$a))]>;
}
// Instantiate the floating-point-to-integer conversions: type letter "s" for
// the fp_to_sint node and "u" for the fp_to_uint node.
2247 defm Fp_to_sint : CVT_FP_TO_INT <"s", fp_to_sint>;
2248 defm Fp_to_uint : CVT_FP_TO_INT <"u", fp_to_uint>;
// Extension of a 1-bit predicate register to i16 / i32 / i64 yielding 0 or 1.
//   OpNode - the SelectionDAG node each record's pattern matches.
// Each record emits "selp.uN $d, 1, 0, $a", i.e. picks the immediate 1 when
// the predicate $a is true and 0 otherwise.
// NOTE(review): the "def <name> :" header lines and the closing brace are not
// visible in this listing - confirm against the full file.
2250 multiclass INT_EXTEND_UNSIGNED_1 <SDNode OpNode> {
2252 NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a),
2253 "selp.u16 \t$d, 1, 0, $a;",
2254 [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>;
2256 NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a),
2257 "selp.u32 \t$d, 1, 0, $a;",
2258 [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>;
2260 NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a),
2261 "selp.u64 \t$d, 1, 0, $a;",
2262 [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>;
// Extension of a 1-bit predicate register to i16 / i32 / i64 yielding 0 or -1.
//   OpNode - the SelectionDAG node each record's pattern matches.
// Each record emits "selp.sN $d, -1, 0, $a", i.e. picks the immediate -1 (all
// bits set) when the predicate $a is true and 0 otherwise.
// NOTE(review): the "def <name> :" header lines and the closing brace are not
// visible in this listing - confirm against the full file.
2265 multiclass INT_EXTEND_SIGNED_1 <SDNode OpNode> {
2267 NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a),
2268 "selp.s16 \t$d, -1, 0, $a;",
2269 [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>;
2271 NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a),
2272 "selp.s32 \t$d, -1, 0, $a;",
2273 [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>;
2275 NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a),
2276 "selp.s64 \t$d, -1, 0, $a;",
2277 [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>;
// Integer widening between the 16-, 32- and 64-bit register classes.
//   OpStr  - type letter used for BOTH the destination and source type in the
//            mnemonic (the instantiations in this file pass "s" and "u").
//   OpNode - the SelectionDAG node each record's pattern matches.
// The three records emit, in order:
//   cvt.<OpStr>32.<OpStr>16   (i16 -> i32)
//   cvt.<OpStr>64.<OpStr>16   (i16 -> i64)
//   cvt.<OpStr>64.<OpStr>32   (i32 -> i64)
// NOTE(review): the "def <name> :" header lines and the closing brace are not
// visible in this listing - confirm against the full file.
2280 multiclass INT_EXTEND <string OpStr, SDNode OpNode> {
2282 NVPTXInst<(outs Int32Regs:$d), (ins Int16Regs:$a),
2283 !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.",
2284 !strconcat(OpStr, "16 \t$d, $a;")))),
2285 [(set Int32Regs:$d, (OpNode Int16Regs:$a))]>;
2287 NVPTXInst<(outs Int64Regs:$d), (ins Int16Regs:$a),
2288 !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
2289 !strconcat(OpStr, "16 \t$d, $a;")))),
2290 [(set Int64Regs:$d, (OpNode Int16Regs:$a))]>;
2292 NVPTXInst<(outs Int64Regs:$d), (ins Int32Regs:$a),
2293 !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
2294 !strconcat(OpStr, "32 \t$d, $a;")))),
2295 [(set Int64Regs:$d, (OpNode Int32Regs:$a))]>;
// Extensions from the 1-bit predicate class: sext uses the signed (0 / -1)
// multiclass; zext and anyext both use the unsigned (0 / 1) multiclass.
2298 defm Sint_extend_1 : INT_EXTEND_SIGNED_1<sext>;
2299 defm Zint_extend_1 : INT_EXTEND_UNSIGNED_1<zext>;
2300 defm Aint_extend_1 : INT_EXTEND_UNSIGNED_1<anyext>;
// Extensions between the 16/32/64-bit classes: sext uses the "s" cvt forms;
// zext and anyext both use the "u" forms.
2302 defm Sint_extend : INT_EXTEND <"s", sext>;
2303 defm Zint_extend : INT_EXTEND <"u", zext>;
2304 defm Aint_extend : INT_EXTEND <"u", anyext>;
// Helper class whose field `s` is the multi-line asm string used by the
// TRUNC_*to1 records in this file.
//   sz - type suffix spliced into the string (callers pass ".b16", ".b32",
//        ".b64").
// From the visible pieces, the string opens a "{{ ... }}" scope, declares a
// name `temp`, writes an operation "temp, $a, 1", and finishes with a
// "<sz>.eq $d, temp, 1" comparison that defines $d.
// NOTE(review): several !strconcat operand lines (the opcode fragments) and
// the closing brace of the class are not visible in this listing; presumably
// the sequence is "and" followed by "setp" - confirm against the full file.
2306 class TRUNC_to1_asm<string sz> {
2307 string s = !strconcat("{{\n\t",
2310 !strconcat(" temp;\n\t",
2313 !strconcat("\t temp, $a, 1;\n\t",
2315 !strconcat(sz, ".eq \t $d, temp, 1;\n\t}}")))))))));
// Integer truncation records, each matching the `trunc` node.
// Truncation to i32 / i16 is a single unsigned cvt to the narrower type.
2318 def TRUNC_64to32 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
2319 "cvt.u32.u64 \t$d, $a;",
2320 [(set Int32Regs:$d, (trunc Int64Regs:$a))]>;
2321 def TRUNC_64to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int64Regs:$a),
2322 "cvt.u16.u64 \t$d, $a;",
2323 [(set Int16Regs:$d, (trunc Int64Regs:$a))]>;
2324 def TRUNC_32to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int32Regs:$a),
2325 "cvt.u16.u32 \t$d, $a;",
2326 [(set Int16Regs:$d, (trunc Int32Regs:$a))]>;
// Truncation to the 1-bit predicate class uses the multi-instruction asm
// string built by TRUNC_to1_asm for the source width.
2327 def TRUNC_64to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2328 TRUNC_to1_asm<".b64">.s,
2329 [(set Int1Regs:$d, (trunc Int64Regs:$a))]>;
2330 def TRUNC_32to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2331 TRUNC_to1_asm<".b32">.s,
2332 [(set Int1Regs:$d, (trunc Int32Regs:$a))]>;
2333 def TRUNC_16to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int16Regs:$a),
2334 TRUNC_to1_asm<".b16">.s,
2335 [(set Int1Regs:$d, (trunc Int16Regs:$a))]>;
2337 // Select instructions
// These patterns handle `select` whose condition lives in a 32-bit register
// rather than a predicate register: the condition is first narrowed to a
// predicate with TRUNC_32to1 and then passed as the last operand of the
// register-register SELECT record for the value type (i16, i32, i64, f32,
// f64). The SELECT*rr records themselves are defined elsewhere in the file.
2338 def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b),
2339 (SELECTi16rr Int16Regs:$a, Int16Regs:$b,
2340 (TRUNC_32to1 Int32Regs:$pred))>;
2341 def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b),
2342 (SELECTi32rr Int32Regs:$a, Int32Regs:$b,
2343 (TRUNC_32to1 Int32Regs:$pred))>;
2344 def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b),
2345 (SELECTi64rr Int64Regs:$a, Int64Regs:$b,
2346 (TRUNC_32to1 Int32Regs:$pred))>;
2347 def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b),
2348 (SELECTf32rr Float32Regs:$a, Float32Regs:$b,
2349 (TRUNC_32to1 Int32Regs:$pred))>;
2350 def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b),
2351 (SELECTf64rr Float64Regs:$a, Float64Regs:$b,
2352 (TRUNC_32to1 Int32Regs:$pred))>;
// Bit-preserving reinterpretation between an integer and a floating-point
// register class of the same width; matches the `bitconvert` node.
//   SzStr       - bit width as text ("32" or "64"), appended to "mov.b".
//   regclassIn  - register class of the source operand $a.
//   regclassOut - register class of the result $d.
// Emits "mov.b<SzStr> $d, $a".
2354 class F_BITCONVERT<string SzStr, NVPTXRegClass regclassIn,
2355 NVPTXRegClass regclassOut> :
2356 NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a),
2357 !strconcat("mov.b", !strconcat(SzStr, " \t $d, $a;")),
2358 [(set regclassOut:$d, (bitconvert regclassIn:$a))]>;
// One record per direction and width: int->float and float->int, 32 and 64 bit.
2360 def BITCONVERT_32_I2F : F_BITCONVERT<"32", Int32Regs, Float32Regs>;
2361 def BITCONVERT_32_F2I : F_BITCONVERT<"32", Float32Regs, Int32Regs>;
2362 def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs, Float64Regs>;
2363 def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>;
2365 // pack a set of smaller int registers to a larger int register
// Each record emits a single "mov.b32" / "mov.b64" whose source is a
// brace-enclosed list of the narrower registers ("{{" and "}}" in the asm
// string stand for literal braces). V2F32toF64 does the same for two f32
// registers into one f64 register.
// NOTE(review): the pattern line that terminates each record is not visible
// in this listing - confirm against the full file.
2366 def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d),
2367 (ins Int16Regs:$s1, Int16Regs:$s2,
2368 Int16Regs:$s3, Int16Regs:$s4),
2369 "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};",
2371 def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d),
2372 (ins Int16Regs:$s1, Int16Regs:$s2),
2373 "mov.b32\t$d, {{$s1, $s2}};",
2375 def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d),
2376 (ins Int32Regs:$s1, Int32Regs:$s2),
2377 "mov.b64\t$d, {{$s1, $s2}};",
2379 def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d),
2380 (ins Float32Regs:$s1, Float32Regs:$s2),
2381 "mov.b64\t$d, {{$s1, $s2}};",
2384 // unpack a larger int register to a set of smaller int registers
// Each record emits a single "mov.b32" / "mov.b64" whose destination is a
// brace-enclosed list of the narrower registers and whose source is $s.
// F64toV2F32 does the same for one f64 register into two f32 registers.
// NOTE(review): for the first three records the "(ins ...)" line, and for all
// four the terminating pattern line, are not visible in this listing -
// confirm against the full file.
2385 def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2,
2386 Int16Regs:$d3, Int16Regs:$d4),
2388 "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;",
2390 def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2),
2392 "mov.b32\t{{$d1, $d2}}, $s;",
2394 def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2),
2396 "mov.b64\t{{$d1, $d2}}, $s;",
2398 def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2),
2399 (ins Float64Regs:$s),
2400 "mov.b64\t{{$d1, $d2}}, $s;",
// f64 -> f32 narrowing (matches `fround`), emitted as cvt.rn[.ftz].f32.f64.
// The ".ftz" record is gated by Requires<[doF32FTZ]>; the plain record that
// follows has the same pattern and no predicate.
2403 def FPRound_ftz : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a),
2404 "cvt.rn.ftz.f32.f64 \t$d, $a;",
2405 [(set Float32Regs:$d, (fround Float64Regs:$a))]>, Requires<[doF32FTZ]>;
2407 def FPRound : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a),
2408 "cvt.rn.f32.f64 \t$d, $a;",
2409 [(set Float32Regs:$d, (fround Float64Regs:$a))]>;
// f32 -> f64 widening (matches `fextend`), emitted as cvt[.ftz].f64.f32 with
// no rounding modifier. Same gated / ungated pairing as above.
2411 def FPExtend_ftz : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a),
2412 "cvt.ftz.f64.f32 \t$d, $a;",
2413 [(set Float64Regs:$d, (fextend Float32Regs:$a))]>, Requires<[doF32FTZ]>;
2415 def FPExtend : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a),
2416 "cvt.f64.f32 \t$d, $a;",
2417 [(set Float64Regs:$d, (fextend Float32Regs:$a))]>;
// DAG node for NVPTXISD::RET_FLAG: no typed operands or results (SDTNone),
// carries a chain and may take an incoming glue operand.
2419 def retflag : SDNode<"NVPTXISD::RET_FLAG", SDTNone,
2420 [SDNPHasChain, SDNPOptInGlue]>;
2422 //-----------------------------------
2424 //-----------------------------------
// Terminator instructions; everything inside this `let` is marked
// isTerminator.
//   Return       - "ret;", matches retflag; also isReturn and isBarrier.
//   CBranch      - "@$a bra $target": branch taken when predicate $a is true;
//                  matches brcond on a predicate register.
//   CBranchOther - "@!$a bra $target": branch taken when predicate $a is false.
//   GOTO         - "bra.uni $target": unconditional branch; isBranch and
//                  isBarrier.
// NOTE(review): some lines of this block (further `let` lines, the pattern
// lists of CBranchOther and GOTO, and the closing brace) are not visible in
// this listing - confirm against the full file.
2426 let isTerminator=1 in {
2427 let isReturn=1, isBarrier=1 in
2428 def Return : NVPTXInst<(outs), (ins), "ret;", [(retflag)]>;
2431 def CBranch : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
2432 "@$a bra \t$target;",
2433 [(brcond Int1Regs:$a, bb:$target)]>;
2435 def CBranchOther : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
2436 "@!$a bra \t$target;",
2439 let isBranch=1, isBarrier=1 in
2440 def GOTO : NVPTXInst<(outs), (ins brtarget:$target),
2441 "bra.uni \t$target;",
// brcond whose condition is in a 32-bit register: the predicate for CBranch
// is produced by ISetUNEi32ri_p applied to $a and the immediate 0.
// NOTE(review): ISetUNEi32ri_p is defined elsewhere; presumably a
// "set if not equal" compare yielding a predicate - confirm.
2445 def : Pat<(brcond Int32Regs:$a, bb:$target), (CBranch
2446 (ISetUNEi32ri_p Int32Regs:$a, 0), bb:$target)>;
2448 // SelectionDAGBuilder::visitSwitchCase() will invert the condition of a
2449 // conditional branch if
2450 // the target block is the next block so that the code can fall through to the
2452 // The inversion is done by 'xor condition, 1', which will be translated to
2453 // (setne condition, -1).
2454 // Since ptx supports '@!pred bra target', we should use it.
// The pattern below therefore matches brcond on (setne $a, -1) and selects
// the negated-predicate branch CBranchOther directly on $a.
2455 def : Pat<(brcond (i1 (setne Int1Regs:$a, -1)), bb:$target),
2456 (CBranchOther Int1Regs:$a, bb:$target)>;
// Type profiles for the call-sequence markers: CALLSEQ_START takes one i32
// operand, CALLSEQ_END takes two i32 operands.
2459 def SDT_NVPTXCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
2460 def SDT_NVPTXCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
2461 SDTCisVT<1, i32> ]>;
// DAG nodes bound to the target-independent ISD::CALLSEQ_START / CALLSEQ_END
// opcodes. Both are chained and produce glue; callseq_end may also consume
// glue.
// NOTE(review): the final line of callseq_end's property list is not visible
// in this listing - confirm against the full file.
2463 def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_NVPTXCallSeqStart,
2464 [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
2465 def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_NVPTXCallSeqEnd,
2466 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
// Call node for NVPTXISD::CALL: no results, one i32 operand (the callee);
// chained, may consume glue, produces glue.
2469 def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
2470 def call : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall,
2471 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
// Operand type for a call destination (an i32 operand).
2472 def calltarget : Operand<i32>;
// Call instruction printed as "call $dst, (1);". It has an empty pattern
// list, so it is selected only through the explicit Pat records below.
2474 def CALL : NVPTXInst<(outs), (ins calltarget:$dst),
2475 "call \t$dst, (1);", []>;
// Select CALL for `call` nodes whose callee is a target global address or a
// target external symbol.
2478 def : Pat<(call tglobaladdr:$dst),
2479 (CALL tglobaladdr:$dst)>;
2480 def : Pat<(call texternalsym:$dst),
2481 (CALL texternalsym:$dst)>;
2483 // Pseudo instructions.
// Thin wrapper that forwards its four arguments (outs, ins, asm string,
// pattern list) unchanged to NVPTXInst; it adds no fields of its own here.
2484 class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
2485 : NVPTXInst<outs, ins, asmstr, pattern>;
2487 // @TODO: We use some tricks here to emit curly braces. Can we clean this up
2488 // a bit without TableGen modifications?
// Callseq_Start matches callseq_start and prints a "// Callseq Start <amt>"
// comment, an opening brace and a ".reg .b32 temp_param_reg;" declaration.
// The "{{" / "}}" pair in the asm string is the brace trick referred to
// above; the "}}" sits after a "//" in the printed text.
2489 def Callseq_Start : NVPTXInst<(outs), (ins i32imm:$amt),
2490 "// Callseq Start $amt\n\t{{\n\t.reg .b32 temp_param_reg;\n\t// <end>}}",
2491 [(callseq_start timm:$amt)]>;
// Callseq_End matches callseq_end and mirrors the above: here the "{{" sits
// after a "//" in the printed text and the "}}" does not, followed by a
// "// Callseq End <amt1>" comment. $amt2 is matched but not printed.
2492 def Callseq_End : NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
2493 "\n\t//{{\n\t}}// Callseq End $amt1",
2494 [(callseq_end timm:$amt1, timm:$amt2)]>;
// Instruction record with no output and no input operands.
// NOTE(review): the asm string and pattern lines of this record are not
// visible in this listing - confirm against the full file.
2498 def trapinst : NVPTXInst<(outs), (ins),
2502 include "NVPTXIntrinsics.td"
2505 //-----------------------------------
2507 //-----------------------------------
2508 // BSWAP is currently expanded. The following is a more efficient approach:
2509 // - for < sm_20, use vector scalar mov, as Tesla supports native 16-bit registers
2510 // - for sm_20, use prmt (use vector scalar mov to get the pack and
2511 // unpack). sm_20 supports native 32-bit register, but not native 16-bit