1 //===- NVPTXInstrInfo.td - NVPTX Instruction defs -------------*- tblgen-*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the PTX instructions in TableGen format.
12 //===----------------------------------------------------------------------===//
14 include "NVPTXInstrFormats.td"
// Instruction record with no outputs, no inputs, an empty assembly string
// and an empty selection-pattern list.
17 def NOP : NVPTXInst<(outs), (ins), "", []>;
19 // List of vector specific properties
// Each record below is a VecInstTypeEnum carrying a distinct numeric tag.
// NOTE(review): VecInstTypeEnum is not defined in this file; presumably it
// comes from the included NVPTXInstrFormats.td -- confirm.
// Tags 1-7 are sequential.
20 def isVecLD : VecInstTypeEnum<1>;
21 def isVecST : VecInstTypeEnum<2>;
22 def isVecBuild : VecInstTypeEnum<3>;
23 def isVecShuffle : VecInstTypeEnum<4>;
24 def isVecExtract : VecInstTypeEnum<5>;
25 def isVecInsert : VecInstTypeEnum<6>;
26 def isVecDest : VecInstTypeEnum<7>;
// isVecOther uses 15 rather than the next sequential value (8).
27 def isVecOther : VecInstTypeEnum<15>;
29 //===----------------------------------------------------------------------===//
30 // NVPTX Operand Definitions.
31 //===----------------------------------------------------------------------===//
// Operand record whose value type is OtherVT.
// NOTE(review): the name suggests it is used for branch targets; no user of
// this operand is visible in this part of the file -- confirm.
33 def brtarget : Operand<OtherVT>;
35 //===----------------------------------------------------------------------===//
36 // NVPTX Instruction Predicate Definitions
37 //===----------------------------------------------------------------------===//
// Predicates whose condition strings call feature queries on `Subtarget`.
40 def hasAtomRedG32 : Predicate<"Subtarget.hasAtomRedG32()">;
41 def hasAtomRedS32 : Predicate<"Subtarget.hasAtomRedS32()">;
42 def hasAtomRedGen32 : Predicate<"Subtarget.hasAtomRedGen32()">;
// Holds when hasAtomRedGen32() is false and hasAtomRedG32() is true.
43 def useAtomRedG32forGen32 :
44 Predicate<"!Subtarget.hasAtomRedGen32() && Subtarget.hasAtomRedG32()">;
45 def hasBrkPt : Predicate<"Subtarget.hasBrkPt()">;
46 def hasAtomRedG64 : Predicate<"Subtarget.hasAtomRedG64()">;
47 def hasAtomRedS64 : Predicate<"Subtarget.hasAtomRedS64()">;
48 def hasAtomRedGen64 : Predicate<"Subtarget.hasAtomRedGen64()">;
// 64-bit counterpart of useAtomRedG32forGen32: hasAtomRedGen64() is false
// and hasAtomRedG64() is true.
49 def useAtomRedG64forGen64 :
50 Predicate<"!Subtarget.hasAtomRedGen64() && Subtarget.hasAtomRedG64()">;
51 def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">;
52 def hasVote : Predicate<"Subtarget.hasVote()">;
53 def hasDouble : Predicate<"Subtarget.hasDouble()">;
54 def reqPTX20 : Predicate<"Subtarget.reqPTX20()">;
55 def hasLDU : Predicate<"Subtarget.hasLDU()">;
56 def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
// The predicates below test plain identifiers (UseF32FTZ, doFMAF32, ...)
// instead of Subtarget methods.
// NOTE(review): where those identifiers are declared is not visible in this
// file -- confirm against the instruction selector sources.
58 def doF32FTZ : Predicate<"UseF32FTZ">;
// Each *_ftz predicate is its base condition AND-ed with UseF32FTZ.
60 def doFMAF32 : Predicate<"doFMAF32">;
61 def doFMAF32_ftz : Predicate<"(doFMAF32 && UseF32FTZ)">;
62 def doFMAF32AGG : Predicate<"doFMAF32AGG">;
63 def doFMAF32AGG_ftz : Predicate<"(doFMAF32AGG && UseF32FTZ)">;
64 def doFMAF64 : Predicate<"doFMAF64">;
65 def doFMAF64AGG : Predicate<"doFMAF64AGG">;
66 def doFMADF32 : Predicate<"doFMADF32">;
67 def doFMADF32_ftz : Predicate<"(doFMADF32 && UseF32FTZ)">;
69 def doMulWide : Predicate<"doMulWide">;
71 def allowFMA : Predicate<"allowFMA">;
72 def allowFMA_ftz : Predicate<"(allowFMA && UseF32FTZ)">;
// do_DIVF32_PREC selects the f32 division flavour: 0 -> APPROX, 1 -> FULL.
// No predicate is defined here for any other value of do_DIVF32_PREC.
74 def do_DIVF32_APPROX : Predicate<"do_DIVF32_PREC==0">;
75 def do_DIVF32_FULL : Predicate<"do_DIVF32_PREC==1">;
77 def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
// Always-true predicate: the condition string is the literal "1".
79 def true : Predicate<"1">;
81 //===----------------------------------------------------------------------===//
82 // Special Handling for 8-bit Operands and Operations
84 // PTX supports 8-bit signed and unsigned types, but does not support 8-bit
85 // operations (like add, shift, etc) except for ld/st/cvt. SASS does not have
88 // PTX ld, st and cvt instructions permit source and destination data operands
89 // to be wider than the instruction-type size, so that narrow values may be
90 // loaded, stored, and converted using regular-width registers.
92 // So in PTX generation, we
93 // - always use 16-bit registers in place of 8-bit registers.
94 // (8-bit variables should stay as 8-bit as they represent memory layout.)
95 // - for the following 8-bit operations, we sign-ext/zero-ext the 8-bit values
103 // We are patching the operations by inserting the cvt instructions in the
104 // asm strings of the affected instructions.
106 // Since vector operations, except for ld/st, are eventually elementized, we
107 // do not need to special-handle the vector 8-bit operations.
110 //===----------------------------------------------------------------------===//
112 // Generate string block like
116 // cvt.s16.s8 %temp1, %a;
117 // cvt.s16.s8 %temp2, %b;
118 // opc.s16 %dst, %temp1, %temp2;
120 // when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
121 class Handle_i8rr<string OpcStr, string TypeStr, string CVTStr> {
122 string s = !strconcat("{{\n\t",
123 !strconcat(".reg .", !strconcat(TypeStr,
124 !strconcat(" \t%temp1;\n\t",
125 !strconcat(".reg .", !strconcat(TypeStr,
126 !strconcat(" \t%temp2;\n\t",
127 !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t",
128 !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t",
129 !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}"))))))))))));
132 // Generate string block like
136 // cvt.s16.s8 %temp1, %a;
137 // mov.b16 %temp2, %b;
138 // cvt.s16.s8 %temp2, %temp2;
139 // opc.s16 %dst, %temp1, %temp2;
141 // when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
142 class Handle_i8ri<string OpcStr, string TypeStr, string CVTStr> {
143 string s = !strconcat("{{\n\t",
144 !strconcat(".reg .", !strconcat(TypeStr,
145 !strconcat(" \t%temp1;\n\t",
147 !strconcat(TypeStr, !strconcat(" \t%temp2;\n\t",
148 !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t",
149 !strconcat("mov.b16 \t%temp2, $b;\n\t",
150 !strconcat(CVTStr, !strconcat(" \t%temp2, %temp2;\n\t",
151 !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))));
154 // Generate string block like
158 // mov.b16 %temp1, %a;
159 // cvt.s16.s8 %temp1, %temp1;
160 // cvt.s16.s8 %temp2, %b;
161 // opc.s16 %dst, %temp1, %temp2;
163 // when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
164 class Handle_i8ir<string OpcStr, string TypeStr, string CVTStr> {
165 string s = !strconcat("{{\n\t",
166 !strconcat(".reg .", !strconcat(TypeStr,
167 !strconcat(" \t%temp1;\n\t",
168 !strconcat(".reg .", !strconcat(TypeStr,
169 !strconcat(" \t%temp2;\n\t",
170 !strconcat("mov.b16 \t%temp1, $a;\n\t",
171 !strconcat(CVTStr, !strconcat(" \t%temp1, %temp1;\n\t",
172 !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t",
173 !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))));
177 //===----------------------------------------------------------------------===//
178 // Some Common Instruction Class Templates
179 //===----------------------------------------------------------------------===//
181 multiclass I3<string OpcStr, SDNode OpNode> {
182 def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
183 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
184 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
186 def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
187 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
188 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
189 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
190 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
191 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
193 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
194 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
195 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
196 def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
197 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
198 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
200 def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
201 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
202 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
203 def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
204 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
205 [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
206 def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
207 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
208 [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>;
211 multiclass I3_i8<string OpcStr, SDNode OpNode, string TypeStr, string CVTStr> {
212 def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
213 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
214 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
216 def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
217 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
218 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
219 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
220 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
221 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
223 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
224 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
225 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
226 def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
227 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
228 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
230 def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
231 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
232 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
233 def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
234 Handle_i8rr<OpcStr, TypeStr, CVTStr>.s,
235 [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
236 def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
237 Handle_i8ri<OpcStr, TypeStr, CVTStr>.s,
238 [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>;
241 multiclass I3_noi8<string OpcStr, SDNode OpNode> {
242 def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
243 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
244 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
246 def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
247 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
248 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
249 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
250 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
251 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
253 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
254 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
255 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
256 def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
257 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
258 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
260 def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
261 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
262 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
265 multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> {
266 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
268 !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
269 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
271 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
272 !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
273 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
276 multiclass F3<string OpcStr, SDNode OpNode> {
277 def f64rr : NVPTXInst<(outs Float64Regs:$dst),
278 (ins Float64Regs:$a, Float64Regs:$b),
279 !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
280 [(set Float64Regs:$dst,
281 (OpNode Float64Regs:$a, Float64Regs:$b))]>,
282 Requires<[allowFMA]>;
283 def f64ri : NVPTXInst<(outs Float64Regs:$dst),
284 (ins Float64Regs:$a, f64imm:$b),
285 !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
286 [(set Float64Regs:$dst,
287 (OpNode Float64Regs:$a, fpimm:$b))]>,
288 Requires<[allowFMA]>;
289 def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
290 (ins Float32Regs:$a, Float32Regs:$b),
291 !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
292 [(set Float32Regs:$dst,
293 (OpNode Float32Regs:$a, Float32Regs:$b))]>,
294 Requires<[allowFMA_ftz]>;
295 def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
296 (ins Float32Regs:$a, f32imm:$b),
297 !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
298 [(set Float32Regs:$dst,
299 (OpNode Float32Regs:$a, fpimm:$b))]>,
300 Requires<[allowFMA_ftz]>;
301 def f32rr : NVPTXInst<(outs Float32Regs:$dst),
302 (ins Float32Regs:$a, Float32Regs:$b),
303 !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
304 [(set Float32Regs:$dst,
305 (OpNode Float32Regs:$a, Float32Regs:$b))]>,
306 Requires<[allowFMA]>;
307 def f32ri : NVPTXInst<(outs Float32Regs:$dst),
308 (ins Float32Regs:$a, f32imm:$b),
309 !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
310 [(set Float32Regs:$dst,
311 (OpNode Float32Regs:$a, fpimm:$b))]>,
312 Requires<[allowFMA]>;
315 multiclass F3_rn<string OpcStr, SDNode OpNode> {
316 def f64rr : NVPTXInst<(outs Float64Regs:$dst),
317 (ins Float64Regs:$a, Float64Regs:$b),
318 !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
319 [(set Float64Regs:$dst,
320 (OpNode Float64Regs:$a, Float64Regs:$b))]>;
321 def f64ri : NVPTXInst<(outs Float64Regs:$dst),
322 (ins Float64Regs:$a, f64imm:$b),
323 !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
324 [(set Float64Regs:$dst,
325 (OpNode Float64Regs:$a, fpimm:$b))]>;
326 def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
327 (ins Float32Regs:$a, Float32Regs:$b),
328 !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
329 [(set Float32Regs:$dst,
330 (OpNode Float32Regs:$a, Float32Regs:$b))]>,
331 Requires<[doF32FTZ]>;
332 def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
333 (ins Float32Regs:$a, f32imm:$b),
334 !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
335 [(set Float32Regs:$dst,
336 (OpNode Float32Regs:$a, fpimm:$b))]>,
337 Requires<[doF32FTZ]>;
338 def f32rr : NVPTXInst<(outs Float32Regs:$dst),
339 (ins Float32Regs:$a, Float32Regs:$b),
340 !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
341 [(set Float32Regs:$dst,
342 (OpNode Float32Regs:$a, Float32Regs:$b))]>;
343 def f32ri : NVPTXInst<(outs Float32Regs:$dst),
344 (ins Float32Regs:$a, f32imm:$b),
345 !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
346 [(set Float32Regs:$dst,
347 (OpNode Float32Regs:$a, fpimm:$b))]>;
350 multiclass F2<string OpcStr, SDNode OpNode> {
351 def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a),
352 !strconcat(OpcStr, ".f64 \t$dst, $a;"),
353 [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>;
354 def f32_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
355 !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"),
356 [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>,
357 Requires<[doF32FTZ]>;
358 def f32 : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
359 !strconcat(OpcStr, ".f32 \t$dst, $a;"),
360 [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>;
363 //===----------------------------------------------------------------------===//
364 // NVPTX Instructions.
365 //===----------------------------------------------------------------------===//
367 //-----------------------------------
368 // Integer Arithmetic
369 //-----------------------------------
371 multiclass ADD_SUB_i1<SDNode OpNode> {
372 def _rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
373 "xor.pred \t$dst, $a, $b;",
374 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
375 def _ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
376 "xor.pred \t$dst, $a, $b;",
377 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>;
// 1-bit add and sub both instantiate ADD_SUB_i1, whose records all print
// "xor.pred".
380 defm ADD_i1 : ADD_SUB_i1<add>;
381 defm SUB_i1 : ADD_SUB_i1<sub>;
// I3 appends the operand width to the opcode prefix, e.g. "add.s" + "64".
384 defm ADD : I3<"add.s", add>;
385 defm SUB : I3<"sub.s", sub>;
// ADD_SUB_INT_32 appends ".s32" to the prefix and defines 32-bit rr/ri forms.
// The .cc forms match the addc/subc nodes.
387 defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>;
388 defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>;
// The addc.cc/subc.cc forms match the adde/sube nodes.
390 defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>;
391 defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>;
393 //mul.wide PTX instruction
394 def SInt32Const : PatLeaf<(imm), [{
395 const APInt &v = N->getAPIntValue();
396 if (v.isSignedIntN(32))
401 def UInt32Const : PatLeaf<(imm), [{
402 const APInt &v = N->getAPIntValue();
408 def SInt16Const : PatLeaf<(imm), [{
409 const APInt &v = N->getAPIntValue();
410 if (v.isSignedIntN(16))
415 def UInt16Const : PatLeaf<(imm), [{
416 const APInt &v = N->getAPIntValue();
422 def Int5Const : PatLeaf<(imm), [{
423 const APInt &v = N->getAPIntValue();
424 // Check if 0 <= v < 32
425 // Only then the result from (x << v) will be i32
426 if (v.sge(0) && v.slt(32))
431 def Int4Const : PatLeaf<(imm), [{
432 const APInt &v = N->getAPIntValue();
433 // Check if 0 <= v < 16
434 // Only then the result from (x << v) will be i16
435 if (v.sge(0) && v.slt(16))
440 def SHL2MUL32 : SDNodeXForm<imm, [{
441 const APInt &v = N->getAPIntValue();
443 return CurDAG->getTargetConstant(temp.shl(v), MVT::i32);
446 def SHL2MUL16 : SDNodeXForm<imm, [{
447 const APInt &v = N->getAPIntValue();
449 return CurDAG->getTargetConstant(temp.shl(v), MVT::i16);
// mul.wide instruction records. All of them have an empty pattern list, so
// they are only reachable through the explicit `def : Pat<...>` records that
// name them elsewhere in this file.
// Naming: S/U = signed/unsigned asm variant, 64/32 = result register width,
// Imm = second operand is an immediate of the result width.

// 32-bit x 32-bit -> 64-bit, signed.
452 def MULWIDES64 : NVPTXInst<(outs Int64Regs:$dst),
453 (ins Int32Regs:$a, Int32Regs:$b),
454 "mul.wide.s32 \t$dst, $a, $b;", []>;
455 def MULWIDES64Imm : NVPTXInst<(outs Int64Regs:$dst),
456 (ins Int32Regs:$a, i64imm:$b),
457 "mul.wide.s32 \t$dst, $a, $b;", []>;
// 32-bit x 32-bit -> 64-bit, unsigned.
459 def MULWIDEU64 : NVPTXInst<(outs Int64Regs:$dst),
460 (ins Int32Regs:$a, Int32Regs:$b),
461 "mul.wide.u32 \t$dst, $a, $b;", []>;
462 def MULWIDEU64Imm : NVPTXInst<(outs Int64Regs:$dst),
463 (ins Int32Regs:$a, i64imm:$b),
464 "mul.wide.u32 \t$dst, $a, $b;", []>;
// 16-bit x 16-bit -> 32-bit, signed.
466 def MULWIDES32 : NVPTXInst<(outs Int32Regs:$dst),
467 (ins Int16Regs:$a, Int16Regs:$b),
468 "mul.wide.s16 \t$dst, $a, $b;", []>;
469 def MULWIDES32Imm : NVPTXInst<(outs Int32Regs:$dst),
470 (ins Int16Regs:$a, i32imm:$b),
471 "mul.wide.s16 \t$dst, $a, $b;", []>;
// 16-bit x 16-bit -> 32-bit, unsigned.
473 def MULWIDEU32 : NVPTXInst<(outs Int32Regs:$dst),
474 (ins Int16Regs:$a, Int16Regs:$b),
475 "mul.wide.u16 \t$dst, $a, $b;", []>;
476 def MULWIDEU32Imm : NVPTXInst<(outs Int32Regs:$dst),
477 (ins Int16Regs:$a, i32imm:$b),
478 "mul.wide.u16 \t$dst, $a, $b;", []>;
// Selection patterns for the MULWIDE* records. Every pattern is gated on the
// doMulWide predicate.

// Left shift of a sign-/zero-extended 32-bit value by a constant accepted by
// Int5Const: selected as a 64-bit mul.wide with an immediate that SHL2MUL32
// derives from the shift amount.
480 def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)),
481 (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
482 Requires<[doMulWide]>;
483 def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)),
484 (MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
485 Requires<[doMulWide]>;
// Same idea for 16-bit sources, using Int4Const and SHL2MUL16.
487 def : Pat<(shl (sext Int16Regs:$a), (i16 Int4Const:$b)),
488 (MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
489 Requires<[doMulWide]>;
490 def : Pat<(shl (zext Int16Regs:$a), (i16 Int4Const:$b)),
491 (MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
492 Requires<[doMulWide]>;
// Multiply of two sign-extended 32-bit values, or of one sign-extended value
// and a constant matching SInt32Const.
494 def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)),
495 (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
496 Requires<[doMulWide]>;
497 def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)),
498 (MULWIDES64Imm Int32Regs:$a, (i64 SInt32Const:$b))>,
499 Requires<[doMulWide]>;
// Unsigned counterpart: zero-extended 32-bit operands / UInt32Const.
501 def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)),
502 (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, Requires<[doMulWide]>;
503 def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)),
504 (MULWIDEU64Imm Int32Regs:$a, (i64 UInt32Const:$b))>,
505 Requires<[doMulWide]>;
// 16-bit sources producing a 32-bit result, signed.
507 def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)),
508 (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>;
509 def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)),
510 (MULWIDES32Imm Int16Regs:$a, (i32 SInt16Const:$b))>,
511 Requires<[doMulWide]>;
// 16-bit sources producing a 32-bit result, unsigned.
513 def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)),
514 (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>;
515 def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)),
516 (MULWIDEU32Imm Int16Regs:$a, (i32 UInt16Const:$b))>,
517 Requires<[doMulWide]>;
// Low-half multiply for the integer widths covered by I3 (prefix "mul.lo.s").
519 defm MULT : I3<"mul.lo.s", mul>;
// High-half multiplies (mulhs/mulhu) instantiated from I3_noi8.
// NOTE(review): I3_noi8 appears to define no 8-bit variants, which would be
// why the MULTH*i8* records are written out by hand -- confirm.
521 defm MULTHS : I3_noi8<"mul.hi.s", mulhs>;
522 defm MULTHU : I3_noi8<"mul.hi.u", mulhu>;
// 8-bit high-half multiplies (mulhs/mulhu on Int8Regs).
// Each record prints a brace-delimited block that:
//   1. declares two 16-bit temporaries (temp1, temp2),
//   2. converts both 8-bit operands to 16 bits (cvt.s16.s8 / cvt.u16.u8),
//   3. multiplies them with mul.lo.[s|u]16 into $dst,
//   4. shifts $dst right by 8 (shr.[s|u]16).
// The ri forms first mov.b16 the immediate into temp2 and then convert temp2
// in place.

// Signed, register/register.
523 def MULTHSi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
524 !strconcat("{{ \n\t",
525 !strconcat(".reg \t.s16 temp1; \n\t",
526 !strconcat(".reg \t.s16 temp2; \n\t",
527 !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t",
528 !strconcat("cvt.s16.s8 \ttemp2, $b; \n\t",
529 !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t",
530 !strconcat("shr.s16 \t$dst, $dst, 8; \n\t",
531 !strconcat("}}", "")))))))),
532 [(set Int8Regs:$dst, (mulhs Int8Regs:$a, Int8Regs:$b))]>;
// Signed, register/immediate.
533 def MULTHSi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
534 !strconcat("{{ \n\t",
535 !strconcat(".reg \t.s16 temp1; \n\t",
536 !strconcat(".reg \t.s16 temp2; \n\t",
537 !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t",
538 !strconcat("mov.b16 \ttemp2, $b; \n\t",
539 !strconcat("cvt.s16.s8 \ttemp2, temp2; \n\t",
540 !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t",
541 !strconcat("shr.s16 \t$dst, $dst, 8; \n\t",
542 !strconcat("}}", ""))))))))),
543 [(set Int8Regs:$dst, (mulhs Int8Regs:$a, imm:$b))]>;
// Unsigned, register/register.
544 def MULTHUi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
545 !strconcat("{{ \n\t",
546 !strconcat(".reg \t.u16 temp1; \n\t",
547 !strconcat(".reg \t.u16 temp2; \n\t",
548 !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t",
549 !strconcat("cvt.u16.u8 \ttemp2, $b; \n\t",
550 !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t",
551 !strconcat("shr.u16 \t$dst, $dst, 8; \n\t",
552 !strconcat("}}", "")))))))),
553 [(set Int8Regs:$dst, (mulhu Int8Regs:$a, Int8Regs:$b))]>;
// Unsigned, register/immediate.
554 def MULTHUi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
555 !strconcat("{{ \n\t",
556 !strconcat(".reg \t.u16 temp1; \n\t",
557 !strconcat(".reg \t.u16 temp2; \n\t",
558 !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t",
559 !strconcat("mov.b16 \ttemp2, $b; \n\t",
560 !strconcat("cvt.u16.u8 \ttemp2, temp2; \n\t",
561 !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t",
562 !strconcat("shr.u16 \t$dst, $dst, 8; \n\t",
563 !strconcat("}}", ""))))))))),
564 [(set Int8Regs:$dst, (mulhu Int8Regs:$a, imm:$b))]>;
// Integer division and remainder. I3_i8 takes two extra strings that it
// forwards to Handle_i8rr/Handle_i8ri for the 8-bit variants: the 16-bit
// temporary type and the cvt opcode used to widen the 8-bit operands.
// Signed ops pair "s16" with cvt.s16.s8; unsigned ops pair "u16" with
// cvt.u16.u8.
567 defm SDIV : I3_i8<"div.s", sdiv, "s16", "cvt.s16.s8">;
568 defm UDIV : I3_i8<"div.u", udiv, "u16", "cvt.u16.u8">;
570 defm SREM : I3_i8<"rem.s", srem, "s16", "cvt.s16.s8">;
571 // The ri version will not be selected as DAGCombiner::visitSREM will lower it.
572 defm UREM : I3_i8<"rem.u", urem, "u16", "cvt.u16.u8">;
573 // The ri version will not be selected as DAGCombiner::visitUREM will lower it.
575 def MAD8rrr : NVPTXInst<(outs Int8Regs:$dst),
576 (ins Int8Regs:$a, Int8Regs:$b, Int8Regs:$c),
577 "mad.lo.s16 \t$dst, $a, $b, $c;",
578 [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b),
580 def MAD8rri : NVPTXInst<(outs Int8Regs:$dst),
581 (ins Int8Regs:$a, Int8Regs:$b, i8imm:$c),
582 "mad.lo.s16 \t$dst, $a, $b, $c;",
583 [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b),
585 def MAD8rir : NVPTXInst<(outs Int8Regs:$dst),
586 (ins Int8Regs:$a, i8imm:$b, Int8Regs:$c),
587 "mad.lo.s16 \t$dst, $a, $b, $c;",
588 [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b),
590 def MAD8rii : NVPTXInst<(outs Int8Regs:$dst),
591 (ins Int8Regs:$a, i8imm:$b, i8imm:$c),
592 "mad.lo.s16 \t$dst, $a, $b, $c;",
593 [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b),
// 16-bit multiply-add: matches (add (mul $a, $b), $c) and prints mad.lo.s16.
// The two trailing letters of each name give the kind of $b and $c
// (r = register, i = immediate); $a is always a register.
596 def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst),
597 (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c),
598 "mad.lo.s16 \t$dst, $a, $b, $c;",
599 [(set Int16Regs:$dst, (add
600 (mul Int16Regs:$a, Int16Regs:$b), Int16Regs:$c))]>;
// Immediate addend.
601 def MAD16rri : NVPTXInst<(outs Int16Regs:$dst),
602 (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c),
603 "mad.lo.s16 \t$dst, $a, $b, $c;",
604 [(set Int16Regs:$dst, (add
605 (mul Int16Regs:$a, Int16Regs:$b), imm:$c))]>;
// Immediate multiplier.
606 def MAD16rir : NVPTXInst<(outs Int16Regs:$dst),
607 (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c),
608 "mad.lo.s16 \t$dst, $a, $b, $c;",
609 [(set Int16Regs:$dst, (add
610 (mul Int16Regs:$a, imm:$b), Int16Regs:$c))]>;
611 def MAD16rii : NVPTXInst<(outs Int16Regs:$dst),
612 (ins Int16Regs:$a, i16imm:$b, i16imm:$c),
613 "mad.lo.s16 \t$dst, $a, $b, $c;",
614 [(set Int16Regs:$dst, (add (mul Int16Regs:$a, imm:$b),
// 32-bit multiply-add: matches (add (mul $a, $b), $c) and prints mad.lo.s32.
// The two trailing letters of each name give the kind of $b and $c
// (r = register, i = immediate); $a is always a register.
617 def MAD32rrr : NVPTXInst<(outs Int32Regs:$dst),
618 (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),
619 "mad.lo.s32 \t$dst, $a, $b, $c;",
620 [(set Int32Regs:$dst, (add
621 (mul Int32Regs:$a, Int32Regs:$b), Int32Regs:$c))]>;
// Immediate addend.
622 def MAD32rri : NVPTXInst<(outs Int32Regs:$dst),
623 (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c),
624 "mad.lo.s32 \t$dst, $a, $b, $c;",
625 [(set Int32Regs:$dst, (add
626 (mul Int32Regs:$a, Int32Regs:$b), imm:$c))]>;
// Immediate multiplier.
627 def MAD32rir : NVPTXInst<(outs Int32Regs:$dst),
628 (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c),
629 "mad.lo.s32 \t$dst, $a, $b, $c;",
630 [(set Int32Regs:$dst, (add
631 (mul Int32Regs:$a, imm:$b), Int32Regs:$c))]>;
// Immediate multiplier and immediate addend.
632 def MAD32rii : NVPTXInst<(outs Int32Regs:$dst),
633 (ins Int32Regs:$a, i32imm:$b, i32imm:$c),
634 "mad.lo.s32 \t$dst, $a, $b, $c;",
635 [(set Int32Regs:$dst, (add
636 (mul Int32Regs:$a, imm:$b), imm:$c))]>;
// 64-bit multiply-add: matches (add (mul $a, $b), $c) and prints mad.lo.s64.
// The two trailing letters of each name give the kind of $b and $c
// (r = register, i = immediate); $a is always a register.
638 def MAD64rrr : NVPTXInst<(outs Int64Regs:$dst),
639 (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
640 "mad.lo.s64 \t$dst, $a, $b, $c;",
641 [(set Int64Regs:$dst, (add
642 (mul Int64Regs:$a, Int64Regs:$b), Int64Regs:$c))]>;
// Immediate addend.
643 def MAD64rri : NVPTXInst<(outs Int64Regs:$dst),
644 (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c),
645 "mad.lo.s64 \t$dst, $a, $b, $c;",
646 [(set Int64Regs:$dst, (add
647 (mul Int64Regs:$a, Int64Regs:$b), imm:$c))]>;
// Immediate multiplier.
648 def MAD64rir : NVPTXInst<(outs Int64Regs:$dst),
649 (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c),
650 "mad.lo.s64 \t$dst, $a, $b, $c;",
651 [(set Int64Regs:$dst, (add
652 (mul Int64Regs:$a, imm:$b), Int64Regs:$c))]>;
// Immediate multiplier and immediate addend.
653 def MAD64rii : NVPTXInst<(outs Int64Regs:$dst),
654 (ins Int64Regs:$a, i64imm:$b, i64imm:$c),
655 "mad.lo.s64 \t$dst, $a, $b, $c;",
656 [(set Int64Regs:$dst, (add
657 (mul Int64Regs:$a, imm:$b), imm:$c))]>;
// Integer negation (ineg) for each register width.
// The 8-bit form prints two instructions: it first widens $src into $dst with
// cvt.s16.s8 and then negates $dst in place with neg.s16.
660 def INEG8 : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
661 !strconcat("cvt.s16.s8 \t$dst, $src;\n\t",
662 "neg.s16 \t$dst, $dst;"),
663 [(set Int8Regs:$dst, (ineg Int8Regs:$src))]>;
// The wider forms print a single neg.sN of matching width.
664 def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
665 "neg.s16 \t$dst, $src;",
666 [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>;
667 def INEG32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
668 "neg.s32 \t$dst, $src;",
669 [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>;
670 def INEG64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
671 "neg.s64 \t$dst, $src;",
672 [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>;
674 //-----------------------------------
675 // Floating Point Arithmetic
676 //-----------------------------------
679 def FloatConst1 : PatLeaf<(fpimm), [{
680 if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEsingle)
682 float f = (float)N->getValueAPF().convertToFloat();
685 // Constant (double)1.0
686 def DoubleConst1 : PatLeaf<(fpimm), [{
687 if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEdouble)
689 double d = (double)N->getValueAPF().convertToDouble();
// F3 variants: every record in F3 is gated on allowFMA (allowFMA_ftz for the
// .ftz.f32 forms) and prints the bare opcode, e.g. "add.f64".
693 defm FADD : F3<"add", fadd>;
694 defm FSUB : F3<"sub", fsub>;
695 defm FMUL : F3<"mul", fmul>;
// F3_rn variants match the same nodes but print the ".rn" modifier, e.g.
// "add.rn.f64". Only their .ftz forms carry a predicate (doF32FTZ).
697 defm FADD_rn : F3_rn<"add", fadd>;
698 defm FSUB_rn : F3_rn<"sub", fsub>;
699 defm FMUL_rn : F3_rn<"mul", fmul>;
// Unary operations via F2. FSQRT passes "sqrt.rn" as the opcode prefix, so the
// ".rn" modifier precedes the type suffix that F2 appends.
701 defm FABS : F2<"abs", fabs>;
702 defm FNEG : F2<"neg", fneg>;
703 defm FSQRT : F2<"sqrt.rn", fsqrt>;
// f64 division.
// Reciprocal form: matches fdiv whose numerator is the DoubleConst1 leaf and
// prints rcp.rn.f64. The $a operand is part of the ins list but is not
// printed; only $b appears in the asm string.
708 def FDIV641r : NVPTXInst<(outs Float64Regs:$dst),
709 (ins f64imm:$a, Float64Regs:$b),
710 "rcp.rn.f64 \t$dst, $b;",
711 [(set Float64Regs:$dst,
712 (fdiv DoubleConst1:$a, Float64Regs:$b))]>;
// General register/register division.
713 def FDIV64rr : NVPTXInst<(outs Float64Regs:$dst),
714 (ins Float64Regs:$a, Float64Regs:$b),
715 "div.rn.f64 \t$dst, $a, $b;",
716 [(set Float64Regs:$dst,
717 (fdiv Float64Regs:$a, Float64Regs:$b))]>;
// Register divided by a floating-point immediate.
718 def FDIV64ri : NVPTXInst<(outs Float64Regs:$dst),
719 (ins Float64Regs:$a, f64imm:$b),
720 "div.rn.f64 \t$dst, $a, $b;",
721 [(set Float64Regs:$dst,
722 (fdiv Float64Regs:$a, fpimm:$b))]>;
725 // F32 Approximate reciprocal
// Matches fdiv whose numerator is the FloatConst1 leaf and prints rcp.approx.
// $a is accepted as an operand but is not printed; only $b appears in the asm.
// The _ftz form needs both do_DIVF32_APPROX and doF32FTZ and adds ".ftz".
727 def FDIV321r_ftz : NVPTXInst<(outs Float32Regs:$dst),
728 (ins f32imm:$a, Float32Regs:$b),
729 "rcp.approx.ftz.f32 \t$dst, $b;",
730 [(set Float32Regs:$dst,
731 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
732 Requires<[do_DIVF32_APPROX, doF32FTZ]>;
// Non-FTZ form: gated on do_DIVF32_APPROX only.
733 def FDIV321r : NVPTXInst<(outs Float32Regs:$dst),
734 (ins f32imm:$a, Float32Regs:$b),
735 "rcp.approx.f32 \t$dst, $b;",
736 [(set Float32Regs:$dst,
737 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
738 Requires<[do_DIVF32_APPROX]>;
740 // F32 Approximate division
// General register/register fdiv printed as div.approx, gated on
// do_DIVF32_APPROX. Only rr forms are defined in this group.
// The _ftz form additionally requires doF32FTZ and adds ".ftz".
742 def FDIV32approxrr_ftz : NVPTXInst<(outs Float32Regs:$dst),
743 (ins Float32Regs:$a, Float32Regs:$b),
744 "div.approx.ftz.f32 \t$dst, $a, $b;",
745 [(set Float32Regs:$dst,
746 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
747 Requires<[do_DIVF32_APPROX, doF32FTZ]>;
// Non-FTZ form.
748 def FDIV32approxrr : NVPTXInst<(outs Float32Regs:$dst),
749 (ins Float32Regs:$a, Float32Regs:$b),
750 "div.approx.f32 \t$dst, $a, $b;",
751 [(set Float32Regs:$dst,
752 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
753 Requires<[do_DIVF32_APPROX]>;
755 // F32 Semi-accurate reciprocal
757 // rcp.approx gives the same result as div.full(1.0f, a) and is faster.
// Same patterns and asm strings as FDIV321r_ftz / FDIV321r, but gated on
// do_DIVF32_FULL instead of do_DIVF32_APPROX, so the reciprocal still prints
// rcp.approx when "full" precision division is selected.
759 def FDIV321r_approx_ftz : NVPTXInst<(outs Float32Regs:$dst),
760 (ins f32imm:$a, Float32Regs:$b),
761 "rcp.approx.ftz.f32 \t$dst, $b;",
762 [(set Float32Regs:$dst,
763 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
764 Requires<[do_DIVF32_FULL, doF32FTZ]>;
// Non-FTZ form.
765 def FDIV321r_approx : NVPTXInst<(outs Float32Regs:$dst),
766 (ins f32imm:$a, Float32Regs:$b),
767 "rcp.approx.f32 \t$dst, $b;",
768 [(set Float32Regs:$dst,
769 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
770 Requires<[do_DIVF32_FULL]>;
772 // F32 Semi-accurate division
// fdiv printed as div.full, gated on do_DIVF32_FULL. Both register/register
// and register/immediate forms are defined; the _ftz forms additionally
// require doF32FTZ and add ".ftz".
774 def FDIV32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
775 (ins Float32Regs:$a, Float32Regs:$b),
776 "div.full.ftz.f32 \t$dst, $a, $b;",
777 [(set Float32Regs:$dst,
778 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
779 Requires<[do_DIVF32_FULL, doF32FTZ]>;
780 def FDIV32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
781 (ins Float32Regs:$a, f32imm:$b),
782 "div.full.ftz.f32 \t$dst, $a, $b;",
783 [(set Float32Regs:$dst,
784 (fdiv Float32Regs:$a, fpimm:$b))]>,
785 Requires<[do_DIVF32_FULL, doF32FTZ]>;
// Non-FTZ forms.
786 def FDIV32rr : NVPTXInst<(outs Float32Regs:$dst),
787 (ins Float32Regs:$a, Float32Regs:$b),
788 "div.full.f32 \t$dst, $a, $b;",
789 [(set Float32Regs:$dst,
790 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
791 Requires<[do_DIVF32_FULL]>;
792 def FDIV32ri : NVPTXInst<(outs Float32Regs:$dst),
793 (ins Float32Regs:$a, f32imm:$b),
794 "div.full.f32 \t$dst, $a, $b;",
795 [(set Float32Regs:$dst,
796 (fdiv Float32Regs:$a, fpimm:$b))]>,
797 Requires<[do_DIVF32_FULL]>;
799 // F32 Accurate reciprocal
// Matches fdiv whose numerator is the FloatConst1 leaf and prints rcp.rn.
// These records are gated on reqPTX20 rather than on a do_DIVF32_* predicate.
// $a is accepted as an operand but is not printed.
801 def FDIV321r_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
802 (ins f32imm:$a, Float32Regs:$b),
803 "rcp.rn.ftz.f32 \t$dst, $b;",
804 [(set Float32Regs:$dst,
805 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
806 Requires<[reqPTX20, doF32FTZ]>;
// Non-FTZ form.
807 def FDIV321r_prec : NVPTXInst<(outs Float32Regs:$dst),
808 (ins f32imm:$a, Float32Regs:$b),
809 "rcp.rn.f32 \t$dst, $b;",
810 [(set Float32Regs:$dst,
811 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
812 Requires<[reqPTX20]>;
814 // F32 Accurate division
// fdiv printed as div.rn, gated on reqPTX20 (plus doF32FTZ for the ".ftz"
// forms). Both register/register and register/immediate forms are defined.
816 def FDIV32rr_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
817 (ins Float32Regs:$a, Float32Regs:$b),
818 "div.rn.ftz.f32 \t$dst, $a, $b;",
819 [(set Float32Regs:$dst,
820 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
821 Requires<[doF32FTZ, reqPTX20]>;
822 def FDIV32ri_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
823 (ins Float32Regs:$a, f32imm:$b),
824 "div.rn.ftz.f32 \t$dst, $a, $b;",
825 [(set Float32Regs:$dst,
826 (fdiv Float32Regs:$a, fpimm:$b))]>,
827 Requires<[doF32FTZ, reqPTX20]>;
// Non-FTZ forms.
828 def FDIV32rr_prec : NVPTXInst<(outs Float32Regs:$dst),
829 (ins Float32Regs:$a, Float32Regs:$b),
830 "div.rn.f32 \t$dst, $a, $b;",
831 [(set Float32Regs:$dst,
832 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
833 Requires<[reqPTX20]>;
834 def FDIV32ri_prec : NVPTXInst<(outs Float32Regs:$dst),
835 (ins Float32Regs:$a, f32imm:$b),
836 "div.rn.f32 \t$dst, $a, $b;",
837 [(set Float32Regs:$dst,
838 (fdiv Float32Regs:$a, fpimm:$b))]>,
839 Requires<[reqPTX20]>;
// FPCONTRACT32: family of three-operand f32 multiply-add instructions.
//   OpcStr - opcode text placed in front of " \t$dst, $a, $b, $c;".
//   Pred   - predicate gating the rrr form.
// Each def matches fadd(fmul(a, b), c) for one register/immediate mix
// (rrr, rri, rir, rii); rrr2 matches the commuted fadd(c, fmul(a, b)).
// NOTE(review): the clause that follows the trailing "," of rrr2, rri, rir and
// rii is not visible here; presumably Requires<[Pred]> like rrr -- confirm.
842 multiclass FPCONTRACT32<string OpcStr, Predicate Pred> {
843 def rrr : NVPTXInst<(outs Float32Regs:$dst),
844 (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
845 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
846 [(set Float32Regs:$dst, (fadd
847 (fmul Float32Regs:$a, Float32Regs:$b),
848 Float32Regs:$c))]>, Requires<[Pred]>;
849 // This is a workaround (WAR) for a TableGen bug that does not automatically
850 // generate the following permuted rule rrr2 from the above rrr.
851 // So we explicitly add it here. This happens for FMA32 only.
852 // See the comments at FMAD32 and FMA32 for more information.
853 def rrr2 : NVPTXInst<(outs Float32Regs:$dst),
854 (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
855 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
856 [(set Float32Regs:$dst, (fadd Float32Regs:$c,
857 (fmul Float32Regs:$a, Float32Regs:$b)))]>,
859 def rri : NVPTXInst<(outs Float32Regs:$dst),
860 (ins Float32Regs:$a, Float32Regs:$b, f32imm:$c),
861 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
862 [(set Float32Regs:$dst, (fadd
863 (fmul Float32Regs:$a, Float32Regs:$b), fpimm:$c))]>,
865 def rir : NVPTXInst<(outs Float32Regs:$dst),
866 (ins Float32Regs:$a, f32imm:$b, Float32Regs:$c),
867 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
868 [(set Float32Regs:$dst, (fadd
869 (fmul Float32Regs:$a, fpimm:$b), Float32Regs:$c))]>,
871 def rii : NVPTXInst<(outs Float32Regs:$dst),
872 (ins Float32Regs:$a, f32imm:$b, f32imm:$c),
873 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
874 [(set Float32Regs:$dst, (fadd
875 (fmul Float32Regs:$a, fpimm:$b), fpimm:$c))]>,
// FPCONTRACT64: f64 counterpart of FPCONTRACT32.
//   OpcStr - opcode text placed in front of " \t$dst, $a, $b, $c;".
//   Pred   - predicate gating the rrr and rri forms.
// Each def matches fadd(fmul(a, b), c) for one register/immediate mix
// (rrr, rri, rir, rii). Unlike FPCONTRACT32 there is no explicit commuted
// rrr2 form in this multiclass.
// NOTE(review): the clause that follows the trailing "," of rir and rii is not
// visible here; presumably Requires<[Pred]> -- confirm.
879 multiclass FPCONTRACT64<string OpcStr, Predicate Pred> {
880 def rrr : NVPTXInst<(outs Float64Regs:$dst),
881 (ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c),
882 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
883 [(set Float64Regs:$dst, (fadd
884 (fmul Float64Regs:$a, Float64Regs:$b),
885 Float64Regs:$c))]>, Requires<[Pred]>;
886 def rri : NVPTXInst<(outs Float64Regs:$dst),
887 (ins Float64Regs:$a, Float64Regs:$b, f64imm:$c),
888 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
889 [(set Float64Regs:$dst, (fadd (fmul Float64Regs:$a,
890 Float64Regs:$b), fpimm:$c))]>, Requires<[Pred]>;
891 def rir : NVPTXInst<(outs Float64Regs:$dst),
892 (ins Float64Regs:$a, f64imm:$b, Float64Regs:$c),
893 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
894 [(set Float64Regs:$dst, (fadd
895 (fmul Float64Regs:$a, fpimm:$b), Float64Regs:$c))]>,
897 def rii : NVPTXInst<(outs Float64Regs:$dst),
898 (ins Float64Regs:$a, f64imm:$b, f64imm:$c),
899 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
900 [(set Float64Regs:$dst, (fadd
901 (fmul Float64Regs:$a, fpimm:$b), fpimm:$c))]>,
905 // Due to an unknown reason (most likely a bug in tablegen), tablegen does not
906 // automatically generate the rrr2 rule from
907 // the rrr rule (see FPCONTRACT32) for FMA32, though it does for FMAD32.
908 // If we reverse the order of the following two lines, then rrr2 rule will be
909 // generated for FMA32, but not for rrr.
910 // Therefore, we manually write the rrr2 rule in FPCONTRACT32.
// Instantiations: each defm pairs an opcode string with the predicate that
// enables it. The ".ftz" variant of each pair is declared before the plain one.
911 defm FMAD32_ftz : FPCONTRACT32<"mad.ftz.f32", doFMADF32_ftz>;
912 defm FMAD32 : FPCONTRACT32<"mad.f32", doFMADF32>;
913 defm FMA32_ftz : FPCONTRACT32<"fma.rn.ftz.f32", doFMAF32_ftz>;
914 defm FMA32 : FPCONTRACT32<"fma.rn.f32", doFMAF32>;
915 defm FMA64 : FPCONTRACT64<"fma.rn.f64", doFMAF64>;
917 // b*c-a => fmad(b, c, -a)
// FPCONTRACT32_SUB_PAT_MAD: anonymous pattern that rewrites
// fsub(fmul(b, c), a) into Inst(b, c, FNEGf32(a)). Only this one direction is
// provided (compare FPCONTRACT32_SUB_PAT, which also handles a-b*c).
// NOTE(review): the clause after the trailing "," is not visible here;
// presumably Requires<[Pred]> -- confirm.
918 multiclass FPCONTRACT32_SUB_PAT_MAD<NVPTXInst Inst, Predicate Pred> {
919 def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
920 (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
924 // a-b*c => fmad(-b,c, a)
925 // - legal because a-b*c <=> a+(-b*c) <=> a+(-b)*c
926 // b*c-a => fmad(b, c, -a)
927 // - legal because b*c-a <=> b*c+(-a)
// FPCONTRACT32_SUB_PAT: two anonymous patterns that fold an f32 subtraction
// involving a product into Inst, negating one operand with FNEGf32.
// NOTE(review): the clause after each trailing "," is not visible here;
// presumably Requires<[Pred]> -- confirm.
928 multiclass FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
929 def : Pat<(fsub Float32Regs:$a, (fmul Float32Regs:$b, Float32Regs:$c)),
930 (Inst (FNEGf32 Float32Regs:$b), Float32Regs:$c, Float32Regs:$a)>,
932 def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
933 (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
937 // a-b*c => fmad(-b,c, a)
938 // b*c-a => fmad(b, c, -a)
// FPCONTRACT64_SUB_PAT: f64 counterpart of FPCONTRACT32_SUB_PAT; the negated
// operand is produced with FNEGf64.
// NOTE(review): the clause after each trailing "," is not visible here;
// presumably Requires<[Pred]> -- confirm.
939 multiclass FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
940 def : Pat<(fsub Float64Regs:$a, (fmul Float64Regs:$b, Float64Regs:$c)),
941 (Inst (FNEGf64 Float64Regs:$b), Float64Regs:$c, Float64Regs:$a)>,
944 def : Pat<(fsub (fmul Float64Regs:$b, Float64Regs:$c), Float64Regs:$a),
945 (Inst Float64Regs:$b, Float64Regs:$c, (FNEGf64 Float64Regs:$a))>,
// Subtraction-folding patterns, each bound to the rrr instruction of one
// FPCONTRACT instantiation. The FMA variants use the *AGG predicates and get
// both a-b*c and b*c-a; the FMAD variants use FPCONTRACT32_SUB_PAT_MAD and so
// get only b*c-a.
949 defm FMAF32ext_ftz : FPCONTRACT32_SUB_PAT<FMA32_ftzrrr, doFMAF32AGG_ftz>;
950 defm FMAF32ext : FPCONTRACT32_SUB_PAT<FMA32rrr, doFMAF32AGG>;
951 defm FMADF32ext_ftz : FPCONTRACT32_SUB_PAT_MAD<FMAD32_ftzrrr, doFMADF32_ftz>;
952 defm FMADF32ext : FPCONTRACT32_SUB_PAT_MAD<FMAD32rrr, doFMADF32>;
953 defm FMAF64ext : FPCONTRACT64_SUB_PAT<FMA64rrr, doFMAF64AGG>;
// SINF / COSF: f32 sine and cosine. fsin and fcos on a Float32Regs operand are
// selected to the ".approx" opcodes below; no predicate is attached.
955 def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
956 "sin.approx.f32 \t$dst, $src;",
957 [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>;
958 def COSF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
959 "cos.approx.f32 \t$dst, $src;",
960 [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>;
962 //-----------------------------------
963 // Logical Arithmetic
964 //-----------------------------------
// LOG_FORMAT: two-operand bitwise instructions for every integer register
// class, in register/register (rr) and register/immediate (ri) forms.
//   OpcStr - opcode mnemonic; a type suffix is appended per def.
//   OpNode - the SelectionDAG node each pattern matches.
// Suffixes used: ".pred" for Int1Regs, ".b16" for both Int8Regs and
// Int16Regs (the 8-bit forms print the 16-bit opcode), ".b32", ".b64".
966 multiclass LOG_FORMAT<string OpcStr, SDNode OpNode> {
967 def b1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
968 !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
969 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
970 def b1ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
971 !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
972 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>;
973 def b8rr: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
974 !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
975 [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
976 def b8ri: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
977 !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
978 [(set Int8Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
979 def b16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
980 !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
981 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
983 def b16ri: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
984 !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
985 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
986 def b32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
987 !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
988 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
990 def b32ri: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
991 !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
992 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
993 def b64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
994 !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
995 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
997 def b64ri: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
998 !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
999 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
// Instantiations for the three bitwise operators.
1002 defm OR : LOG_FORMAT<"or", or>;
1003 defm AND : LOG_FORMAT<"and", and>;
1004 defm XOR : LOG_FORMAT<"xor", xor>;
// Bitwise NOT for each integer register class. As with LOG_FORMAT, the 8-bit
// form prints the ".b16" opcode and the 1-bit form prints ".pred".
1006 def NOT1: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src),
1007 "not.pred \t$dst, $src;",
1008 [(set Int1Regs:$dst, (not Int1Regs:$src))]>;
1009 def NOT8: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
1010 "not.b16 \t$dst, $src;",
1011 [(set Int8Regs:$dst, (not Int8Regs:$src))]>;
1012 def NOT16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
1013 "not.b16 \t$dst, $src;",
1014 [(set Int16Regs:$dst, (not Int16Regs:$src))]>;
1015 def NOT32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
1016 "not.b32 \t$dst, $src;",
1017 [(set Int32Regs:$dst, (not Int32Regs:$src))]>;
1018 def NOT64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
1019 "not.b64 \t$dst, $src;",
1020 [(set Int64Regs:$dst, (not Int64Regs:$src))]>;
1022 // For shifts, the second src operand must be 32-bit value
// LSHIFT_FORMAT: shift instructions for 64/32/16/8-bit values.
//   OpcStr - opcode prefix; the operand width ("64", "32", "16") is appended.
//   OpNode - the SelectionDAG shift node each pattern matches.
// Forms: rr (shift amount in a register), ri (immediate shift amount) and, for
// 32-bit only, ii (both operands immediate). The 8-bit forms print the "16"
// opcode. The ri/ii forms take the shift amount as i32imm.
1023 multiclass LSHIFT_FORMAT<string OpcStr, SDNode OpNode> {
1024 def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
1026 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1027 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1029 def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
1030 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1031 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1033 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
1035 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1036 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1038 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1039 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1040 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1042 def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
1043 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1044 [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
1046 def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
1048 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1049 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
1051 def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
1052 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1053 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
1055 def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b),
1056 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1057 [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
1059 def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b),
1060 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1061 [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
// Left shift: "shl.b" + width.
1065 defm SHL : LSHIFT_FORMAT<"shl.b", shl>;
1067 // For shifts, the second src operand must be 32-bit value
1068 // Need to add cvt for the 8-bits.
// RSHIFT_FORMAT: right-shift instructions for 64/32/16/8-bit values.
//   OpcStr - opcode prefix; the operand width ("64", "32", "16") is appended.
//   OpNode - the SelectionDAG shift node each pattern matches.
//   CVTStr - conversion opcode printed before the shift in the 8-bit forms.
// The 64/32/16-bit forms mirror LSHIFT_FORMAT. The 8-bit forms print two
// instructions: first "CVTStr $dst, $a", then the 16-bit shift applied to $dst
// in place.
1069 multiclass RSHIFT_FORMAT<string OpcStr, SDNode OpNode, string CVTStr> {
1070 def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
1072 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1073 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1075 def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
1076 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1077 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1079 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
1081 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1082 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1084 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1085 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1086 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1088 def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
1089 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1090 [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
1092 def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
1094 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1095 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
1097 def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
1098 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1099 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
1101 def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b),
1102 !strconcat(CVTStr, !strconcat(" \t$dst, $a;\n\t",
1103 !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))),
1104 [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
1106 def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b),
1107 !strconcat(CVTStr, !strconcat(" \t$dst, $a;\n\t",
1108 !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))),
1109 [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
// sra pairs the signed shift with a signed 8->16 conversion; srl pairs the
// unsigned shift with an unsigned one.
1113 defm SRA : RSHIFT_FORMAT<"shr.s", sra, "cvt.s16.s8">;
1114 defm SRL : RSHIFT_FORMAT<"shr.u", srl, "cvt.u16.u8">;
// ROT32imm_sw: 32-bit rotate by an immediate, expanded in the printed assembly
// into a braced block that declares scratch registers %lhs/%rhs, shifts $src
// left by $amt1 and right by $amt2, and adds the two halves into $dst.
// The two shift amounts are supplied by the Pat definitions below.
1117 def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst),
1118 (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2),
1119 !strconcat("{{\n\t",
1120 !strconcat(".reg .b32 %lhs;\n\t",
1121 !strconcat(".reg .b32 %rhs;\n\t",
1122 !strconcat("shl.b32 \t%lhs, $src, $amt1;\n\t",
1123 !strconcat("shr.b32 \t%rhs, $src, $amt2;\n\t",
1124 !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
1125 !strconcat("}}", ""))))))),
// SUB_FRM_32: immediate transform producing the i32 target constant
// (32 - N), where N is the zero-extended value of the matched immediate.
1128 def SUB_FRM_32 : SDNodeXForm<imm, [{
1129 return CurDAG->getTargetConstant(32-N->getZExtValue(), MVT::i32);
// rotl by amt: left shift = amt, right shift = 32 - amt.
1132 def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)),
1133 (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>;
// rotr by amt: left shift = 32 - amt, right shift = amt.
1134 def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)),
1135 (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>;
// ROTL32reg_sw: 32-bit rotate-left by a register amount. The printed block
// computes %amt2 = 32 - $amt, shifts $src left by $amt and right by %amt2,
// and adds the two results into $dst.
1137 def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
1139 !strconcat("{{\n\t",
1140 !strconcat(".reg .b32 %lhs;\n\t",
1141 !strconcat(".reg .b32 %rhs;\n\t",
1142 !strconcat(".reg .b32 %amt2;\n\t",
1143 !strconcat("shl.b32 \t%lhs, $src, $amt;\n\t",
1144 !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
1145 !strconcat("shr.b32 \t%rhs, $src, %amt2;\n\t",
1146 !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
1147 !strconcat("}}", ""))))))))),
1148 [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>;
// ROTR32reg_sw: mirror image of ROTL32reg_sw -- the shr/shl roles are swapped
// so that $src is shifted right by $amt and left by 32 - $amt.
1150 def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
1152 !strconcat("{{\n\t",
1153 !strconcat(".reg .b32 %lhs;\n\t",
1154 !strconcat(".reg .b32 %rhs;\n\t",
1155 !strconcat(".reg .b32 %amt2;\n\t",
1156 !strconcat("shr.b32 \t%lhs, $src, $amt;\n\t",
1157 !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
1158 !strconcat("shl.b32 \t%rhs, $src, %amt2;\n\t",
1159 !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
1160 !strconcat("}}", ""))))))))),
1161 [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>;
// ROT64imm_sw: 64-bit rotate by an immediate; same scheme as ROT32imm_sw but
// with .b64 scratch registers and 64-bit shl/shr/add. The shift amounts remain
// i32 immediates.
1164 def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
1165 i32imm:$amt1, i32imm:$amt2),
1166 !strconcat("{{\n\t",
1167 !strconcat(".reg .b64 %lhs;\n\t",
1168 !strconcat(".reg .b64 %rhs;\n\t",
1169 !strconcat("shl.b64 \t%lhs, $src, $amt1;\n\t",
1170 !strconcat("shr.b64 \t%rhs, $src, $amt2;\n\t",
1171 !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
1172 !strconcat("}}", ""))))))),
// SUB_FRM_64: immediate transform producing the i32 target constant
// (64 - N), where N is the zero-extended value of the matched immediate.
1175 def SUB_FRM_64 : SDNodeXForm<imm, [{
1176 return CurDAG->getTargetConstant(64-N->getZExtValue(), MVT::i32);
// rotl by amt: left shift = amt, right shift = 64 - amt.
1179 def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)),
1180 (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>;
// rotr by amt: left shift = 64 - amt, right shift = amt.
1181 def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)),
1182 (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>;
// ROTL64reg_sw: 64-bit rotate-left by a 32-bit register amount. The printed
// block computes %amt2 = 64 - $amt in a .u32 scratch register, shifts $src
// left by $amt and right by %amt2, and adds the two results into $dst.
1184 def ROTL64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
1186 !strconcat("{{\n\t",
1187 !strconcat(".reg .b64 %lhs;\n\t",
1188 !strconcat(".reg .b64 %rhs;\n\t",
1189 !strconcat(".reg .u32 %amt2;\n\t",
1190 !strconcat("shl.b64 \t%lhs, $src, $amt;\n\t",
1191 !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
1192 !strconcat("shr.b64 \t%rhs, $src, %amt2;\n\t",
1193 !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
1194 !strconcat("}}", ""))))))))),
1195 [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>;
// ROTR64reg_sw: mirror image of ROTL64reg_sw -- $src is shifted right by $amt
// and left by 64 - $amt.
1197 def ROTR64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
1199 !strconcat("{{\n\t",
1200 !strconcat(".reg .b64 %lhs;\n\t",
1201 !strconcat(".reg .b64 %rhs;\n\t",
1202 !strconcat(".reg .u32 %amt2;\n\t",
1203 !strconcat("shr.b64 \t%lhs, $src, $amt;\n\t",
1204 !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
1205 !strconcat("shl.b64 \t%rhs, $src, %amt2;\n\t",
1206 !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
1207 !strconcat("}}", ""))))))))),
1208 [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>;
1211 //-----------------------------------
1212 // Data Movement (Load / Store, Move)
1213 //-----------------------------------
// Addressing patterns: two-operand complex patterns rooted at frameindex and
// resolved by the C++ selectors named in the string argument (32- and 64-bit
// address variants). Any further arguments are not visible here.
1215 def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex],
1217 def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri64", [frameindex],
// Memory operands made of a base register plus an immediate, printed by
// printMemOperand.
1220 def MEMri : Operand<i32> {
1221 let PrintMethod = "printMemOperand";
1222 let MIOperandInfo = (ops Int32Regs, i32imm);
1224 def MEMri64 : Operand<i64> {
1225 let PrintMethod = "printMemOperand";
1226 let MIOperandInfo = (ops Int64Regs, i64imm);
// Pointer-typed operands printed by the generic printOperand.
1229 def imem : Operand<iPTR> {
1230 let PrintMethod = "printOperand";
1233 def imemAny : Operand<iPTRAny> {
1234 let PrintMethod = "printOperand";
// i32 operand printed by printLdStCode.
1237 def LdStCode : Operand<i32> {
1238 let PrintMethod = "printLdStCode";
// NVPTXISD::Wrapper node: one result, one operand, both of the same type, and
// the result is pointer-typed.
1241 def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
1242 def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
// MOV_ADDR / MOV_ADDR64: materialise a wrapped target global address into a
// 32-bit or 64-bit integer register with a plain mov.
1244 def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a),
1245 "mov.u32 \t$dst, $a;",
1246 [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>;
1248 def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
1249 "mov.u64 \t$dst, $a;",
1250 [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>;
1252 // copyPhysreg is hard-coded in NVPTXInstrInfo.cpp
// Register-to-register moves, one per register class. All have empty
// selection patterns, so they are never chosen by pattern matching. The 8-bit
// move prints the 16-bit "mov.u16" opcode.
// NOTE(review): the closing brace of the IsSimpleMove scope is not visible
// here, so it is unclear whether FMOV32rr/FMOV64rr are inside it -- confirm.
1253 let IsSimpleMove=1 in {
1254 def IMOV1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
1255 "mov.pred \t$dst, $sss;", []>;
1256 def IMOV8rr: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$sss),
1257 "mov.u16 \t$dst, $sss;", []>;
1258 def IMOV16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
1259 "mov.u16 \t$dst, $sss;", []>;
1260 def IMOV32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
1261 "mov.u32 \t$dst, $sss;", []>;
1262 def IMOV64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
1263 "mov.u64 \t$dst, $sss;", []>;
1265 def FMOV32rr: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
1266 "mov.f32 \t$dst, $src;", []>;
1267 def FMOV64rr: NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
1268 "mov.f64 \t$dst, $src;", []>;
// Immediate moves: materialise an integer (imm) or floating-point (fpimm)
// constant into a register of the matching class. The 8-bit form prints the
// 16-bit "mov.u16" opcode. Note the 64-bit integer form is named IMOV64i, not
// IMOV64ri like its siblings.
1270 def IMOV1ri: NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
1271 "mov.pred \t$dst, $src;",
1272 [(set Int1Regs:$dst, imm:$src)]>;
1273 def IMOV8ri: NVPTXInst<(outs Int8Regs:$dst), (ins i8imm:$src),
1274 "mov.u16 \t$dst, $src;",
1275 [(set Int8Regs:$dst, imm:$src)]>;
1276 def IMOV16ri: NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
1277 "mov.u16 \t$dst, $src;",
1278 [(set Int16Regs:$dst, imm:$src)]>;
1279 def IMOV32ri: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
1280 "mov.u32 \t$dst, $src;",
1281 [(set Int32Regs:$dst, imm:$src)]>;
1282 def IMOV64i: NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
1283 "mov.u64 \t$dst, $src;",
1284 [(set Int64Regs:$dst, imm:$src)]>;
1286 def FMOV32ri: NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
1287 "mov.f32 \t$dst, $src;",
1288 [(set Float32Regs:$dst, fpimm:$src)]>;
1289 def FMOV64ri: NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
1290 "mov.f64 \t$dst, $src;",
1291 [(set Float64Regs:$dst, fpimm:$src)]>;
// A wrapped external symbol used as an i32 is materialised with IMOV32ri.
1293 def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;
1295 //---- Copy Frame Index ----
// LEA_ADDRi / LEA_ADDRi64: match an ADDRri / ADDRri64 address and print it as
// an "add" of the memory operand's components; the ":add" modifier on $addr is
// passed to the operand's print method.
1296 def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
1297 "add.u32 \t$dst, ${addr:add};",
1298 [(set Int32Regs:$dst, ADDRri:$addr)]>;
1299 def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr),
1300 "add.u64 \t$dst, ${addr:add};",
1301 [(set Int64Regs:$dst, ADDRri64:$addr)]>;
1303 //-----------------------------------
1304 // Comparison and Selection
1305 //-----------------------------------
1307 // Generate string block like
1310 // setp.gt.s16 p, %a, %b;
1311 // selp.s16 %dst, -1, 0, p;
1313 // when OpcStr=setp.gt.s sz1=16 sz2=16 d=%dst a=%a b=%b
// Set_Str: string-building helper. The result is exposed as field `s` and is
// assembled step by step in t1..t11:
//   OpcStr - compare opcode prefix; sz1 is appended to form the full opcode.
//   sz2    - width appended to "selp.s" for the select that writes d.
//   d,a,b  - operand texts for the destination and the two sources.
// The block declares a scratch predicate `p`, sets it with the compare, then
// selects -1 or 0 into d.
1314 class Set_Str<string OpcStr, string sz1, string sz2, string d, string a,
1316 string t1 = "{{\n\t.reg .pred p;\n\t";
1317 string t2 = !strconcat(t1 , OpcStr);
1318 string t3 = !strconcat(t2 , sz1);
1319 string t4 = !strconcat(t3 , " \tp, ");
1320 string t5 = !strconcat(t4 , a);
1321 string t6 = !strconcat(t5 , ", ");
1322 string t7 = !strconcat(t6 , b);
1323 string t8 = !strconcat(t7 , ";\n\tselp.s");
1324 string t9 = !strconcat(t8 , sz2);
1325 string t10 = !strconcat(t9, " \t");
1326 string t11 = !strconcat(t10, d);
1327 string s = !strconcat(t11, ", -1, 0, p;\n\t}}");
1330 // Generate string block like
1333 // .reg .s16 %temp1;
1334 // .reg .s16 %temp2;
1335 // cvt.s16.s8 %temp1, %a;
1336 // cvt.s16.s8 %temp2, %b;
1337 // setp.gt.s16 p, %temp1, %temp2;
1338 // selp.s16 %dst, -1, 0, p;
1340 // when OpcStr=setp.gt.s d=%dst a=%a b=%b type=s16 cvt=cvt.s16.s8
// Set_Stri8: 8-bit variant of Set_Str, exposed as field `s`. Both sources are
// first converted with `cvt` into scratch registers %temp1/%temp2 declared
// with `type`; the compare (OpcStr + "16") then runs on the scratch registers
// and a fixed "selp.s16" writes -1 or 0 into d.
1341 class Set_Stri8<string OpcStr, string d, string a, string b, string type,
1343 string t1 = "{{\n\t.reg .pred p;\n\t";
1344 string t2 = !strconcat(t1, ".reg .");
1345 string t3 = !strconcat(t2, type);
1346 string t4 = !strconcat(t3, " %temp1;\n\t");
1347 string t5 = !strconcat(t4, ".reg .");
1348 string t6 = !strconcat(t5, type);
1349 string t7 = !strconcat(t6, " %temp2;\n\t");
1350 string t8 = !strconcat(t7, cvt);
1351 string t9 = !strconcat(t8, " \t%temp1, ");
1352 string t10 = !strconcat(t9, a);
1353 string t11 = !strconcat(t10, ";\n\t");
1354 string t12 = !strconcat(t11, cvt);
1355 string t13 = !strconcat(t12, " \t%temp2, ");
1356 string t14 = !strconcat(t13, b);
1357 string t15 = !strconcat(t14, ";\n\t");
1358 string t16 = !strconcat(t15, OpcStr);
1359 string t17 = !strconcat(t16, "16");
1360 string t18 = !strconcat(t17, " \tp, %temp1, %temp2;\n\t");
1361 string t19 = !strconcat(t18, "selp.s16 \t");
1362 string t20 = !strconcat(t19, d);
1363 string s = !strconcat(t20, ", -1, 0, p;\n\t}}");
// ISET_FORMAT: integer comparison instructions for one condition code.
//   OpcStr     - predicate-producing opcode prefix; width is appended.
//   OpcStr_u32 - opcode prefix for the forms that write a 32-bit result.
//   OpNode     - the setcc pattern fragment each pattern matches.
//   TypeStr    - scratch register type used by the 8-bit helper strings.
//   CVTStr     - conversion opcode used by the 8-bit helper strings.
// Three groups are defined:
//   *_toiN - result in a register as wide as the operands, text built by
//            Set_Str / Set_Stri8 (their patterns are not visible here);
//   *_p    - result in Int1Regs;
//   *_u32  - result in Int32Regs.
// The _p and _u32 groups come in rr, ri and ir operand mixes. Their 8-bit
// forms take their text from Handle_i8rr/ri/ir, which are defined elsewhere.
1366 multiclass ISET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode,
1367 string TypeStr, string CVTStr> {
1368 def i8rr_toi8: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
1369 Set_Stri8<OpcStr, "$dst", "$a", "$b", TypeStr, CVTStr>.s,
1371 def i16rr_toi16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
1373 Set_Str<OpcStr, "16", "16", "$dst", "$a", "$b">.s,
1375 def i32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
1377 Set_Str<OpcStr, "32", "32", "$dst", "$a", "$b">.s,
1379 def i64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
1381 Set_Str<OpcStr, "64", "64", "$dst", "$a", "$b">.s,
// Predicate-result forms.
1384 def i8rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
1385 Handle_i8rr<OpcStr, TypeStr, CVTStr>.s,
1386 [(set Int1Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
1387 def i8ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
1388 Handle_i8ri<OpcStr, TypeStr, CVTStr>.s,
1389 [(set Int1Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
1390 def i8ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i8imm:$a, Int8Regs:$b),
1391 Handle_i8ir<OpcStr, TypeStr, CVTStr>.s,
1392 [(set Int1Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>;
1393 def i16rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
1394 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1395 [(set Int1Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
1396 def i16ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
1397 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1398 [(set Int1Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
1399 def i16ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i16imm:$a, Int16Regs:$b),
1400 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1401 [(set Int1Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>;
1402 def i32rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
1403 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1404 [(set Int1Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
1405 def i32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1406 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1407 [(set Int1Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
1408 def i32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i32imm:$a, Int32Regs:$b),
1409 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1410 [(set Int1Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>;
1411 def i64rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
1412 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1413 [(set Int1Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
1414 def i64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
1415 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1416 [(set Int1Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
1417 def i64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i64imm:$a, Int64Regs:$b),
1418 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1419 [(set Int1Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>;
// 32-bit-result forms, printed with OpcStr_u32.
1421 def i8rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
1422 Handle_i8rr<OpcStr_u32, TypeStr, CVTStr>.s,
1423 [(set Int32Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
1424 def i8ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
1425 Handle_i8ri<OpcStr_u32, TypeStr, CVTStr>.s,
1426 [(set Int32Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
1427 def i8ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i8imm:$a, Int8Regs:$b),
1428 Handle_i8ir<OpcStr_u32, TypeStr, CVTStr>.s,
1429 [(set Int32Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>;
1430 def i16rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a,
1432 !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
1433 [(set Int32Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
1434 def i16ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
1435 !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
1436 [(set Int32Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
1437 def i16ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i16imm:$a, Int16Regs:$b),
1438 !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
1439 [(set Int32Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>;
1440 def i32rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
1442 !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
1443 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
1444 def i32ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1445 !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
1446 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
1447 def i32ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, Int32Regs:$b),
1448 !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
1449 [(set Int32Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>;
1450 def i64rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a,
1452 !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
1453 [(set Int32Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
1454 def i64ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
1455 !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
1456 [(set Int32Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
1457 def i64ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i64imm:$a, Int64Regs:$b),
1458 !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
1459 [(set Int32Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>;
// FSET_FORMAT: floating-point comparison instructions for one condition code.
//   OpcStr     - predicate-producing opcode prefix; "f32", "ftz.f32" or "f64"
//                is appended.
//   OpcStr_u32 - opcode prefix for the forms that write a 32-bit result.
//   OpNode     - the setcc pattern fragment each pattern matches.
// This first part holds the *_toiN forms (text built by Set_Str) and the
// predicate-result *_p forms. Every ".ftz" f32 form here is gated on
// doF32FTZ and is declared before its non-ftz twin.
1462 multiclass FSET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode> {
1463 def f32rr_toi32_ftz: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a,
1465 Set_Str<OpcStr, "ftz.f32", "32", "$dst", "$a", "$b">.s,
1466 []>, Requires<[doF32FTZ]>;
1467 def f32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a,
1469 Set_Str<OpcStr, "f32", "32", "$dst", "$a", "$b">.s,
1471 def f64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Float64Regs:$a,
1473 Set_Str<OpcStr, "f64", "64", "$dst", "$a", "$b">.s,
1475 def f64rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float64Regs:$a,
1477 Set_Str<OpcStr, "f64", "32", "$dst", "$a", "$b">.s,
// Predicate-result forms: rr, ri and ir operand mixes for f32 (with and
// without .ftz) and f64.
1480 def f32rr_p_ftz: NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a
1482 !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
1483 [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>
1484 , Requires<[doF32FTZ]>;
1485 def f32rr_p: NVPTXInst<(outs Int1Regs:$dst),
1486 (ins Float32Regs:$a, Float32Regs:$b),
1487 !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
1488 [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>;
1489 def f32ri_p_ftz: NVPTXInst<(outs Int1Regs:$dst),
1490 (ins Float32Regs:$a, f32imm:$b),
1491 !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
1492 [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
1493 Requires<[doF32FTZ]>;
1494 def f32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a, f32imm:$b),
1495 !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
1496 [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
1497 def f32ir_p_ftz: NVPTXInst<(outs Int1Regs:$dst),
1498 (ins f32imm:$a, Float32Regs:$b),
1499 !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
1500 [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>,
1501 Requires<[doF32FTZ]>;
1502 def f32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f32imm:$a, Float32Regs:$b),
1503 !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
1504 [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>;
1505 def f64rr_p: NVPTXInst<(outs Int1Regs:$dst),
1506 (ins Float64Regs:$a, Float64Regs:$b),
1507 !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
1508 [(set Int1Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>;
1509 def f64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float64Regs:$a, f64imm:$b),
1510 !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
1511 [(set Int1Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>;
1512 def f64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f64imm:$a, Float64Regs:$b),
1513 !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
1514 [(set Int1Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>;
// f32 comparisons that write a 32-bit result, printed with OpcStr_u32, in
// rr / ri / ir operand mixes. Each mix has a flush-to-zero (".ftz") form and a
// plain form with the same selection pattern.
// The ".ftz" forms are gated on doF32FTZ, matching f32rr_toi32_ftz and the
// *_p_ftz forms of this multiclass. Without the gate an ".ftz" def and the
// plain def after it are indistinguishable to instruction selection, so the
// flush-to-zero opcode could be emitted when FTZ was not requested.
1516 def f32rr_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
1517 (ins Float32Regs:$a, Float32Regs:$b),
1518 !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
1519 [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>, Requires<[doF32FTZ]>;
1520 def f32rr_u32: NVPTXInst<(outs Int32Regs:$dst),
1521 (ins Float32Regs:$a, Float32Regs:$b),
1522 !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
1523 [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>;
1524 def f32ri_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
1525 (ins Float32Regs:$a, f32imm:$b),
1526 !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
1527 [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, Requires<[doF32FTZ]>;
1528 def f32ri_u32: NVPTXInst<(outs Int32Regs:$dst),
1529 (ins Float32Regs:$a, f32imm:$b),
1530 !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
1531 [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
1532 def f32ir_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
1533 (ins f32imm:$a, Float32Regs:$b),
1534 !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
1535 [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>, Requires<[doF32FTZ]>;
1536 def f32ir_u32: NVPTXInst<(outs Int32Regs:$dst),
1537 (ins f32imm:$a, Float32Regs:$b),
1538 !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
1539 [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>;
// f64 comparisons that write a 32-bit result, printed with OpcStr_u32, in
// rr / ri / ir operand mixes. There is no ".ftz" variant for f64.
1540 def f64rr_u32: NVPTXInst<(outs Int32Regs:$dst),
1541 (ins Float64Regs:$a, Float64Regs:$b),
1542 !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
1543 [(set Int32Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>;
1544 def f64ri_u32: NVPTXInst<(outs Int32Regs:$dst),
1545 (ins Float64Regs:$a, f64imm:$b),
1546 !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
1547 [(set Int32Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>;
1548 def f64ir_u32: NVPTXInst<(outs Int32Regs:$dst),
1549 (ins f64imm:$a, Float64Regs:$b),
1550 !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
1551 [(set Int32Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>;
// ISET_FORMAT instantiations, one per integer condition code, in signed (.s,
// converted with cvt.s16.s8) and unsigned (.u, converted with cvt.u16.u8)
// flavours. Arguments are, in order: predicate opcode prefix, 32-bit-result
// opcode prefix, setcc fragment, scratch type and 8->16 conversion opcode.
// NOTE(review): the "defm <name>" line that starts each statement is not
// visible here -- confirm the names against the full file.
1555 : ISET_FORMAT<"setp.gt.s", "set.gt.u32.s", setgt, "s16", "cvt.s16.s8">;
1557 : ISET_FORMAT<"setp.gt.u", "set.gt.u32.u", setugt, "u16", "cvt.u16.u8">;
1559 : ISET_FORMAT<"setp.lt.s", "set.lt.u32.s", setlt, "s16", "cvt.s16.s8">;
1561 : ISET_FORMAT<"setp.lt.u", "set.lt.u32.u", setult, "u16", "cvt.u16.u8">;
1563 : ISET_FORMAT<"setp.ge.s", "set.ge.u32.s", setge, "s16", "cvt.s16.s8">;
1565 : ISET_FORMAT<"setp.ge.u", "set.ge.u32.u", setuge, "u16", "cvt.u16.u8">;
1567 : ISET_FORMAT<"setp.le.s", "set.le.u32.s", setle, "s16", "cvt.s16.s8">;
1569 : ISET_FORMAT<"setp.le.u", "set.le.u32.u", setule, "u16", "cvt.u16.u8">;
1571 : ISET_FORMAT<"setp.eq.s", "set.eq.u32.s", seteq, "s16", "cvt.s16.s8">;
1573 : ISET_FORMAT<"setp.eq.u", "set.eq.u32.u", setueq, "u16", "cvt.u16.u8">;
1575 : ISET_FORMAT<"setp.ne.s", "set.ne.u32.s", setne, "s16", "cvt.s16.s8">;
1577 : ISET_FORMAT<"setp.ne.u", "set.ne.u32.u", setune, "u16", "cvt.u16.u8">;
// Comparisons of two 1-bit predicate values, producing a predicate.
// Inequality (setne / setune) is printed as a single xor.pred.
1579 def ISetSNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
1580 (ins Int1Regs:$a, Int1Regs:$b),
1581 "xor.pred \t$dst, $a, $b;",
1582 [(set Int1Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>;
1583 def ISetUNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
1584 (ins Int1Regs:$a, Int1Regs:$b),
1585 "xor.pred \t$dst, $a, $b;",
1586 [(set Int1Regs:$dst, (setune Int1Regs:$a, Int1Regs:$b))]>;
// Equality (seteq / setueq) is printed as xor.pred into a scratch predicate
// followed by not.pred, i.e. the complement of the inequality result.
1587 def ISetSEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
1588 (ins Int1Regs:$a, Int1Regs:$b),
1589 !strconcat("{{\n\t",
1590 !strconcat(".reg .pred temp;\n\t",
1591 !strconcat("xor.pred \ttemp, $a, $b;\n\t",
1592 !strconcat("not.pred \t$dst, temp;\n\t}}","")))),
1593 [(set Int1Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>;
1594 def ISetUEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
1595 (ins Int1Regs:$a, Int1Regs:$b),
1596 !strconcat("{{\n\t",
1597 !strconcat(".reg .pred temp;\n\t",
1598 !strconcat("xor.pred \ttemp, $a, $b;\n\t",
1599 !strconcat("not.pred \t$dst, temp;\n\t}}","")))),
1600 [(set Int1Regs:$dst, (setueq Int1Regs:$a, Int1Regs:$b))]>;
// Compare two i1 values and produce a u32: xor.pred computes "a != b" into a
// scoped temporary predicate, and selp.u32 turns it into -1 / 0 (operands
// swapped for the equality form).
def ISETSNEi1rr_u32
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
              !strconcat(
                "{{\n\t.reg .pred temp;\n\txor.pred \ttemp, $a, $b;\n\t",
                "selp.u32 \t$dst, -1, 0, temp;\n\t}}"),
              [(set Int32Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>;
def ISETSEQi1rr_u32
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
              !strconcat(
                "{{\n\t.reg .pred temp;\n\txor.pred \ttemp, $a, $b;\n\t",
                "selp.u32 \t$dst, 0, -1, temp;\n\t}}"),
              [(set Int32Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>;
// Floating-point compares matched on the ordered (seto*) condition codes.
defm FSetGT  : FSET_FORMAT<"setp.gt.",  "set.gt.u32.",  setogt>;
defm FSetLT  : FSET_FORMAT<"setp.lt.",  "set.lt.u32.",  setolt>;
defm FSetGE  : FSET_FORMAT<"setp.ge.",  "set.ge.u32.",  setoge>;
defm FSetLE  : FSET_FORMAT<"setp.le.",  "set.le.u32.",  setole>;
defm FSetEQ  : FSET_FORMAT<"setp.eq.",  "set.eq.u32.",  setoeq>;
defm FSetNE  : FSET_FORMAT<"setp.ne.",  "set.ne.u32.",  setone>;

// Floating-point compares matched on the unordered (setu*) condition codes;
// these use the PTX "*u" comparison operators.
defm FSetUGT : FSET_FORMAT<"setp.gtu.", "set.gtu.u32.", setugt>;
defm FSetULT : FSET_FORMAT<"setp.ltu.", "set.ltu.u32.", setult>;
defm FSetUGE : FSET_FORMAT<"setp.geu.", "set.geu.u32.", setuge>;
defm FSetULE : FSET_FORMAT<"setp.leu.", "set.leu.u32.", setule>;
defm FSetUEQ : FSET_FORMAT<"setp.equ.", "set.equ.u32.", setueq>;
defm FSetUNE : FSET_FORMAT<"setp.neu.", "set.neu.u32.", setune>;

// seto / setuo map onto the PTX "num" / "nan" comparison operators.
defm FSetNUM : FSET_FORMAT<"setp.num.", "set.num.u32.", seto>;
defm FSetNAN : FSET_FORMAT<"setp.nan.", "set.nan.u32.", setuo>;
// An i1 select is expanded into predicate logic: (p & a) | (!p & b).
def SELECTi1rr
  : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)),
        (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a),
                (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>;
// selp-based select for one value type.
//   TypeStr - PTX type suffix printed after "selp." (e.g. "b32", "f64").
//   RC      - register class of the result and of the register operands.
//   ImmCls  - operand class used for immediate operands.
//   ImmNode - DAG leaf matching an immediate (imm for integers, fpimm for
//             floating point).
// The four records cover register/immediate combinations of the two value
// operands; the predicate $p is always printed last. Record names are the
// defm name followed by rr / ri / ir / ii, exactly as the former hand-written
// defs were named. Sharing one asm string also removes the stray space that
// SELECTf64ii alone printed after the tab.
multiclass SELP_SELECT_FORMAT<string TypeStr, NVPTXRegClass RC, Operand ImmCls,
                              SDNode ImmNode> {
  def rr : NVPTXInst<(outs RC:$dst),
                     (ins RC:$a, RC:$b, Int1Regs:$p),
                     !strconcat("selp.",
                       !strconcat(TypeStr, " \t$dst, $a, $b, $p;")),
                     [(set RC:$dst, (select Int1Regs:$p, RC:$a, RC:$b))]>;
  def ri : NVPTXInst<(outs RC:$dst),
                     (ins RC:$a, ImmCls:$b, Int1Regs:$p),
                     !strconcat("selp.",
                       !strconcat(TypeStr, " \t$dst, $a, $b, $p;")),
                     [(set RC:$dst, (select Int1Regs:$p, RC:$a, ImmNode:$b))]>;
  def ir : NVPTXInst<(outs RC:$dst),
                     (ins ImmCls:$a, RC:$b, Int1Regs:$p),
                     !strconcat("selp.",
                       !strconcat(TypeStr, " \t$dst, $a, $b, $p;")),
                     [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, RC:$b))]>;
  def ii : NVPTXInst<(outs RC:$dst),
                     (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
                     !strconcat("selp.",
                       !strconcat(TypeStr, " \t$dst, $a, $b, $p;")),
                     [(set RC:$dst,
                       (select Int1Regs:$p, ImmNode:$a, ImmNode:$b))]>;
}

// i8 values are selected with selp.b16, as in the original definitions.
defm SELECTi8  : SELP_SELECT_FORMAT<"b16", Int8Regs,    i8imm,  imm>;
defm SELECTi16 : SELP_SELECT_FORMAT<"b16", Int16Regs,   i16imm, imm>;
defm SELECTi32 : SELP_SELECT_FORMAT<"b32", Int32Regs,   i32imm, imm>;
defm SELECTi64 : SELP_SELECT_FORMAT<"b64", Int64Regs,   i64imm, imm>;
defm SELECTf32 : SELP_SELECT_FORMAT<"f32", Float32Regs, f32imm, fpimm>;
defm SELECTf64 : SELP_SELECT_FORMAT<"f64", Float64Regs, f64imm, fpimm>;
1742 //def ld_param : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad,
1743 // [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
// Type profiles (result count, operand count, constraints) for the NVPTX
// call-lowering SelectionDAG nodes defined below.
// NOTE(review): the tail of SDTDeclareParamProfile was missing, leaving the
// constraint list unterminated; it is completed with SDTCisInt<2>, mirroring
// SDTDeclareScalarParamProfile and the all-i32 operands of DeclareParamInst.
def SDTDeclareParamProfile :
  SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>;
def SDTDeclareScalarParamProfile :
  SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>;
def SDTLoadParamProfile : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDTPrintCallProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDTPrintCallUniProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDTStoreParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTStoreParam32Profile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTCallArgProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
def SDTCallArgMarkProfile : SDTypeProfile<0, 0, []>;
def SDTCallVoidProfile : SDTypeProfile<0, 1, []>;
def SDTCallValProfile : SDTypeProfile<1, 0, []>;
def SDTMoveParamProfile : SDTypeProfile<1, 1, []>;
def SDTMoveRetvalProfile : SDTypeProfile<0, 1, []>;
def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>;
// Most call-sequence nodes carry the same property list (chained, glued on
// both sides, side-effecting); this helper spells that list once.
class NVPTXGluedSideEffectNode<string opc, SDTypeProfile prof>
  : SDNode<opc, prof,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;

def DeclareParam
  : NVPTXGluedSideEffectNode<"NVPTXISD::DeclareParam",
                             SDTDeclareParamProfile>;
def DeclareScalarParam
  : NVPTXGluedSideEffectNode<"NVPTXISD::DeclareScalarParam",
                             SDTDeclareScalarParamProfile>;
def DeclareRetParam
  : NVPTXGluedSideEffectNode<"NVPTXISD::DeclareRetParam",
                             SDTDeclareParamProfile>;
def DeclareRet
  : NVPTXGluedSideEffectNode<"NVPTXISD::DeclareRet",
                             SDTDeclareScalarParamProfile>;
// LoadParam is the one glued node that is a load rather than a side effect.
def LoadParam
  : SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile,
           [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
def PrintCall
  : NVPTXGluedSideEffectNode<"NVPTXISD::PrintCall", SDTPrintCallProfile>;
def PrintCallUni
  : NVPTXGluedSideEffectNode<"NVPTXISD::PrintCallUni",
                             SDTPrintCallUniProfile>;
def StoreParam
  : NVPTXGluedSideEffectNode<"NVPTXISD::StoreParam", SDTStoreParamProfile>;
def StoreParamU32
  : NVPTXGluedSideEffectNode<"NVPTXISD::StoreParamU32",
                             SDTStoreParam32Profile>;
def StoreParamS32
  : NVPTXGluedSideEffectNode<"NVPTXISD::StoreParamS32",
                             SDTStoreParam32Profile>;
def MoveToParam
  : NVPTXGluedSideEffectNode<"NVPTXISD::MoveToParam", SDTStoreParamProfile>;
def CallArgBegin
  : NVPTXGluedSideEffectNode<"NVPTXISD::CallArgBegin",
                             SDTCallArgMarkProfile>;
def CallArg
  : NVPTXGluedSideEffectNode<"NVPTXISD::CallArg", SDTCallArgProfile>;
def LastCallArg
  : NVPTXGluedSideEffectNode<"NVPTXISD::LastCallArg", SDTCallArgProfile>;
def CallArgEnd
  : NVPTXGluedSideEffectNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile>;
def CallVoid
  : NVPTXGluedSideEffectNode<"NVPTXISD::CallVoid", SDTCallVoidProfile>;
def Prototype
  : NVPTXGluedSideEffectNode<"NVPTXISD::Prototype", SDTCallVoidProfile>;
def CallVal
  : NVPTXGluedSideEffectNode<"NVPTXISD::CallVal", SDTCallValProfile>;
// NOTE(review): the node-property list of MoveParam was missing, which left
// the def unterminated. [SDNPHasChain] is the one property shared by every
// other node in this group; confirm that no glue or side-effect flag is
// expected here.
def MoveParam : SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile,
                       [SDNPHasChain]>;
// Return-value nodes: chained and side-effecting, but not glued.
def MoveRetval
  : SDNode<"NVPTXISD::MoveRetval", SDTMoveRetvalProfile,
           [SDNPHasChain, SDNPSideEffect]>;
def StoreRetval
  : SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile,
           [SDNPHasChain, SDNPSideEffect]>;
def MoveToRetval
  : SDNode<"NVPTXISD::MoveToRetval", SDTStoreRetvalProfile,
           [SDNPHasChain, SDNPSideEffect]>;
// PseudoUseParam is glued like the call-sequence nodes.
def PseudoUseParam
  : SDNode<"NVPTXISD::PseudoUseParam", SDTPseudoUseParamProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def RETURNNode
  : SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
           [SDNPHasChain, SDNPSideEffect]>;
// LoadParam with first operand 1: prints "ld.param<opstr>" reading from
// [retval0+$b] into a register of class `regclass`.
class LoadParamMemInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
              !strconcat("ld.param",
                !strconcat(opstr, "\t$dst, [retval0+$b];")),
              [(set regclass:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
// LoadParam with first operand 0: prints "mov<opstr>" copying retval$b into
// a register of class `regclass`.
class LoadParamRegInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
              !strconcat("mov", !strconcat(opstr, "\t$dst, retval$b;")),
              [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
// StoreParam: prints "st.param<opstr>" storing $val to [param$a+$b].
class StoreParamInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
              !strconcat("st.param",
                !strconcat(opstr, "\t[param$a+$b], $val;")),
              [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>;
// MoveToParam: prints "mov<opstr>" copying $val into param$a. The $b operand
// is matched by the pattern but does not appear in the printed instruction.
class MoveToParamInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
              !strconcat("mov", !strconcat(opstr, "\tparam$a, $val;")),
              [(MoveToParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>;
// StoreRetval: prints "st.param<opstr>" storing $val to [func_retval0+$a].
class StoreRetvalInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs), (ins regclass:$val, i32imm:$a),
              !strconcat("st.param",
                !strconcat(opstr, "\t[func_retval0+$a], $val;")),
              [(StoreRetval (i32 imm:$a), regclass:$val)]>;
// MoveToRetval: prints "mov<opstr>" copying $val into func_retval$num.
class MoveToRetvalInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs), (ins i32imm:$num, regclass:$val),
              !strconcat("mov",
                !strconcat(opstr, "\tfunc_retval$num, $val;")),
              [(MoveToRetval (i32 imm:$num), regclass:$val)]>;
// MoveRetval: prints "mov<opstr>" copying $val into func_retval0.
class MoveRetvalInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs), (ins regclass:$val),
              !strconcat("mov", !strconcat(opstr, "\tfunc_retval0, $val;")),
              [(MoveRetval regclass:$val)]>;
// Prints the head of a call that returns one value. The asm string was
// missing from this def (NVPTXInst was given only three arguments); it is
// restored to match PrintCallRetInst2..8 and PrintCallUniRetInst1.
def PrintCallRetInst1 : NVPTXInst<(outs), (ins),
                                  "call (retval0), ",
                                  [(PrintCall (i32 1))]>;
// Heads of "call" instructions returning 2..8 values; the PrintCall operand
// is the number of return values listed in the printed string.
def PrintCallRetInst2
  : NVPTXInst<(outs), (ins),
              "call (retval0, retval1), ",
              [(PrintCall (i32 2))]>;
def PrintCallRetInst3
  : NVPTXInst<(outs), (ins),
              "call (retval0, retval1, retval2), ",
              [(PrintCall (i32 3))]>;
def PrintCallRetInst4
  : NVPTXInst<(outs), (ins),
              "call (retval0, retval1, retval2, retval3), ",
              [(PrintCall (i32 4))]>;
def PrintCallRetInst5
  : NVPTXInst<(outs), (ins),
              "call (retval0, retval1, retval2, retval3, retval4), ",
              [(PrintCall (i32 5))]>;
def PrintCallRetInst6
  : NVPTXInst<(outs), (ins),
              "call (retval0, retval1, retval2, retval3, retval4, retval5), ",
              [(PrintCall (i32 6))]>;
def PrintCallRetInst7
  : NVPTXInst<(outs), (ins),
              !strconcat("call (retval0, retval1, retval2, retval3, ",
                         "retval4, retval5, retval6), "),
              [(PrintCall (i32 7))]>;
def PrintCallRetInst8
  : NVPTXInst<(outs), (ins),
              !strconcat("call (retval0, retval1, retval2, retval3, ",
                         "retval4, retval5, retval6, retval7), "),
              [(PrintCall (i32 8))]>;

// Head of a call with no return value.
def PrintCallNoRetInst
  : NVPTXInst<(outs), (ins), "call ", [(PrintCall (i32 0))]>;
// Heads of "call.uni" instructions returning 1..8 values; the PrintCallUni
// operand is the number of return values listed in the printed string.
def PrintCallUniRetInst1
  : NVPTXInst<(outs), (ins),
              "call.uni (retval0), ",
              [(PrintCallUni (i32 1))]>;
def PrintCallUniRetInst2
  : NVPTXInst<(outs), (ins),
              "call.uni (retval0, retval1), ",
              [(PrintCallUni (i32 2))]>;
def PrintCallUniRetInst3
  : NVPTXInst<(outs), (ins),
              "call.uni (retval0, retval1, retval2), ",
              [(PrintCallUni (i32 3))]>;
def PrintCallUniRetInst4
  : NVPTXInst<(outs), (ins),
              "call.uni (retval0, retval1, retval2, retval3), ",
              [(PrintCallUni (i32 4))]>;
def PrintCallUniRetInst5
  : NVPTXInst<(outs), (ins),
              "call.uni (retval0, retval1, retval2, retval3, retval4), ",
              [(PrintCallUni (i32 5))]>;
def PrintCallUniRetInst6
  : NVPTXInst<(outs), (ins),
              !strconcat("call.uni (retval0, retval1, retval2, retval3, ",
                         "retval4, retval5), "),
              [(PrintCallUni (i32 6))]>;
def PrintCallUniRetInst7
  : NVPTXInst<(outs), (ins),
              !strconcat("call.uni (retval0, retval1, retval2, retval3, ",
                         "retval4, retval5, retval6), "),
              [(PrintCallUni (i32 7))]>;
def PrintCallUniRetInst8
  : NVPTXInst<(outs), (ins),
              !strconcat("call.uni (retval0, retval1, retval2, retval3, ",
                         "retval4, retval5, retval6, retval7), "),
              [(PrintCallUni (i32 8))]>;

// Head of a call.uni with no return value.
def PrintCallUniNoRetInst
  : NVPTXInst<(outs), (ins), "call.uni ", [(PrintCallUni (i32 0))]>;
// ld.param of an integer return value from retval0, one record per width.
def LoadParamMemI64 : LoadParamMemInst<Int64Regs, ".b64">;
def LoadParamMemI32 : LoadParamMemInst<Int32Regs, ".b32">;
def LoadParamMemI16 : LoadParamMemInst<Int16Regs, ".b16">;
def LoadParamMemI8  : LoadParamMemInst<Int8Regs,  ".b8">;
1920 //def LoadParamMemI16 : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b),
1921 // !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t",
1922 // "cvt.u16.u32\t$dst, temp_param_reg;"),
1923 // [(set Int16Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
1924 //def LoadParamMemI8 : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b),
1925 // !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t",
1926 // "cvt.u16.u32\t$dst, temp_param_reg;"),
1927 // [(set Int8Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
// ld.param of a floating-point return value from retval0.
def LoadParamMemF32 : LoadParamMemInst<Float32Regs, ".f32">;
def LoadParamMemF64 : LoadParamMemInst<Float64Regs, ".f64">;

// Register-based return values: a plain mov for 32/64-bit integers...
def LoadParamRegI64 : LoadParamRegInst<Int64Regs, ".b64">;
def LoadParamRegI32 : LoadParamRegInst<Int32Regs, ".b32">;
// ...and a cvt.u16.u32 from retval$b for the i16 and i8 register classes.
def LoadParamRegI16
  : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b),
              "cvt.u16.u32\t$dst, retval$b;",
              [(set Int16Regs:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
def LoadParamRegI8
  : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b),
              "cvt.u16.u32\t$dst, retval$b;",
              [(set Int8Regs:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;

def LoadParamRegF32 : LoadParamRegInst<Float32Regs, ".f32">;
def LoadParamRegF64 : LoadParamRegInst<Float64Regs, ".f64">;
// st.param of an integer argument to [param$a+$b], one record per width.
// StoreParamI8 had lost the "[(StoreParam" opener of its pattern, leaving a
// malformed def. The i16 and i8 records are now expressed through
// StoreParamInst like their wider siblings, which yields the same operands,
// the same "st.param.b16" / "st.param.b8" asm strings and the same StoreParam
// patterns as the hand-written forms.
def StoreParamI64 : StoreParamInst<Int64Regs, ".b64">;
def StoreParamI32 : StoreParamInst<Int32Regs, ".b32">;
def StoreParamI16 : StoreParamInst<Int16Regs, ".b16">;
def StoreParamI8  : StoreParamInst<Int8Regs,  ".b8">;
// Narrow integer arguments stored as 32-bit params: the value is first
// converted into temp_param_reg (cvt.s32.* for StoreParamS32, cvt.u32.* for
// StoreParamU32) and then written with st.param.b32.
def StoreParamS32I16
  : NVPTXInst<(outs), (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
              !strconcat("cvt.s32.s16\ttemp_param_reg, $val;",
                         "\n\tst.param.b32\t[param$a+$b], temp_param_reg;"),
              [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
def StoreParamU32I16
  : NVPTXInst<(outs), (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
              !strconcat("cvt.u32.u16\ttemp_param_reg, $val;",
                         "\n\tst.param.b32\t[param$a+$b], temp_param_reg;"),
              [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;

def StoreParamU32I8
  : NVPTXInst<(outs), (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
              !strconcat("cvt.u32.u8\ttemp_param_reg, $val;",
                         "\n\tst.param.b32\t[param$a+$b], temp_param_reg;"),
              [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
def StoreParamS32I8
  : NVPTXInst<(outs), (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
              !strconcat("cvt.s32.s8\ttemp_param_reg, $val;",
                         "\n\tst.param.b32\t[param$a+$b], temp_param_reg;"),
              [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
// st.param of a floating-point argument to [param$a+$b].
def StoreParamF32 : StoreParamInst<Float32Regs, ".f32">;
def StoreParamF64 : StoreParamInst<Float64Regs, ".f64">;
// Register-based argument passing: mov into param$a for 32/64-bit values.
def MoveToParamI64 : MoveToParamInst<Int64Regs,   ".b64">;
def MoveToParamI32 : MoveToParamInst<Int32Regs,   ".b32">;
def MoveToParamF64 : MoveToParamInst<Float64Regs, ".f64">;
def MoveToParamF32 : MoveToParamInst<Float32Regs, ".f32">;
// i16 / i8 values are first converted into temp_param_reg with cvt.u32.u16
// and then moved with mov.b32.
def MoveToParamI16
  : NVPTXInst<(outs), (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
              !strconcat("cvt.u32.u16\ttemp_param_reg, $val;",
                         "\n\tmov.b32\tparam$a, temp_param_reg;"),
              [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
def MoveToParamI8
  : NVPTXInst<(outs), (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
              !strconcat("cvt.u32.u16\ttemp_param_reg, $val;",
                         "\n\tmov.b32\tparam$a, temp_param_reg;"),
              [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
// st.param of an integer return value to [func_retval0+$a], per width.
def StoreRetvalI64 : StoreRetvalInst<Int64Regs, ".b64">;
def StoreRetvalI32 : StoreRetvalInst<Int32Regs, ".b32">;
def StoreRetvalI16 : StoreRetvalInst<Int16Regs, ".b16">;
def StoreRetvalI8  : StoreRetvalInst<Int8Regs,  ".b8">;
2005 //def StoreRetvalI16 : NVPTXInst<(outs), (ins Int16Regs:$val, i32imm:$a),
2006 // !strconcat("\{\n\t",
2007 // !strconcat(".reg .b32 temp_retval_reg;\n\t",
2008 // !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t",
2009 // "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))),
2010 // [(StoreRetval (i32 imm:$a), Int16Regs:$val)]>;
2011 //def StoreRetvalI8 : NVPTXInst<(outs), (ins Int8Regs:$val, i32imm:$a),
2012 // !strconcat("\{\n\t",
2013 // !strconcat(".reg .b32 temp_retval_reg;\n\t",
2014 // !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t",
2015 // "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))),
2016 // [(StoreRetval (i32 imm:$a), Int8Regs:$val)]>;
// st.param of a floating-point return value to [func_retval0+$a].
def StoreRetvalF64 : StoreRetvalInst<Float64Regs, ".f64">;
def StoreRetvalF32 : StoreRetvalInst<Float32Regs, ".f32">;
// mov of a value into func_retval0, one record per value type.
def MoveRetvalI64 : MoveRetvalInst<Int64Regs,   ".b64">;
def MoveRetvalI32 : MoveRetvalInst<Int32Regs,   ".b32">;
def MoveRetvalI16 : MoveRetvalInst<Int16Regs,   ".b16">;
def MoveRetvalI8  : MoveRetvalInst<Int8Regs,    ".b8">;
def MoveRetvalF64 : MoveRetvalInst<Float64Regs, ".f64">;
def MoveRetvalF32 : MoveRetvalInst<Float32Regs, ".f32">;
// mov of a value into func_retval$num for 32/64-bit values.
def MoveToRetvalI64 : MoveToRetvalInst<Int64Regs,   ".b64">;
def MoveToRetvalI32 : MoveToRetvalInst<Int32Regs,   ".b32">;
def MoveToRetvalF64 : MoveToRetvalInst<Float64Regs, ".f64">;
def MoveToRetvalF32 : MoveToRetvalInst<Float32Regs, ".f32">;
// i16 / i8 values are written into func_retval$num with cvt.u32.u16.
def MoveToRetvalI16
  : NVPTXInst<(outs), (ins i32imm:$num, Int16Regs:$val),
              "cvt.u32.u16\tfunc_retval$num, $val;",
              [(MoveToRetval (i32 imm:$num), Int16Regs:$val)]>;
def MoveToRetvalI8
  : NVPTXInst<(outs), (ins i32imm:$num, Int8Regs:$val),
              "cvt.u32.u16\tfunc_retval$num, $val;",
              [(MoveToRetval (i32 imm:$num), Int8Regs:$val)]>;
// Punctuation of a printed call: "(" opens the argument list; CallArgEnd
// prints ");" for operand 1 and ")" for operand 0. RETURNInst prints "ret;".
def CallArgBeginInst
  : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>;
def CallArgEndInst1
  : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>;
def CallArgEndInst0
  : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>;
def RETURNInst
  : NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>;
// A non-final register call argument: prints the register followed by ", ".
class CallArgInst<NVPTXRegClass regclass>
  : NVPTXInst<(outs), (ins regclass:$a),
              "$a, ",
              [(CallArg (i32 0), regclass:$a)]>;
// The final register call argument: prints the register with no separator.
class LastCallArgInst<NVPTXRegClass regclass>
  : NVPTXInst<(outs), (ins regclass:$a),
              "$a",
              [(LastCallArg (i32 0), regclass:$a)]>;
// Non-final register arguments, one record per register class.
def CallArgI64 : CallArgInst<Int64Regs>;
def CallArgI32 : CallArgInst<Int32Regs>;
def CallArgI16 : CallArgInst<Int16Regs>;
def CallArgI8  : CallArgInst<Int8Regs>;

def CallArgF64 : CallArgInst<Float64Regs>;
def CallArgF32 : CallArgInst<Float32Regs>;

// Final register arguments, one record per register class.
def LastCallArgI64 : LastCallArgInst<Int64Regs>;
def LastCallArgI32 : LastCallArgInst<Int32Regs>;
def LastCallArgI16 : LastCallArgInst<Int16Regs>;
def LastCallArgI8  : LastCallArgInst<Int8Regs>;

def LastCallArgF64 : LastCallArgInst<Float64Regs>;
def LastCallArgF32 : LastCallArgInst<Float32Regs>;
// Immediate i32 arguments (first CallArg operand 0).
def CallArgI32imm
  : NVPTXInst<(outs), (ins i32imm:$a),
              "$a, ",
              [(CallArg (i32 0), (i32 imm:$a))]>;
def LastCallArgI32imm
  : NVPTXInst<(outs), (ins i32imm:$a),
              "$a",
              [(LastCallArg (i32 0), (i32 imm:$a))]>;

// Arguments that name a declared param (first CallArg operand 1): printed as
// "param<N>".
def CallArgParam
  : NVPTXInst<(outs), (ins i32imm:$a),
              "param$a, ",
              [(CallArg (i32 1), (i32 imm:$a))]>;
def LastCallArgParam
  : NVPTXInst<(outs), (ins i32imm:$a),
              "param$a",
              [(LastCallArg (i32 1), (i32 imm:$a))]>;
// Prints the callee of a call: a global address, or a 32-/64-bit register for
// an indirect call.
// NOTE(review): the asm string was missing from all three defs (NVPTXInst was
// given only three arguments). "$addr, " is restored on the basis of the
// surrounding pieces -- the PrintCall* head ends in "), " or "call ", the
// argument list opens with "(", and PTX separates the callee from the
// argument list with a comma -- confirm against the emitted call sequence.
def CallVoidInst : NVPTXInst<(outs), (ins imem:$addr),
                             "$addr, ",
                             [(CallVoid (Wrapper tglobaladdr:$addr))]>;
def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr),
                                "$addr, ",
                                [(CallVoid Int32Regs:$addr)]>;
def CallVoidInstReg64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
                                  "$addr, ",
                                  [(CallVoid Int64Regs:$addr)]>;
// Prints the trailing ", prototype_<N>;" of a call.
def PrototypeInst
  : NVPTXInst<(outs), (ins i32imm:$val),
              ", prototype_$val;",
              [(Prototype (i32 imm:$val))]>;
// Declarations of the return-value slot retval<num>:
//  - as an aligned .param byte array (DeclareRetParam),
//  - as a scalar .param (DeclareRet with first operand 1),
//  - as a .reg (DeclareRet with first operand 2).
def DeclareRetMemInst
  : NVPTXInst<(outs), (ins i32imm:$align, i32imm:$size, i32imm:$num),
              ".param .align $align .b8 retval$num[$size];",
              [(DeclareRetParam (i32 imm:$align), (i32 imm:$size),
                                (i32 imm:$num))]>;
def DeclareRetScalarInst
  : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
              ".param .b$size retval$num;",
              [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>;
def DeclareRetRegInst
  : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
              ".reg .b$size retval$num;",
              [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>;
// Declarations of the outgoing argument slot param<a>:
//  - as an aligned .param byte array (DeclareParam),
//  - as a scalar .param (DeclareScalarParam with last operand 0),
//  - as a .reg (DeclareScalarParam with last operand 1).
def DeclareParamInst
  : NVPTXInst<(outs), (ins i32imm:$align, i32imm:$a, i32imm:$size),
              ".param .align $align .b8 param$a[$size];",
              [(DeclareParam (i32 imm:$align), (i32 imm:$a),
                             (i32 imm:$size))]>;
def DeclareScalarParamInst
  : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
              ".param .b$size param$a;",
              [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>;
def DeclareScalarRegInst
  : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
              ".reg .b$size param$a;",
              [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>;
// MoveParam: prints "mov<asmstr>" copying $src into $dst within one register
// class.
class MoveParamInst<NVPTXRegClass regclass, string asmstr>
  : NVPTXInst<(outs regclass:$dst), (ins regclass:$src),
              !strconcat("mov", !strconcat(asmstr, "\t$dst, $src;")),
              [(set regclass:$dst, (MoveParam regclass:$src))]>;
// MoveParam records: a plain mov for 32/64-bit values, cvt.u16.u32 for the
// i16 and i8 register classes.
def MoveParamI64 : MoveParamInst<Int64Regs, ".b64">;
def MoveParamI32 : MoveParamInst<Int32Regs, ".b32">;
def MoveParamI16
  : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
              "cvt.u16.u32\t$dst, $src;",
              [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>;
def MoveParamI8
  : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
              "cvt.u16.u32\t$dst, $src;",
              [(set Int8Regs:$dst, (MoveParam Int8Regs:$src))]>;
def MoveParamF64 : MoveParamInst<Float64Regs, ".f64">;
def MoveParamF32 : MoveParamInst<Float32Regs, ".f32">;
// PseudoUseParam: consumes a register and prints only a comment naming it.
class PseudoUseParamInst<NVPTXRegClass regclass>
  : NVPTXInst<(outs), (ins regclass:$src),
              "// Pseudo use of $src",
              [(PseudoUseParam regclass:$src)]>;
// One pseudo-use record per scalar register class.
def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs>;
def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs>;
def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs>;
def PseudoUseParamI8  : PseudoUseParamInst<Int8Regs>;
def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>;
def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>;
2143 // Load / Store Handling
// Load instructions for one destination register class. The four records
// differ only in how the address is formed:
//   _avar - symbol            [$addr]
//   _areg - 32-bit register   [$addr]
//   _ari  - register + imm    [$addr+$offset]
//   _asi  - symbol + imm      [$addr+$offset]
// The LdStCode operands are printed with the volatile/addsp/vec/sign
// modifiers, followed by the access width. No selection patterns are
// attached (the pattern lists are empty).
// The multiclass was missing its closing brace; it is restored here.
multiclass LD<NVPTXRegClass regclass> {
  def _avar : NVPTXInst<(outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t$dst, [$addr];"), []>;
  def _areg : NVPTXInst<(outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t$dst, [$addr];"), []>;
  def _ari : NVPTXInst<(outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t$dst, [$addr+$offset];"), []>;
  def _asi : NVPTXInst<(outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t$dst, [$addr+$offset];"), []>;
}
// Scalar loads. The `let` block was missing its closing brace; it is
// restored here so the flags apply to exactly these six instantiations.
let mayLoad=1, neverHasSideEffects=1 in {
  defm LD_i8  : LD<Int8Regs>;
  defm LD_i16 : LD<Int16Regs>;
  defm LD_i32 : LD<Int32Regs>;
  defm LD_i64 : LD<Int64Regs>;
  defm LD_f32 : LD<Float32Regs>;
  defm LD_f64 : LD<Float64Regs>;
}
// Loads into vector register classes, tagged with VecInstType = isVecLD.
// The `let` block was missing its closing brace; it is restored here.
let VecInstType=isVecLD.Value, mayLoad=1, neverHasSideEffects=1 in {
  defm LD_v2i8  : LD<V2I8Regs>;
  defm LD_v4i8  : LD<V4I8Regs>;
  defm LD_v2i16 : LD<V2I16Regs>;
  defm LD_v4i16 : LD<V4I16Regs>;
  defm LD_v2i32 : LD<V2I32Regs>;
  defm LD_v4i32 : LD<V4I32Regs>;
  defm LD_v2f32 : LD<V2F32Regs>;
  defm LD_v4f32 : LD<V4F32Regs>;
  defm LD_v2i64 : LD<V2I64Regs>;
  defm LD_v2f64 : LD<V2F64Regs>;
}
// Store instructions for one source register class. The four records differ
// only in how the address is formed:
//   _avar - symbol            [$addr]
//   _areg - 32-bit register   [$addr]
//   _ari  - register + imm    [$addr+$offset]
//   _asi  - symbol + imm      [$addr+$offset]
// The LdStCode operands are printed with the volatile/addsp/vec/sign
// modifiers, followed by the access width. No selection patterns are
// attached (the pattern lists are empty).
// The multiclass was missing its closing brace; it is restored here.
multiclass ST<NVPTXRegClass regclass> {
  def _avar : NVPTXInst<(outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, imem:$addr),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$toWidth \t[$addr], $src;"), []>;
  def _areg : NVPTXInst<(outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$toWidth \t[$addr], $src;"), []>;
  def _ari : NVPTXInst<(outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$toWidth \t[$addr+$offset], $src;"), []>;
  def _asi : NVPTXInst<(outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$toWidth \t[$addr+$offset], $src;"), []>;
}
// Scalar stores. The `let` block was missing its closing brace; it is
// restored here so the flags apply to exactly these six instantiations.
let mayStore=1, neverHasSideEffects=1 in {
  defm ST_i8  : ST<Int8Regs>;
  defm ST_i16 : ST<Int16Regs>;
  defm ST_i32 : ST<Int32Regs>;
  defm ST_i64 : ST<Int64Regs>;
  defm ST_f32 : ST<Float32Regs>;
  defm ST_f64 : ST<Float64Regs>;
}
// Stores from vector register classes, tagged with VecInstType = isVecST.
// The `let` block was missing its closing brace; it is restored here.
let VecInstType=isVecST.Value, mayStore=1, neverHasSideEffects=1 in {
  defm ST_v2i8  : ST<V2I8Regs>;
  defm ST_v4i8  : ST<V4I8Regs>;
  defm ST_v2i16 : ST<V2I16Regs>;
  defm ST_v4i16 : ST<V4I16Regs>;
  defm ST_v2i32 : ST<V2I32Regs>;
  defm ST_v4i32 : ST<V4I32Regs>;
  defm ST_v2f32 : ST<V2F32Regs>;
  defm ST_v4f32 : ST<V4F32Regs>;
  defm ST_v2i64 : ST<V2I64Regs>;
  defm ST_v2f64 : ST<V2F64Regs>;
}
// The following instructions are used only during and after vector
// elementization. Vector elementization happens at the machine instruction
// level, so these instructions never appear in the DAG.
// Element-wise vector loads: one instruction writes two (_v2_*) or four
// (_v4_*) scalar registers of class `regclass`, printed as a brace-enclosed
// register list. The address forms mirror the scalar loads:
//   avar - symbol, areg - 32-bit register,
//   ari  - register + imm, asi - symbol + imm.
// No selection patterns are attached (the pattern lists are empty).
// Restored here: the "regclass:$dst4)" tail of the (outs ...) list in
// _v4_areg / _v4_ari / _v4_asi, the "[]>;" terminator of _v4_ari / _v4_asi,
// and the closing brace of the multiclass, all of which were missing. The
// restored text matches the intact _v4_avar record.
multiclass LD_VEC<NVPTXRegClass regclass> {
  def _v2_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
  def _v2_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
  def _v2_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
  def _v2_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
  def _v4_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                 regclass:$dst3, regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
  def _v4_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                 regclass:$dst3, regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
  def _v4_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
    []>;
  def _v4_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
               "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
    []>;
}
// Instantiate LD_VEC for every scalar register class. All records created
// here are flagged mayLoad=1 and neverHasSideEffects=1 by the enclosing `let`.
2287 let mayLoad=1, neverHasSideEffects=1 in {
2288 defm LDV_i8 : LD_VEC<Int8Regs>;
2289 defm LDV_i16 : LD_VEC<Int16Regs>;
2290 defm LDV_i32 : LD_VEC<Int32Regs>;
2291 defm LDV_i64 : LD_VEC<Int64Regs>;
2292 defm LDV_f32 : LD_VEC<Float32Regs>;
2293 defm LDV_f64 : LD_VEC<Float64Regs>;
// ST_VEC: store counterpart of LD_VEC. Two-element (_v2_*) and four-element
// (_v4_*) stores take the element registers ($src1..$srcN) as inputs and have
// no outputs. The name suffix selects the addressing form:
//   _avar : imem:$addr
//   _areg : Int32Regs:$addr
//   _ari  : Int32Regs:$addr, printed as [$addr+$offset]
//   _asi  : imem:$addr, printed as [$addr+$offset]
// The LdStCode operands ($isVol, $addsp, $Vec, $Sign) and $fromWidth are
// printed as modifiers on the `st` mnemonic.
2296 multiclass ST_VEC<NVPTXRegClass regclass> {
2297 def _v2_avar : NVPTXInst<(outs),
2298 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2299 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
2300 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2301 "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
2302 def _v2_areg : NVPTXInst<(outs),
2303 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2304 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
2305 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2306 "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
2307 def _v2_ari : NVPTXInst<(outs),
2308 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2309 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr,
2311 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2312 "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
2313 def _v2_asi : NVPTXInst<(outs),
2314 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2315 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr,
2317 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2318 "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
2319 def _v4_avar : NVPTXInst<(outs),
2320 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2321 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2322 i32imm:$fromWidth, imem:$addr),
2323 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2324 "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
2325 def _v4_areg : NVPTXInst<(outs),
2326 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2327 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2328 i32imm:$fromWidth, Int32Regs:$addr),
2329 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2330 "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
2331 def _v4_ari : NVPTXInst<(outs),
2332 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2333 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2334 i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
2335 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2336 "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
2338 def _v4_asi : NVPTXInst<(outs),
2339 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2340 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2341 i32imm:$fromWidth, imem:$addr, i32imm:$offset),
2342 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2343 "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
// Instantiate ST_VEC for every scalar register class. All records created
// here are flagged mayStore=1 and neverHasSideEffects=1 by the enclosing `let`.
2346 let mayStore=1, neverHasSideEffects=1 in {
2347 defm STV_i8 : ST_VEC<Int8Regs>;
2348 defm STV_i16 : ST_VEC<Int16Regs>;
2349 defm STV_i32 : ST_VEC<Int32Regs>;
2350 defm STV_i64 : ST_VEC<Int64Regs>;
2351 defm STV_f32 : ST_VEC<Float32Regs>;
2352 defm STV_f64 : ST_VEC<Float64Regs>;
2356 //---- Conversion ----
// CVT_INT_TO_FP: integer-to-floating-point conversions matched on OpNode.
//   OpStr  - source-type letter spliced into the cvt mnemonic (e.g. "s"
//            yields "cvt.rn.f32.s32").
//   OpNode - the DAG node each record's pattern matches.
// Int1Regs sources are handled with selp, choosing between the literals 1.0
// and 0.0 on the predicate $a; wider sources use cvt.rn.<f32|f64>.<OpStr><N>.
// Int8Regs sources are printed with an 8-bit source type ("...<OpStr>8").
2358 multiclass CVT_INT_TO_FP <string OpStr, SDNode OpNode> {
2359 // FIXME: need to add f16 support
2361 // NVPTXInst<(outs Float16Regs:$d), (ins Int8Regs:$a),
2362 // !strconcat(!strconcat("cvt.rn.f16.", OpStr), "8 \t$d, $a;"),
2363 // [(set Float16Regs:$d, (OpNode Int8Regs:$a))]>;
2365 // NVPTXInst<(outs Float16Regs:$d), (ins Int16Regs:$a),
2366 // !strconcat(!strconcat("cvt.rn.f16.", OpStr), "16 \t$d, $a;"),
2367 // [(set Float16Regs:$d, (OpNode Int16Regs:$a))]>;
2369 // NVPTXInst<(outs Float16Regs:$d), (ins Int32Regs:$a),
2370 // !strconcat(!strconcat("cvt.rn.f16.", OpStr), "32 \t$d, $a;"),
2371 // [(set Float16Regs:$d, (OpNode Int32Regs:$a))]>;
// NOTE(review): the commented-out 64-bit sketch below declares a Float16Regs
// output but uses "cvt.rn.f32." and sets Float32Regs; make it consistently
// f16 before enabling it.
2373 // NVPTXInst<(outs Float16Regs:$d), (ins Int64Regs:$a),
2374 // !strconcat(!strconcat("cvt.rn.f32.", OpStr), "64 \t$d, $a;"),
2375 // [(set Float32Regs:$d, (OpNode Int64Regs:$a))]>;
// ---- f32 results ----
2378 NVPTXInst<(outs Float32Regs:$d), (ins Int1Regs:$a),
2379 "selp.f32 \t$d, 1.0, 0.0, $a;",
2380 [(set Float32Regs:$d, (OpNode Int1Regs:$a))]>;
2382 NVPTXInst<(outs Float32Regs:$d), (ins Int8Regs:$a),
2383 !strconcat(!strconcat("cvt.rn.f32.", OpStr), "8 \t$d, $a;"),
2384 [(set Float32Regs:$d, (OpNode Int8Regs:$a))]>;
2386 NVPTXInst<(outs Float32Regs:$d), (ins Int16Regs:$a),
2387 !strconcat(!strconcat("cvt.rn.f32.", OpStr), "16 \t$d, $a;"),
2388 [(set Float32Regs:$d, (OpNode Int16Regs:$a))]>;
2390 NVPTXInst<(outs Float32Regs:$d), (ins Int32Regs:$a),
2391 !strconcat(!strconcat("cvt.rn.f32.", OpStr), "32 \t$d, $a;"),
2392 [(set Float32Regs:$d, (OpNode Int32Regs:$a))]>;
2394 NVPTXInst<(outs Float32Regs:$d), (ins Int64Regs:$a),
2395 !strconcat(!strconcat("cvt.rn.f32.", OpStr), "64 \t$d, $a;"),
2396 [(set Float32Regs:$d, (OpNode Int64Regs:$a))]>;
// ---- f64 results ----
2399 NVPTXInst<(outs Float64Regs:$d), (ins Int1Regs:$a),
2400 "selp.f64 \t$d, 1.0, 0.0, $a;",
2401 [(set Float64Regs:$d, (OpNode Int1Regs:$a))]>;
2403 NVPTXInst<(outs Float64Regs:$d), (ins Int8Regs:$a),
2404 !strconcat(!strconcat("cvt.rn.f64.", OpStr), "8 \t$d, $a;"),
2405 [(set Float64Regs:$d, (OpNode Int8Regs:$a))]>;
2407 NVPTXInst<(outs Float64Regs:$d), (ins Int16Regs:$a),
2408 !strconcat(!strconcat("cvt.rn.f64.", OpStr), "16 \t$d, $a;"),
2409 [(set Float64Regs:$d, (OpNode Int16Regs:$a))]>;
2411 NVPTXInst<(outs Float64Regs:$d), (ins Int32Regs:$a),
2412 !strconcat(!strconcat("cvt.rn.f64.", OpStr), "32 \t$d, $a;"),
2413 [(set Float64Regs:$d, (OpNode Int32Regs:$a))]>;
2415 NVPTXInst<(outs Float64Regs:$d), (ins Int64Regs:$a),
2416 !strconcat(!strconcat("cvt.rn.f64.", OpStr), "64 \t$d, $a;"),
2417 [(set Float64Regs:$d, (OpNode Int64Regs:$a))]>;
// Signed ("s") and unsigned ("u") integer-to-FP conversions, matched on
// sint_to_fp and uint_to_fp respectively.
2420 defm Sint_to_fp : CVT_INT_TO_FP <"s", sint_to_fp>;
2421 defm Uint_to_fp : CVT_INT_TO_FP <"u", uint_to_fp>;
// CVT_FP_TO_INT: floating-point-to-integer conversions matched on OpNode,
// all printed as cvt.rzi.
//   OpStr  - destination-type letter spliced into the mnemonic (e.g. "s"
//            yields "cvt.rzi.s32.f32").
//   OpNode - the DAG node each record's pattern matches.
// Every f32 source appears twice: a "cvt.rzi.ftz." form guarded by
// Requires<[doF32FTZ]> and an unguarded plain form. f64 sources have only
// the plain form.
// Int8Regs results are printed with a 16-bit destination type ("16.f32" /
// "16.f64"), the same text used for Int16Regs results.
2423 multiclass CVT_FP_TO_INT <string OpStr, SDNode OpNode> {
2424 // FIXME: need to add f16 support
2426 // NVPTXInst<(outs Int8Regs:$d), (ins Float16Regs:$a),
2427 // !strconcat(!strconcat("cvt.rzi.", OpStr), "8.f16 $d, $a;"),
2428 // [(set Int8Regs:$d, (OpNode Float16Regs:$a))]>;
// ---- Int8Regs results ----
2430 NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a),
2431 !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"),
2432 [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
2434 NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a),
2435 !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"),
2436 [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>;
2438 NVPTXInst<(outs Int8Regs:$d), (ins Float64Regs:$a),
2439 !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"),
2440 [(set Int8Regs:$d, (OpNode Float64Regs:$a))]>;
2442 // FIXME: need to add f16 support
2444 // NVPTXInst<(outs Int16Regs:$d), (ins Float16Regs:$a),
2445 // !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f16 \t$d, $a;"),
2446 // [(set Int16Regs:$d, (OpNode Float16Regs:$a))]>;
// ---- Int16Regs results ----
2448 NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a),
2449 !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"),
2450 [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
2452 NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a),
2453 !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"),
2454 [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>;
2456 NVPTXInst<(outs Int16Regs:$d), (ins Float64Regs:$a),
2457 !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"),
2458 [(set Int16Regs:$d, (OpNode Float64Regs:$a))]>;
2460 // FIXME: need to add f16 support
2461 // def CVTi32f16:
2462 // NVPTXInst<(outs Int32Regs:$d), (ins Float16Regs:$a),
2463 // !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f16 \t$d, $a;"),
2464 // [(set Int32Regs:$d, (OpNode Float16Regs:$a))]>;
// ---- Int32Regs results ----
2466 NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a),
2467 !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "32.f32 \t$d, $a;"),
2468 [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
2470 NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a),
2471 !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f32 \t$d, $a;"),
2472 [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>;
2474 NVPTXInst<(outs Int32Regs:$d), (ins Float64Regs:$a),
2475 !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f64 \t$d, $a;"),
2476 [(set Int32Regs:$d, (OpNode Float64Regs:$a))]>;
2478 // FIXME: need to add f16 support
2480 // NVPTXInst<(outs Int64Regs:$d), (ins Float16Regs:$a),
2481 // !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f16 \t$d, $a;"),
2482 // [(set Int64Regs:$d, (OpNode Float16Regs:$a))]>;
// ---- Int64Regs results ----
2484 NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a),
2485 !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "64.f32 \t$d, $a;"),
2486 [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
2488 NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a),
2489 !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f32 \t$d, $a;"),
2490 [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>;
2492 NVPTXInst<(outs Int64Regs:$d), (ins Float64Regs:$a),
2493 !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f64 \t$d, $a;"),
2494 [(set Int64Regs:$d, (OpNode Float64Regs:$a))]>;
// Signed ("s") and unsigned ("u") FP-to-integer conversions, matched on
// fp_to_sint and fp_to_uint respectively.
2497 defm Fp_to_sint : CVT_FP_TO_INT <"s", fp_to_sint>;
2498 defm Fp_to_uint : CVT_FP_TO_INT <"u", fp_to_uint>;
// INT_EXTEND_UNSIGNED_1: extension of an Int1Regs predicate to an 8/16/32/64-bit
// integer register, matched on OpNode. Each record uses selp to choose
// between the literals 1 and 0 on the predicate $a. The Int8Regs result is
// printed with selp.u16, the same text as the Int16Regs result.
2500 multiclass INT_EXTEND_UNSIGNED_1 <SDNode OpNode> {
2502 NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a),
2503 "selp.u16 \t$d, 1, 0, $a;",
2504 [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>;
2506 NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a),
2507 "selp.u16 \t$d, 1, 0, $a;",
2508 [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>;
2510 NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a),
2511 "selp.u32 \t$d, 1, 0, $a;",
2512 [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>;
2514 NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a),
2515 "selp.u64 \t$d, 1, 0, $a;",
2516 [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>;
// INT_EXTEND_SIGNED_1: signed counterpart of INT_EXTEND_UNSIGNED_1. Each
// record uses selp to choose between the literals -1 and 0 on the Int1Regs
// predicate $a. The Int8Regs result is printed with selp.s16, the same text
// as the Int16Regs result.
2519 multiclass INT_EXTEND_SIGNED_1 <SDNode OpNode> {
2521 NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a),
2522 "selp.s16 \t$d, -1, 0, $a;",
2523 [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>;
2525 NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a),
2526 "selp.s16 \t$d, -1, 0, $a;",
2527 [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>;
2529 NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a),
2530 "selp.s32 \t$d, -1, 0, $a;",
2531 [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>;
2533 NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a),
2534 "selp.s64 \t$d, -1, 0, $a;",
2535 [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>;
// INT_EXTEND: widening integer conversions between the 8/16/32/64-bit
// register classes, matched on OpNode.
//   OpStr - type letter used for BOTH the destination and the source type in
//           the mnemonic, giving "cvt.<OpStr><dst>.<OpStr><src>".
// Six records cover 8->16, 8->32, 8->64, 16->32, 16->64 and 32->64.
2538 multiclass INT_EXTEND <string OpStr, SDNode OpNode> {
2539 // All Int8Regs are emitted as 16-bit registers in PTX,
2540 // and there is no selp.u8 in PTX.
2542 NVPTXInst<(outs Int16Regs:$d), (ins Int8Regs:$a),
2543 !strconcat("cvt.", !strconcat(OpStr, !strconcat("16.",
2544 !strconcat(OpStr, "8 \t$d, $a;")))),
2545 [(set Int16Regs:$d, (OpNode Int8Regs:$a))]>;
2547 NVPTXInst<(outs Int32Regs:$d), (ins Int8Regs:$a),
2548 !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.",
2549 !strconcat(OpStr, "8 \t$d, $a;")))),
2550 [(set Int32Regs:$d, (OpNode Int8Regs:$a))]>;
2552 NVPTXInst<(outs Int64Regs:$d), (ins Int8Regs:$a),
2553 !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
2554 !strconcat(OpStr, "8 \t$d, $a;")))),
2555 [(set Int64Regs:$d, (OpNode Int8Regs:$a))]>;
2557 NVPTXInst<(outs Int32Regs:$d), (ins Int16Regs:$a),
2558 !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.",
2559 !strconcat(OpStr, "16 \t$d, $a;")))),
2560 [(set Int32Regs:$d, (OpNode Int16Regs:$a))]>;
2562 NVPTXInst<(outs Int64Regs:$d), (ins Int16Regs:$a),
2563 !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
2564 !strconcat(OpStr, "16 \t$d, $a;")))),
2565 [(set Int64Regs:$d, (OpNode Int16Regs:$a))]>;
2567 NVPTXInst<(outs Int64Regs:$d), (ins Int32Regs:$a),
2568 !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
2569 !strconcat(OpStr, "32 \t$d, $a;")))),
2570 [(set Int64Regs:$d, (OpNode Int32Regs:$a))]>;
// Extensions from i1: sext uses the signed (-1/0) forms; zext and anyext both
// use the unsigned (1/0) forms.
2573 defm Sint_extend_1 : INT_EXTEND_SIGNED_1<sext>;
2574 defm Zint_extend_1 : INT_EXTEND_UNSIGNED_1<zext>;
2575 defm Aint_extend_1 : INT_EXTEND_UNSIGNED_1<anyext>;
// Extensions between wider integer types: sext converts with "s" types; zext
// and anyext both convert with "u" types.
2577 defm Sint_extend : INT_EXTEND <"s", sext>;
2578 defm Zint_extend : INT_EXTEND <"u", zext>;
2579 defm Aint_extend : INT_EXTEND <"u", anyext>;
// TRUNC_to1_asm<sz>: helper whose field `s` holds the multi-line asm text
// used by the TRUNC_*to1 records; `sz` is a PTX type suffix such as ".b32".
// The string is wrapped in "{{ ... }}" (a literal brace-delimited PTX block),
// names a scratch value `temp`, applies an operation to ($a, 1) that writes
// temp, and finishes with a "<sz>.eq" comparison of temp against 1 that
// writes $d.
// NOTE(review): presumably this masks the low bit and compares it with 1 --
// confirm against the complete string.
2581 class TRUNC_to1_asm<string sz> {
2582 string s = !strconcat("{{\n\t",
2585 !strconcat(" temp;\n\t",
2588 !strconcat("\t temp, $a, 1;\n\t",
2590 !strconcat(sz, ".eq \t $d, temp, 1;\n\t}}")))))))));
// Integer truncations matched on `trunc`.
// Truncations between the 8/16/32/64-bit register classes are single cvt
// instructions of the form "cvt.u<dst>.u<src>".
2593 def TRUNC_64to32 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
2594 "cvt.u32.u64 \t$d, $a;",
2595 [(set Int32Regs:$d, (trunc Int64Regs:$a))]>;
2596 def TRUNC_64to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int64Regs:$a),
2597 "cvt.u16.u64 \t$d, $a;",
2598 [(set Int16Regs:$d, (trunc Int64Regs:$a))]>;
2599 def TRUNC_64to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int64Regs:$a),
2600 "cvt.u8.u64 \t$d, $a;",
2601 [(set Int8Regs:$d, (trunc Int64Regs:$a))]>;
2602 def TRUNC_32to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int32Regs:$a),
2603 "cvt.u16.u32 \t$d, $a;",
2604 [(set Int16Regs:$d, (trunc Int32Regs:$a))]>;
2605 def TRUNC_32to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int32Regs:$a),
2606 "cvt.u8.u32 \t$d, $a;",
2607 [(set Int8Regs:$d, (trunc Int32Regs:$a))]>;
2608 def TRUNC_16to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int16Regs:$a),
2609 "cvt.u8.u16 \t$d, $a;",
2610 [(set Int8Regs:$d, (trunc Int16Regs:$a))]>;
// Truncations to Int1Regs take their asm text from TRUNC_to1_asm, passing the
// source width as a ".b<N>" suffix. The Int8Regs source uses ".b16", the same
// suffix as the Int16Regs source.
2611 def TRUNC_64to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2612 TRUNC_to1_asm<".b64">.s,
2613 [(set Int1Regs:$d, (trunc Int64Regs:$a))]>;
2614 def TRUNC_32to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2615 TRUNC_to1_asm<".b32">.s,
2616 [(set Int1Regs:$d, (trunc Int32Regs:$a))]>;
2617 def TRUNC_16to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int16Regs:$a),
2618 TRUNC_to1_asm<".b16">.s,
2619 [(set Int1Regs:$d, (trunc Int16Regs:$a))]>;
2620 def TRUNC_8to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int8Regs:$a),
2621 TRUNC_to1_asm<".b16">.s,
2622 [(set Int1Regs:$d, (trunc Int8Regs:$a))]>;
2624 // Select instructions
// Patterns for `select` whose condition is held in an Int32Regs register:
// the condition is first narrowed to a predicate with TRUNC_32to1 and then
// passed as the last operand of the SELECT*rr instruction for the value type.
// One pattern per value register class (i8, i16, i32, i64, f32, f64).
2625 def : Pat<(select Int32Regs:$pred, Int8Regs:$a, Int8Regs:$b),
2626 (SELECTi8rr Int8Regs:$a, Int8Regs:$b, (TRUNC_32to1 Int32Regs:$pred))>;
2627 def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b),
2628 (SELECTi16rr Int16Regs:$a, Int16Regs:$b,
2629 (TRUNC_32to1 Int32Regs:$pred))>;
2630 def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b),
2631 (SELECTi32rr Int32Regs:$a, Int32Regs:$b,
2632 (TRUNC_32to1 Int32Regs:$pred))>;
2633 def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b),
2634 (SELECTi64rr Int64Regs:$a, Int64Regs:$b,
2635 (TRUNC_32to1 Int32Regs:$pred))>;
2636 def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b),
2637 (SELECTf32rr Float32Regs:$a, Float32Regs:$b,
2638 (TRUNC_32to1 Int32Regs:$pred))>;
2639 def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b),
2640 (SELECTf64rr Float64Regs:$a, Float64Regs:$b,
2641 (TRUNC_32to1 Int32Regs:$pred))>;
// F_BITCONVERT: `bitconvert` between two register classes, printed as
// "mov.b<SzStr>".
//   SzStr       - bit width appended to "mov.b" (e.g. "32").
//   regclassIn  - register class of the source operand $a.
//   regclassOut - register class of the result $d.
2643 class F_BITCONVERT<string SzStr, NVPTXRegClass regclassIn,
2644 NVPTXRegClass regclassOut> :
2645 NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a),
2646 !strconcat("mov.b", !strconcat(SzStr, " \t $d, $a;")),
2647 [(set regclassOut:$d, (bitconvert regclassIn:$a))]>;
// Integer <-> float bit casts at 32 and 64 bits, one record per direction.
2649 def BITCONVERT_32_I2F : F_BITCONVERT<"32", Int32Regs, Float32Regs>;
2650 def BITCONVERT_32_F2I : F_BITCONVERT<"32", Float32Regs, Int32Regs>;
2651 def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs, Float64Regs>;
2652 def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>;
2654 // Pack a set of smaller registers into one larger register. Most records
// pack integers; V2F32toF64 packs two f32 registers into an f64 register.
// The 16- and 32-bit element forms are a single mov.b32/mov.b64 from a
// brace-enclosed element list. The 8-bit element forms first copy each source
// into a locally declared .b8 temporary (%t<N>) with cvt.u8.u8 and then mov
// the temporaries into the result.
2655 def V4I8toI32 : NVPTXInst<(outs Int32Regs:$d),
2656 (ins Int8Regs:$s1, Int8Regs:$s2,
2657 Int8Regs:$s3, Int8Regs:$s4),
2658 !strconcat("{{\n\t.reg .b8\t%t<4>;",
2659 !strconcat("\n\tcvt.u8.u8\t%t0, $s1;",
2660 !strconcat("\n\tcvt.u8.u8\t%t1, $s2;",
2661 !strconcat("\n\tcvt.u8.u8\t%t2, $s3;",
2662 !strconcat("\n\tcvt.u8.u8\t%t3, $s4;",
2663 "\n\tmov.b32\t$d, {%t0, %t1, %t2, %t3};\n\t}}"))))),
2665 def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d),
2666 (ins Int16Regs:$s1, Int16Regs:$s2,
2667 Int16Regs:$s3, Int16Regs:$s4),
2668 "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};",
2670 def V2I8toI16 : NVPTXInst<(outs Int16Regs:$d),
2671 (ins Int8Regs:$s1, Int8Regs:$s2),
2672 !strconcat("{{\n\t.reg .b8\t%t<2>;",
2673 !strconcat("\n\tcvt.u8.u8\t%t0, $s1;",
2674 !strconcat("\n\tcvt.u8.u8\t%t1, $s2;",
2675 "\n\tmov.b16\t$d, {%t0, %t1};\n\t}}"))),
2677 def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d),
2678 (ins Int16Regs:$s1, Int16Regs:$s2),
2679 "mov.b32\t$d, {{$s1, $s2}};",
2681 def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d),
2682 (ins Int32Regs:$s1, Int32Regs:$s2),
2683 "mov.b64\t$d, {{$s1, $s2}};",
2685 def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d),
2686 (ins Float32Regs:$s1, Float32Regs:$s2),
2687 "mov.b64\t$d, {{$s1, $s2}};",
2690 // Unpack a larger register into a set of smaller registers (the inverse of
// the pack records). Most records unpack integers; F64toV2F32 splits an f64
// register into two f32 registers.
// The 16- and 32-bit element forms are a single mov.b32/mov.b64 into a
// brace-enclosed destination list. The 8-bit element forms mov the source
// into locally declared .b8 temporaries (%t<N>) and then copy each temporary
// to its destination with cvt.u8.u8.
2691 def I32toV4I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2,
2692 Int8Regs:$d3, Int8Regs:$d4),
2694 !strconcat("{{\n\t.reg .b8\t%t<4>;",
2695 !strconcat("\n\tmov.b32\t{%t0, %t1, %t2, %t3}, $s;",
2696 !strconcat("\n\tcvt.u8.u8\t$d1, %t0;",
2697 !strconcat("\n\tcvt.u8.u8\t$d2, %t1;",
2698 !strconcat("\n\tcvt.u8.u8\t$d3, %t2;",
2699 "\n\tcvt.u8.u8\t$d4, %t3;\n\t}}"))))),
2701 def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2,
2702 Int16Regs:$d3, Int16Regs:$d4),
2704 "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;",
2706 def I16toV2I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2),
2708 !strconcat("{{\n\t.reg .b8\t%t<2>;",
2709 !strconcat("\n\tmov.b16\t{%t0, %t1}, $s;",
2710 !strconcat("\n\tcvt.u8.u8\t$d1, %t0;",
2711 "\n\tcvt.u8.u8\t$d2, %t1;\n\t}}"))),
2713 def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2),
2715 "mov.b32\t{{$d1, $d2}}, $s;",
2717 def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2),
2719 "mov.b64\t{{$d1, $d2}}, $s;",
2721 def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2),
2722 (ins Float64Regs:$s),
2723 "mov.b64\t{{$d1, $d2}}, $s;",
// f64 -> f32 rounding (fround), printed as cvt.rn. The _ftz record adds the
// ".ftz" modifier and is only available under Requires<[doF32FTZ]>; the plain
// record matches the same pattern without a predicate.
2726 def FPRound_ftz : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a),
2727 "cvt.rn.ftz.f32.f64 \t$d, $a;",
2728 [(set Float32Regs:$d, (fround Float64Regs:$a))]>, Requires<[doF32FTZ]>;
2730 def FPRound : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a),
2731 "cvt.rn.f32.f64 \t$d, $a;",
2732 [(set Float32Regs:$d, (fround Float64Regs:$a))]>;
// f32 -> f64 extension (fextend), printed as cvt with no rounding modifier.
// As above, the _ftz record is gated on doF32FTZ and the plain one is not.
2734 def FPExtend_ftz : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a),
2735 "cvt.ftz.f64.f32 \t$d, $a;",
2736 [(set Float64Regs:$d, (fextend Float32Regs:$a))]>, Requires<[doF32FTZ]>;
2738 def FPExtend : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a),
2739 "cvt.f64.f32 \t$d, $a;",
2740 [(set Float64Regs:$d, (fextend Float32Regs:$a))]>;
// DAG node for NVPTXISD::RET_FLAG: no typed operands or results (SDTNone),
// carries a chain and optionally consumes an incoming glue value.
2742 def retflag : SDNode<"NVPTXISD::RET_FLAG", SDTNone,
2743 [SDNPHasChain, SDNPOptInGlue]>;
2745 //-----------------------------------
2747 //-----------------------------------
// Terminator instructions (isTerminator=1 applies to the whole group):
//   Return       - "ret;", matched on retflag; also isReturn and isBarrier.
//   CBranch      - "@$a bra": branch to $target under the Int1Regs predicate
//                  $a, matched on brcond.
//   CBranchOther - "@!$a bra": same operands with the predicate negated in
//                  the printed text.
//   GOTO         - "bra.uni": unconditional branch; isBranch and isBarrier.
2749 let isTerminator=1 in {
2750 let isReturn=1, isBarrier=1 in
2751 def Return : NVPTXInst<(outs), (ins), "ret;", [(retflag)]>;
2754 def CBranch : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
2755 "@$a bra \t$target;",
2756 [(brcond Int1Regs:$a, bb:$target)]>;
2758 def CBranchOther : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
2759 "@!$a bra \t$target;",
2762 let isBranch=1, isBarrier=1 in
2763 def GOTO : NVPTXInst<(outs), (ins brtarget:$target),
2764 "bra.uni \t$target;",
// brcond on an Int32Regs condition: produce the predicate with
// ISetUNEi32ri_p, comparing $a against the immediate 0, and feed it to
// CBranch.
2768 def : Pat<(brcond Int32Regs:$a, bb:$target), (CBranch
2769 (ISetUNEi32ri_p Int32Regs:$a, 0), bb:$target)>;
2771 // SelectionDAGBuilder::visitSwitchCase() will invert the condition of a
2772 // conditional branch if
2773 // the target block is the next block, so that the code can fall through to it.
2775 // The inversion is done by 'xor condition, 1', which will be translated to
2776 // (setne condition, -1).
2777 // Since PTX supports '@!pred bra target', we should use it.
// The pattern below maps that (setne $a, -1) condition directly onto
// CBranchOther, which prints the negated-predicate branch.
2778 def : Pat<(brcond (i1 (setne Int1Regs:$a, -1)), bb:$target),
2779 (CBranchOther Int1Regs:$a, bb:$target)>;
// Type profiles for the call-sequence markers: CALLSEQ_START takes one i32
// operand, CALLSEQ_END takes two i32 operands.
2782 def SDT_NVPTXCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
2783 def SDT_NVPTXCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
2784 SDTCisVT<1, i32> ]>;
// DAG nodes for ISD::CALLSEQ_START / ISD::CALLSEQ_END, using the profiles
// above.
2786 def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_NVPTXCallSeqStart,
2787 [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
2788 def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_NVPTXCallSeqEnd,
2789 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
// NVPTXISD::CALL: no results and a single i32 operand (the callee); chained,
// with optional incoming glue and outgoing glue.
2792 def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
2793 def call : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall,
2794 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
// Operand type used for the callee of CALL.
2795 def calltarget : Operand<i32>;
// CALL: prints "call $dst, (1);". The record itself has no selection pattern
// ([]); selection goes through the two Pat entries below, which map the
// `call` node on a target global address or target external symbol to CALL.
// NOTE(review): the "(1)" in the asm text is a fixed literal here -- confirm
// what it is meant to denote.
2797 def CALL : NVPTXInst<(outs), (ins calltarget:$dst),
2798 "call \t$dst, (1);", []>;
2801 def : Pat<(call tglobaladdr:$dst),
2802 (CALL tglobaladdr:$dst)>;
2803 def : Pat<(call texternalsym:$dst),
2804 (CALL texternalsym:$dst)>;
2806 // Pseudo instructions.
// Pseudo forwards its four arguments (outs, ins, asm string, pattern list)
// unchanged to NVPTXInst; it adds no fields of its own.
2807 class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
2808 : NVPTXInst<outs, ins, asmstr, pattern>;
2810 // @TODO: We use some tricks here to emit curly braces. Can we clean this up
2811 // a bit without TableGen modifications?
// The trick: "{{" and "}}" print literal braces, and each asm string below
// contains one of each. Callseq_Start prints a real "{" (followed by the
// declaration of temp_param_reg) and hides its "}" after a "//" comment
// marker; Callseq_End hides its "{" after "//" and prints a real "}".
// NOTE(review): apparently done so each string keeps a balanced brace pair --
// confirm against the asm-string parser.
// Callseq_Start matches callseq_start with one immediate; Callseq_End matches
// callseq_end with two immediates, of which only $amt1 is printed.
2812 def Callseq_Start : NVPTXInst<(outs), (ins i32imm:$amt),
2813 "// Callseq Start $amt\n\t{{\n\t.reg .b32 temp_param_reg;\n\t// <end>}}",
2814 [(callseq_start timm:$amt)]>;
2815 def Callseq_End : NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
2816 "\n\t//{{\n\t}}// Callseq End $amt1",
2817 [(callseq_end timm:$amt1, timm:$amt2)]>;
// trapinst: instruction record with no output and no input operands.
2821 def trapinst : NVPTXInst<(outs), (ins),
2825 include "NVPTXVector.td"
2827 include "NVPTXIntrinsics.td"
2830 //-----------------------------------
2832 //-----------------------------------
2833 // BSWAP is currently expanded. The following would be more efficient:
2834 // - for < sm_20, use vector scalar mov, as Tesla supports native 16-bit registers
2835 // - for sm_20, use prmt (use vector scalar mov to get the pack and
2836 // unpack). sm_20 supports native 32-bit register, but not native 16-bit