1 //===- NVPTXInstrInfo.td - NVPTX Instruction defs -------------*- tblgen-*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the PTX instructions in TableGen format.
12 //===----------------------------------------------------------------------===//
14 include "NVPTXInstrFormats.td"
// Instruction with no outputs, no inputs, an empty asm string and no
// selection pattern.
def NOP : NVPTXInst<(outs),
                    (ins),
                    "",
                    []>;
// Vector-specific instruction properties.  Each def instantiates
// VecInstTypeEnum with a distinct integer value.
def isVecLD      : VecInstTypeEnum<1>;
def isVecST      : VecInstTypeEnum<2>;
def isVecBuild   : VecInstTypeEnum<3>;
def isVecShuffle : VecInstTypeEnum<4>;
def isVecExtract : VecInstTypeEnum<5>;
def isVecInsert  : VecInstTypeEnum<6>;
def isVecDest    : VecInstTypeEnum<7>;
def isVecOther   : VecInstTypeEnum<15>;
29 //===----------------------------------------------------------------------===//
30 // NVPTX Operand Definitions.
31 //===----------------------------------------------------------------------===//
// Operand declared with value type OtherVT (named for branch targets).
def brtarget
  : Operand<OtherVT>;
35 //===----------------------------------------------------------------------===//
36 // NVPTX Instruction Predicate Definitions
37 //===----------------------------------------------------------------------===//
// Each Predicate wraps the C++ condition string given as its argument.

// --- 32-bit atomics/reductions ---
def hasAtomRedG32         : Predicate<"Subtarget.hasAtomRedG32()">;
def hasAtomRedS32         : Predicate<"Subtarget.hasAtomRedS32()">;
def hasAtomRedGen32       : Predicate<"Subtarget.hasAtomRedGen32()">;
// True when the generic form is unavailable but the G32 form is.
def useAtomRedG32forGen32
  : Predicate<"!Subtarget.hasAtomRedGen32() && Subtarget.hasAtomRedG32()">;

def hasBrkPt              : Predicate<"Subtarget.hasBrkPt()">;

// --- 64-bit atomics/reductions ---
def hasAtomRedG64         : Predicate<"Subtarget.hasAtomRedG64()">;
def hasAtomRedS64         : Predicate<"Subtarget.hasAtomRedS64()">;
def hasAtomRedGen64       : Predicate<"Subtarget.hasAtomRedGen64()">;
// True when the generic form is unavailable but the G64 form is.
def useAtomRedG64forGen64
  : Predicate<"!Subtarget.hasAtomRedGen64() && Subtarget.hasAtomRedG64()">;

// --- Other subtarget queries ---
def hasAtomAddF32         : Predicate<"Subtarget.hasAtomAddF32()">;
def hasVote               : Predicate<"Subtarget.hasVote()">;
def hasDouble             : Predicate<"Subtarget.hasDouble()">;
def reqPTX20              : Predicate<"Subtarget.reqPTX20()">;
def hasLDG                : Predicate<"Subtarget.hasLDG()">;
def hasLDU                : Predicate<"Subtarget.hasLDU()">;
def hasGenericLdSt        : Predicate<"Subtarget.hasGenericLdSt()">;

// --- Flag-based predicates; the *_ftz variants additionally test UseF32FTZ ---
def doF32FTZ              : Predicate<"UseF32FTZ">;

def doFMAF32              : Predicate<"doFMAF32">;
def doFMAF32_ftz          : Predicate<"(doFMAF32 && UseF32FTZ)">;
def doFMAF32AGG           : Predicate<"doFMAF32AGG">;
def doFMAF32AGG_ftz       : Predicate<"(doFMAF32AGG && UseF32FTZ)">;
def doFMAF64              : Predicate<"doFMAF64">;
def doFMAF64AGG           : Predicate<"doFMAF64AGG">;
def doFMADF32             : Predicate<"doFMADF32">;
def doFMADF32_ftz         : Predicate<"(doFMADF32 && UseF32FTZ)">;

def doMulWide             : Predicate<"doMulWide">;

def allowFMA              : Predicate<"allowFMA">;
def allowFMA_ftz          : Predicate<"(allowFMA && UseF32FTZ)">;

// f32 division precision: value 0 selects APPROX, value 1 selects FULL.
def do_DIVF32_APPROX      : Predicate<"do_DIVF32_PREC==0">;
def do_DIVF32_FULL        : Predicate<"do_DIVF32_PREC==1">;

// f32 square-root precision: value 0 selects APPROX, value 1 selects RN.
def do_SQRTF32_APPROX     : Predicate<"do_SQRTF32_PREC==0">;
def do_SQRTF32_RN         : Predicate<"do_SQRTF32_PREC==1">;

def hasHWROT32            : Predicate<"Subtarget.hasHWROT32()">;

// Always-true predicate.
def true                  : Predicate<"1">;
85 //===----------------------------------------------------------------------===//
86 // Special Handling for 8-bit Operands and Operations
88 // PTX supports 8-bit signed and unsigned types, but does not support 8-bit
89 // operations (like add, shift, etc) except for ld/st/cvt. SASS does not have
92 // PTX ld, st and cvt instructions permit source and destination data operands
93 // to be wider than the instruction-type size, so that narrow values may be
94 // loaded, stored, and converted using regular-width registers.
96 // So in PTX generation, we
// - always use 16-bit registers in place of 8-bit registers.
98 // (8-bit variables should stay as 8-bit as they represent memory layout.)
99 // - for the following 8-bit operations, we sign-ext/zero-ext the 8-bit values
107 // We are patching the operations by inserting the cvt instructions in the
108 // asm strings of the affected instructions.
// Since vector operations, except for ld/st, are eventually elementized, we
// do not need to special-case the vector 8-bit operations.
114 //===----------------------------------------------------------------------===//
116 // Generate string block like
120 // cvt.s16.s8 %temp1, %a;
121 // cvt.s16.s8 %temp2, %b;
122 // opc.s16 %dst, %temp1, %temp2;
124 // when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
// Asm-string builder for an 8-bit register/register operation.
//   OpcStr  - opcode prefix; "16" is appended to form the final opcode.
//   TypeStr - PTX type used to declare the two temporaries.
//   CVTStr  - conversion instruction applied to each source operand.
// The resulting string `s` declares %temp1/%temp2, converts $a and $b into
// them, and applies the operation to the temporaries, writing $dst.
class Handle_i8rr<string OpcStr, string TypeStr, string CVTStr> {
  string s = !strconcat("{{\n\t",
             !strconcat(".reg .", !strconcat(TypeStr,
             !strconcat(" \t%temp1;\n\t",
             !strconcat(".reg .", !strconcat(TypeStr,
             !strconcat(" \t%temp2;\n\t",
             !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t",
             !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t",
             !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}"))))))))))));
}
136 // Generate string block like
140 // cvt.s16.s8 %temp1, %a;
141 // mov.b16 %temp2, %b;
142 // cvt.s16.s8 %temp2, %temp2;
143 // opc.s16 %dst, %temp1, %temp2;
145 // when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
// Asm-string builder for an 8-bit register/immediate operation.
//   OpcStr  - opcode prefix; "16" is appended to form the final opcode.
//   TypeStr - PTX type used to declare the two temporaries.
//   CVTStr  - conversion instruction applied to each source operand.
// $a is converted directly into %temp1.  The immediate $b is first moved
// into %temp2 with mov.b16 and then converted in place.
class Handle_i8ri<string OpcStr, string TypeStr, string CVTStr> {
  string s = !strconcat("{{\n\t",
             !strconcat(".reg .", !strconcat(TypeStr,
             !strconcat(" \t%temp1;\n\t",
             // Second declaration needs its own ".reg ." prefix, exactly as
             // in Handle_i8rr / Handle_i8ir.
             !strconcat(".reg .",
             !strconcat(TypeStr, !strconcat(" \t%temp2;\n\t",
             !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t",
             !strconcat("mov.b16 \t%temp2, $b;\n\t",
             !strconcat(CVTStr, !strconcat(" \t%temp2, %temp2;\n\t",
             !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))));
}
158 // Generate string block like
// mov.b16 %temp1, %a;
// cvt.s16.s8 %temp1, %temp1;
// cvt.s16.s8 %temp2, %b;
165 // opc.s16 %dst, %temp1, %temp2;
167 // when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
// Asm-string builder for an 8-bit immediate/register operation.
//   OpcStr  - opcode prefix; "16" is appended to form the final opcode.
//   TypeStr - PTX type used to declare the two temporaries.
//   CVTStr  - conversion instruction applied to each source operand.
// The immediate $a is moved into %temp1 with mov.b16 and converted in place;
// the register $b is converted directly into %temp2.
class Handle_i8ir<string OpcStr, string TypeStr, string CVTStr> {
  string s = !strconcat("{{\n\t",
             !strconcat(".reg .", !strconcat(TypeStr,
             !strconcat(" \t%temp1;\n\t",
             !strconcat(".reg .", !strconcat(TypeStr,
             !strconcat(" \t%temp2;\n\t",
             !strconcat("mov.b16 \t%temp1, $a;\n\t",
             !strconcat(CVTStr, !strconcat(" \t%temp1, %temp1;\n\t",
             !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t",
             !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))));
}
181 //===----------------------------------------------------------------------===//
182 // Some Common Instruction Class Templates
183 //===----------------------------------------------------------------------===//
// Two-source integer instructions for 64/32/16/8-bit register classes, each
// in a register/register (rr) and a register/immediate (ri) form.
//   OpcStr - opcode prefix; the operand width is appended to it.
//   OpNode - DAG node matched by every variant.
// The 8-bit variants are emitted with the "16" width suffix.
multiclass I3<string OpcStr, SDNode OpNode> {
  def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
                       Int64Regs:$b))]>;
  def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
  def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
                       Int32Regs:$b))]>;
  def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
  def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
                       Int16Regs:$b))]>;
  def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
  def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                     [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
  def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                     [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>;
}
// Same shape as the plain two-source integer multiclass, except that the
// 8-bit variants take their asm string from Handle_i8rr / Handle_i8ri, which
// convert the 8-bit sources into temporaries before applying the operation.
//   OpcStr  - opcode prefix; the operand width is appended to it.
//   OpNode  - DAG node matched by every variant.
//   TypeStr - temporary type forwarded to the Handle_i8* helpers.
//   CVTStr  - conversion instruction forwarded to the Handle_i8* helpers.
multiclass I3_i8<string OpcStr, SDNode OpNode, string TypeStr, string CVTStr> {
  def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
                       Int64Regs:$b))]>;
  def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
  def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
                       Int32Regs:$b))]>;
  def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
  def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
                       Int16Regs:$b))]>;
  def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
  def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
                     Handle_i8rr<OpcStr, TypeStr, CVTStr>.s,
                     [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
  def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
                     Handle_i8ri<OpcStr, TypeStr, CVTStr>.s,
                     [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>;
}
// Two-source integer instructions for 64/32/16-bit register classes only
// (no 8-bit variants), each in rr and ri form.
//   OpcStr - opcode prefix; the operand width is appended to it.
//   OpNode - DAG node matched by every variant.
multiclass I3_noi8<string OpcStr, SDNode OpNode> {
  def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
                       Int64Regs:$b))]>;
  def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
  def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
                       Int32Regs:$b))]>;
  def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
  def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
                       Int16Regs:$b))]>;
  def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
}
// 32-bit-only two-source integer instructions (rr and ri forms); ".s32" is
// appended to OpcStr.
//   OpcStr - opcode prefix.
//   OpNode - DAG node matched by both variants.
multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> {
  def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
                                                    Int32Regs:$b),
                     !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
                       Int32Regs:$b))]>;
  def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
                     !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
}
// Two-source floating-point instructions without an explicit rounding
// modifier.  Every variant is guarded by allowFMA (or allowFMA_ftz for the
// ".ftz.f32" forms).
//   OpcStr - opcode prefix; the type suffix is appended to it.
//   OpNode - DAG node matched by every variant.
multiclass F3<string OpcStr, SDNode OpNode> {
  // f64, register/register and register/immediate.
  def f64rr : NVPTXInst<(outs Float64Regs:$dst),
                        (ins Float64Regs:$a, Float64Regs:$b),
                        !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
                        [(set Float64Regs:$dst,
                          (OpNode Float64Regs:$a, Float64Regs:$b))]>,
                        Requires<[allowFMA]>;
  def f64ri : NVPTXInst<(outs Float64Regs:$dst),
                        (ins Float64Regs:$a, f64imm:$b),
                        !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
                        [(set Float64Regs:$dst,
                          (OpNode Float64Regs:$a, fpimm:$b))]>,
                        Requires<[allowFMA]>;
  // f32 with flush-to-zero.
  def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
                        (ins Float32Regs:$a, Float32Regs:$b),
                        !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
                        [(set Float32Regs:$dst,
                          (OpNode Float32Regs:$a, Float32Regs:$b))]>,
                        Requires<[allowFMA_ftz]>;
  def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
                        (ins Float32Regs:$a, f32imm:$b),
                        !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
                        [(set Float32Regs:$dst,
                          (OpNode Float32Regs:$a, fpimm:$b))]>,
                        Requires<[allowFMA_ftz]>;
  // f32 without flush-to-zero.
  def f32rr : NVPTXInst<(outs Float32Regs:$dst),
                        (ins Float32Regs:$a, Float32Regs:$b),
                        !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
                        [(set Float32Regs:$dst,
                          (OpNode Float32Regs:$a, Float32Regs:$b))]>,
                        Requires<[allowFMA]>;
  def f32ri : NVPTXInst<(outs Float32Regs:$dst),
                        (ins Float32Regs:$a, f32imm:$b),
                        !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
                        [(set Float32Regs:$dst,
                          (OpNode Float32Regs:$a, fpimm:$b))]>,
                        Requires<[allowFMA]>;
}
// Two-source floating-point instructions emitted with the ".rn" modifier.
// Only the ".rn.ftz.f32" forms carry a predicate (doF32FTZ).
//   OpcStr - opcode prefix; ".rn" and the type suffix are appended to it.
//   OpNode - DAG node matched by every variant.
multiclass F3_rn<string OpcStr, SDNode OpNode> {
  // f64, register/register and register/immediate.
  def f64rr : NVPTXInst<(outs Float64Regs:$dst),
                        (ins Float64Regs:$a, Float64Regs:$b),
                        !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
                        [(set Float64Regs:$dst,
                          (OpNode Float64Regs:$a, Float64Regs:$b))]>;
  def f64ri : NVPTXInst<(outs Float64Regs:$dst),
                        (ins Float64Regs:$a, f64imm:$b),
                        !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
                        [(set Float64Regs:$dst,
                          (OpNode Float64Regs:$a, fpimm:$b))]>;
  // f32 with flush-to-zero.
  def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
                        (ins Float32Regs:$a, Float32Regs:$b),
                        !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
                        [(set Float32Regs:$dst,
                          (OpNode Float32Regs:$a, Float32Regs:$b))]>,
                        Requires<[doF32FTZ]>;
  def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
                        (ins Float32Regs:$a, f32imm:$b),
                        !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
                        [(set Float32Regs:$dst,
                          (OpNode Float32Regs:$a, fpimm:$b))]>,
                        Requires<[doF32FTZ]>;
  // f32 without flush-to-zero.
  def f32rr : NVPTXInst<(outs Float32Regs:$dst),
                        (ins Float32Regs:$a, Float32Regs:$b),
                        !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
                        [(set Float32Regs:$dst,
                          (OpNode Float32Regs:$a, Float32Regs:$b))]>;
  def f32ri : NVPTXInst<(outs Float32Regs:$dst),
                        (ins Float32Regs:$a, f32imm:$b),
                        !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
                        [(set Float32Regs:$dst,
                          (OpNode Float32Regs:$a, fpimm:$b))]>;
}
// One-source floating-point instructions: f64, f32 with ".ftz" (guarded by
// doF32FTZ), and plain f32.
//   OpcStr - opcode prefix; the type suffix is appended to it.
//   OpNode - DAG node matched by every variant.
multiclass F2<string OpcStr, SDNode OpNode> {
  def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a),
                      !strconcat(OpcStr, ".f64 \t$dst, $a;"),
                      [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>;
  def f32_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
                      !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"),
                      [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>,
                      Requires<[doF32FTZ]>;
  def f32 : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
                      !strconcat(OpcStr, ".f32 \t$dst, $a;"),
                      [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>;
}
367 //===----------------------------------------------------------------------===//
368 // NVPTX Instructions.
369 //===----------------------------------------------------------------------===//
371 //-----------------------------------
372 // Integer Arithmetic
373 //-----------------------------------
// Add/sub on 1-bit predicate registers.  Both forms emit "xor.pred"
// regardless of which OpNode is matched.
//   OpNode - DAG node matched by both variants.
multiclass ADD_SUB_i1<SDNode OpNode> {
  def _rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
          "xor.pred \t$dst, $a, $b;",
          [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
  def _ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
          "xor.pred \t$dst, $a, $b;",
          [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>;
}
// 1-bit add/sub.
defm ADD_i1 : ADD_SUB_i1<add>;
defm SUB_i1 : ADD_SUB_i1<sub>;

// 8/16/32/64-bit add/sub.
defm ADD    : I3<"add.s", add>;
defm SUB    : I3<"sub.s", sub>;

// 32-bit add/sub matched from addc/subc, emitted as add.cc / sub.cc.
defm ADDCC  : ADD_SUB_INT_32<"add.cc", addc>;
defm SUBCC  : ADD_SUB_INT_32<"sub.cc", subc>;

// 32-bit add/sub matched from adde/sube, emitted as addc.cc / subc.cc.
defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>;
defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>;
397 //mul.wide PTX instruction
// Immediate whose value fits in a signed 32-bit integer.
def SInt32Const : PatLeaf<(imm), [{
  const APInt &v = N->getAPIntValue();
  return v.isSignedIntN(32);
}]>;

// Immediate whose value fits in an unsigned 32-bit integer.
// NOTE(review): the predicate line was absent from this listing; isIntN(32)
// is assumed as the unsigned counterpart of isSignedIntN(32) - confirm.
def UInt32Const : PatLeaf<(imm), [{
  const APInt &v = N->getAPIntValue();
  return v.isIntN(32);
}]>;

// Immediate whose value fits in a signed 16-bit integer.
def SInt16Const : PatLeaf<(imm), [{
  const APInt &v = N->getAPIntValue();
  return v.isSignedIntN(16);
}]>;

// Immediate whose value fits in an unsigned 16-bit integer.
// NOTE(review): the predicate line was absent from this listing; isIntN(16)
// is assumed as the unsigned counterpart of isSignedIntN(16) - confirm.
def UInt16Const : PatLeaf<(imm), [{
  const APInt &v = N->getAPIntValue();
  return v.isIntN(16);
}]>;

def Int5Const : PatLeaf<(imm), [{
  const APInt &v = N->getAPIntValue();
  // Check if 0 <= v < 32
  // Only then the result from (x << v) will be i32
  return v.sge(0) && v.slt(32);
}]>;

def Int4Const : PatLeaf<(imm), [{
  const APInt &v = N->getAPIntValue();
  // Check if 0 <= v < 16
  // Only then the result from (x << v) will be i16
  return v.sge(0) && v.slt(16);
}]>;
// Rewrites a shift amount v into the i32 target constant (1 << v).
// NOTE(review): the declaration of `temp` was absent from this listing; a
// 32-bit APInt holding 1 is assumed to match the MVT::i32 result - confirm.
def SHL2MUL32 : SDNodeXForm<imm, [{
  const APInt &v = N->getAPIntValue();
  APInt temp(32, 1);
  return CurDAG->getTargetConstant(temp.shl(v), MVT::i32);
}]>;

// Rewrites a shift amount v into the i16 target constant (1 << v).
// NOTE(review): the declaration of `temp` was absent from this listing; a
// 16-bit APInt holding 1 is assumed to match the MVT::i16 result - confirm.
def SHL2MUL16 : SDNodeXForm<imm, [{
  const APInt &v = N->getAPIntValue();
  APInt temp(16, 1);
  return CurDAG->getTargetConstant(temp.shl(v), MVT::i16);
}]>;
// mul.wide instructions.  None of these carries a selection pattern; they
// are only reachable through explicit Pat<> definitions.

// 32 x 32 -> 64, signed.
def MULWIDES64
  : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
              "mul.wide.s32 \t$dst, $a, $b;", []>;
def MULWIDES64Imm
  : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
              "mul.wide.s32 \t$dst, $a, $b;", []>;

// 32 x 32 -> 64, unsigned.
def MULWIDEU64
  : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
              "mul.wide.u32 \t$dst, $a, $b;", []>;
def MULWIDEU64Imm
  : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
              "mul.wide.u32 \t$dst, $a, $b;", []>;

// 16 x 16 -> 32, signed.
def MULWIDES32
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
              "mul.wide.s16 \t$dst, $a, $b;", []>;
def MULWIDES32Imm
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
              "mul.wide.s16 \t$dst, $a, $b;", []>;

// 16 x 16 -> 32, unsigned.
def MULWIDEU32
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
              "mul.wide.u16 \t$dst, $a, $b;", []>;
def MULWIDEU32Imm
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
              "mul.wide.u16 \t$dst, $a, $b;", []>;
// All patterns below are enabled only under doMulWide.

// Shift of an extended 32-bit value by a constant in [0, 32): select the
// immediate mul.wide form with the shift amount rewritten by SHL2MUL32.
def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)),
          (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
      Requires<[doMulWide]>;
def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)),
          (MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
      Requires<[doMulWide]>;

// Shift of an extended 16-bit value by a constant in [0, 16).
def : Pat<(shl (sext Int16Regs:$a), (i16 Int4Const:$b)),
          (MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
      Requires<[doMulWide]>;
def : Pat<(shl (zext Int16Regs:$a), (i16 Int4Const:$b)),
          (MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
      Requires<[doMulWide]>;

// 64-bit multiply of two sign-extended 32-bit values / value and constant.
def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)),
          (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
      Requires<[doMulWide]>;
def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)),
          (MULWIDES64Imm Int32Regs:$a, (i64 SInt32Const:$b))>,
      Requires<[doMulWide]>;

// 64-bit multiply of two zero-extended 32-bit values / value and constant.
def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)),
          (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
      Requires<[doMulWide]>;
def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)),
          (MULWIDEU64Imm Int32Regs:$a, (i64 UInt32Const:$b))>,
      Requires<[doMulWide]>;

// 32-bit multiply of two sign-extended 16-bit values / value and constant.
def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)),
          (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>,
      Requires<[doMulWide]>;
def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)),
          (MULWIDES32Imm Int16Regs:$a, (i32 SInt16Const:$b))>,
      Requires<[doMulWide]>;

// 32-bit multiply of two zero-extended 16-bit values / value and constant.
def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)),
          (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,
      Requires<[doMulWide]>;
def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)),
          (MULWIDEU32Imm Int16Regs:$a, (i32 UInt16Const:$b))>,
      Requires<[doMulWide]>;
// Low-half multiply for all integer widths.
defm MULT   : I3<"mul.lo.s", mul>;

// High-half multiply for 16/32/64-bit widths only (no 8-bit variants).
defm MULTHS : I3_noi8<"mul.hi.s", mulhs>;
defm MULTHU : I3_noi8<"mul.hi.u", mulhu>;
// 8-bit high-half multiply.  Each form widens both sources to 16 bits,
// multiplies with mul.lo, and shifts the product right by 8 into $dst.

// Signed, register/register.
def MULTHSi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
            !strconcat("{{ \n\t",
            !strconcat(".reg \t.s16 temp1; \n\t",
            !strconcat(".reg \t.s16 temp2; \n\t",
            !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t",
            !strconcat("cvt.s16.s8 \ttemp2, $b; \n\t",
            !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t",
            !strconcat("shr.s16 \t$dst, $dst, 8; \n\t", "}}"))))))),
      [(set Int8Regs:$dst, (mulhs Int8Regs:$a, Int8Regs:$b))]>;

// Signed, register/immediate: the immediate is moved into temp2 first.
def MULTHSi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
            !strconcat("{{ \n\t",
            !strconcat(".reg \t.s16 temp1; \n\t",
            !strconcat(".reg \t.s16 temp2; \n\t",
            !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t",
            !strconcat("mov.b16 \ttemp2, $b; \n\t",
            !strconcat("cvt.s16.s8 \ttemp2, temp2; \n\t",
            !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t",
            !strconcat("shr.s16 \t$dst, $dst, 8; \n\t", "}}")))))))),
      [(set Int8Regs:$dst, (mulhs Int8Regs:$a, imm:$b))]>;

// Unsigned, register/register.
def MULTHUi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
            !strconcat("{{ \n\t",
            !strconcat(".reg \t.u16 temp1; \n\t",
            !strconcat(".reg \t.u16 temp2; \n\t",
            !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t",
            !strconcat("cvt.u16.u8 \ttemp2, $b; \n\t",
            !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t",
            !strconcat("shr.u16 \t$dst, $dst, 8; \n\t", "}}"))))))),
      [(set Int8Regs:$dst, (mulhu Int8Regs:$a, Int8Regs:$b))]>;

// Unsigned, register/immediate: the immediate is moved into temp2 first.
def MULTHUi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
            !strconcat("{{ \n\t",
            !strconcat(".reg \t.u16 temp1; \n\t",
            !strconcat(".reg \t.u16 temp2; \n\t",
            !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t",
            !strconcat("mov.b16 \ttemp2, $b; \n\t",
            !strconcat("cvt.u16.u8 \ttemp2, temp2; \n\t",
            !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t",
            !strconcat("shr.u16 \t$dst, $dst, 8; \n\t", "}}")))))))),
      [(set Int8Regs:$dst, (mulhu Int8Regs:$a, imm:$b))]>;
// Integer division: signed uses s16 temporaries, unsigned uses u16.
defm SDIV : I3_i8<"div.s", sdiv, "s16", "cvt.s16.s8">;
defm UDIV : I3_i8<"div.u", udiv, "u16", "cvt.u16.u8">;

// Integer remainder.
// The ri version of SREM will not be selected as DAGCombiner::visitSREM will
// lower it.
defm SREM : I3_i8<"rem.s", srem, "s16", "cvt.s16.s8">;
// The ri version of UREM will not be selected as DAGCombiner::visitUREM will
// lower it.
defm UREM : I3_i8<"rem.u", urem, "u16", "cvt.u16.u8">;
// 8-bit multiply-add, matched from (add (mul a, b), c) and emitted as
// mad.lo.s16.  The suffix names the operand kinds of $a, $b, $c
// (r = register, i = immediate).
def MAD8rrr : NVPTXInst<(outs Int8Regs:$dst),
                      (ins Int8Regs:$a, Int8Regs:$b, Int8Regs:$c),
                      "mad.lo.s16 \t$dst, $a, $b, $c;",
                      [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b),
                        Int8Regs:$c))]>;
def MAD8rri : NVPTXInst<(outs Int8Regs:$dst),
                      (ins Int8Regs:$a, Int8Regs:$b, i8imm:$c),
                      "mad.lo.s16 \t$dst, $a, $b, $c;",
                      [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b),
                        imm:$c))]>;
def MAD8rir : NVPTXInst<(outs Int8Regs:$dst),
                      (ins Int8Regs:$a, i8imm:$b, Int8Regs:$c),
                      "mad.lo.s16 \t$dst, $a, $b, $c;",
                      [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b),
                        Int8Regs:$c))]>;
def MAD8rii : NVPTXInst<(outs Int8Regs:$dst),
                      (ins Int8Regs:$a, i8imm:$b, i8imm:$c),
                      "mad.lo.s16 \t$dst, $a, $b, $c;",
                      [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b),
                        imm:$c))]>;
// 16-bit multiply-add, matched from (add (mul a, b), c) and emitted as
// mad.lo.s16.  The suffix names the operand kinds of $a, $b, $c
// (r = register, i = immediate).
def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst),
                      (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c),
                      "mad.lo.s16 \t$dst, $a, $b, $c;",
                      [(set Int16Regs:$dst, (add
                        (mul Int16Regs:$a, Int16Regs:$b), Int16Regs:$c))]>;
def MAD16rri : NVPTXInst<(outs Int16Regs:$dst),
                      (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c),
                      "mad.lo.s16 \t$dst, $a, $b, $c;",
                      [(set Int16Regs:$dst, (add
                        (mul Int16Regs:$a, Int16Regs:$b), imm:$c))]>;
def MAD16rir : NVPTXInst<(outs Int16Regs:$dst),
                      (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c),
                      "mad.lo.s16 \t$dst, $a, $b, $c;",
                      [(set Int16Regs:$dst, (add
                        (mul Int16Regs:$a, imm:$b), Int16Regs:$c))]>;
def MAD16rii : NVPTXInst<(outs Int16Regs:$dst),
    (ins Int16Regs:$a, i16imm:$b, i16imm:$c),
                      "mad.lo.s16 \t$dst, $a, $b, $c;",
                      [(set Int16Regs:$dst, (add (mul Int16Regs:$a, imm:$b),
                        imm:$c))]>;
// 32- and 64-bit multiply-add, matched from (add (mul a, b), c).  The suffix
// names the operand kinds of $a, $b, $c (r = register, i = immediate).

// --- 32-bit, emitted as mad.lo.s32 ---
def MAD32rrr
  : NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),
              "mad.lo.s32 \t$dst, $a, $b, $c;",
              [(set Int32Regs:$dst,
                 (add (mul Int32Regs:$a, Int32Regs:$b), Int32Regs:$c))]>;
def MAD32rri
  : NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c),
              "mad.lo.s32 \t$dst, $a, $b, $c;",
              [(set Int32Regs:$dst,
                 (add (mul Int32Regs:$a, Int32Regs:$b), imm:$c))]>;
def MAD32rir
  : NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c),
              "mad.lo.s32 \t$dst, $a, $b, $c;",
              [(set Int32Regs:$dst,
                 (add (mul Int32Regs:$a, imm:$b), Int32Regs:$c))]>;
def MAD32rii
  : NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$a, i32imm:$b, i32imm:$c),
              "mad.lo.s32 \t$dst, $a, $b, $c;",
              [(set Int32Regs:$dst,
                 (add (mul Int32Regs:$a, imm:$b), imm:$c))]>;

// --- 64-bit, emitted as mad.lo.s64 ---
def MAD64rrr
  : NVPTXInst<(outs Int64Regs:$dst),
              (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
              "mad.lo.s64 \t$dst, $a, $b, $c;",
              [(set Int64Regs:$dst,
                 (add (mul Int64Regs:$a, Int64Regs:$b), Int64Regs:$c))]>;
def MAD64rri
  : NVPTXInst<(outs Int64Regs:$dst),
              (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c),
              "mad.lo.s64 \t$dst, $a, $b, $c;",
              [(set Int64Regs:$dst,
                 (add (mul Int64Regs:$a, Int64Regs:$b), imm:$c))]>;
def MAD64rir
  : NVPTXInst<(outs Int64Regs:$dst),
              (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c),
              "mad.lo.s64 \t$dst, $a, $b, $c;",
              [(set Int64Regs:$dst,
                 (add (mul Int64Regs:$a, imm:$b), Int64Regs:$c))]>;
def MAD64rii
  : NVPTXInst<(outs Int64Regs:$dst),
              (ins Int64Regs:$a, i64imm:$b, i64imm:$c),
              "mad.lo.s64 \t$dst, $a, $b, $c;",
              [(set Int64Regs:$dst,
                 (add (mul Int64Regs:$a, imm:$b), imm:$c))]>;
// Integer negation.

// 8-bit: convert the source to s16 in $dst, then negate $dst in place.
def INEG8
  : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
              "cvt.s16.s8 \t$dst, $src;\n\tneg.s16 \t$dst, $dst;",
              [(set Int8Regs:$dst, (ineg Int8Regs:$src))]>;
def INEG16
  : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
              "neg.s16 \t$dst, $src;",
              [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>;
def INEG32
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
              "neg.s32 \t$dst, $src;",
              [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>;
def INEG64
  : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
              "neg.s64 \t$dst, $src;",
              [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>;
678 //-----------------------------------
679 // Floating Point Arithmetic
680 //-----------------------------------
// Floating-point immediate with IEEE single semantics.
// NOTE(review): both return statements were absent from this listing; the
// rejection of non-single values and the comparison against 1.0f are assumed
// from the leaf's name - confirm.
def FloatConst1 : PatLeaf<(fpimm), [{
  if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEsingle)
    return false;
  float f = (float)N->getValueAPF().convertToFloat();
  return (f == 1.0f);
}]>;
// Constant (double)1.0
// Floating-point immediate with IEEE double semantics.
// NOTE(review): both return statements were absent from this listing; the
// rejection of non-double values and the comparison against 1.0 are assumed
// from the leaf's name - confirm.
def DoubleConst1 : PatLeaf<(fpimm), [{
  if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEdouble)
    return false;
  double d = (double)N->getValueAPF().convertToDouble();
  return (d == 1.0);
}]>;
// add/sub/mul without a rounding modifier (guarded by allowFMA inside F3).
defm FADD    : F3<"add", fadd>;
defm FSUB    : F3<"sub", fsub>;
defm FMUL    : F3<"mul", fmul>;

// add/sub/mul emitted with the ".rn" modifier.
defm FADD_rn : F3_rn<"add", fadd>;
defm FSUB_rn : F3_rn<"sub", fsub>;
defm FMUL_rn : F3_rn<"mul", fmul>;

// One-source operations; square root is emitted as "sqrt.rn".
defm FABS    : F2<"abs", fabs>;
defm FNEG    : F2<"neg", fneg>;
defm FSQRT   : F2<"sqrt.rn", fsqrt>;
// f64 division.

// DoubleConst1 / b is emitted as rcp.rn.f64 on $b alone.
def FDIV641r
  : NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$a, Float64Regs:$b),
              "rcp.rn.f64 \t$dst, $b;",
              [(set Float64Regs:$dst,
                 (fdiv DoubleConst1:$a, Float64Regs:$b))]>;
def FDIV64rr
  : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a, Float64Regs:$b),
              "div.rn.f64 \t$dst, $a, $b;",
              [(set Float64Regs:$dst,
                 (fdiv Float64Regs:$a, Float64Regs:$b))]>;
def FDIV64ri
  : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a, f64imm:$b),
              "div.rn.f64 \t$dst, $a, $b;",
              [(set Float64Regs:$dst,
                 (fdiv Float64Regs:$a, fpimm:$b))]>;
729 // F32 Approximate reciprocal
// FloatConst1 / b under do_DIVF32_APPROX: emitted as rcp.approx on $b alone.
// The _ftz form additionally requires doF32FTZ.
def FDIV321r_ftz
  : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$a, Float32Regs:$b),
              "rcp.approx.ftz.f32 \t$dst, $b;",
              [(set Float32Regs:$dst,
                 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
    Requires<[do_DIVF32_APPROX, doF32FTZ]>;
def FDIV321r
  : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$a, Float32Regs:$b),
              "rcp.approx.f32 \t$dst, $b;",
              [(set Float32Regs:$dst,
                 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
    Requires<[do_DIVF32_APPROX]>;
744 // F32 Approximate division
// a / b under do_DIVF32_APPROX: emitted as div.approx.  The _ftz form
// additionally requires doF32FTZ.
def FDIV32approxrr_ftz
  : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b),
              "div.approx.ftz.f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst,
                 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
    Requires<[do_DIVF32_APPROX, doF32FTZ]>;
def FDIV32approxrr
  : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b),
              "div.approx.f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst,
                 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
    Requires<[do_DIVF32_APPROX]>;
759 // F32 Semi-accurate reciprocal
761 // rcp.approx gives the same result as div.full(1.0f, a) and is faster.
// FloatConst1 / b under do_DIVF32_FULL: still emitted as rcp.approx on $b
// alone.  The _ftz form additionally requires doF32FTZ.
def FDIV321r_approx_ftz
  : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$a, Float32Regs:$b),
              "rcp.approx.ftz.f32 \t$dst, $b;",
              [(set Float32Regs:$dst,
                 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
    Requires<[do_DIVF32_FULL, doF32FTZ]>;
def FDIV321r_approx
  : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$a, Float32Regs:$b),
              "rcp.approx.f32 \t$dst, $b;",
              [(set Float32Regs:$dst,
                 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
    Requires<[do_DIVF32_FULL]>;
776 // F32 Semi-accurate division
// a / b under do_DIVF32_FULL: emitted as div.full, in rr and ri forms.  The
// _ftz forms additionally require doF32FTZ.
def FDIV32rr_ftz
  : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b),
              "div.full.ftz.f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst,
                 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
    Requires<[do_DIVF32_FULL, doF32FTZ]>;
def FDIV32ri_ftz
  : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, f32imm:$b),
              "div.full.ftz.f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst,
                 (fdiv Float32Regs:$a, fpimm:$b))]>,
    Requires<[do_DIVF32_FULL, doF32FTZ]>;
def FDIV32rr
  : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b),
              "div.full.f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst,
                 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
    Requires<[do_DIVF32_FULL]>;
def FDIV32ri
  : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, f32imm:$b),
              "div.full.f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst,
                 (fdiv Float32Regs:$a, fpimm:$b))]>,
    Requires<[do_DIVF32_FULL]>;
803 // F32 Accurate reciprocal
// FloatConst1 / b under reqPTX20: emitted as rcp.rn on $b alone.  The _ftz
// form additionally requires doF32FTZ.
def FDIV321r_prec_ftz
  : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$a, Float32Regs:$b),
              "rcp.rn.ftz.f32 \t$dst, $b;",
              [(set Float32Regs:$dst,
                 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
    Requires<[reqPTX20, doF32FTZ]>;
def FDIV321r_prec
  : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$a, Float32Regs:$b),
              "rcp.rn.f32 \t$dst, $b;",
              [(set Float32Regs:$dst,
                 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
    Requires<[reqPTX20]>;
818 // F32 Accurate division
// a / b under reqPTX20: emitted as div.rn, in rr and ri forms.  The _ftz
// forms additionally require doF32FTZ.
def FDIV32rr_prec_ftz
  : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b),
              "div.rn.ftz.f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst,
                 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
    Requires<[doF32FTZ, reqPTX20]>;
def FDIV32ri_prec_ftz
  : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, f32imm:$b),
              "div.rn.ftz.f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst,
                 (fdiv Float32Regs:$a, fpimm:$b))]>,
    Requires<[doF32FTZ, reqPTX20]>;
def FDIV32rr_prec
  : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b),
              "div.rn.f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst,
                 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
    Requires<[reqPTX20]>;
def FDIV32ri_prec
  : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, f32imm:$b),
              "div.rn.f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst,
                 (fdiv Float32Regs:$a, fpimm:$b))]>,
    Requires<[reqPTX20]>;
// f32 contraction of (fadd (fmul a, b), c) into a single three-source
// instruction.
//   OpcStr - complete opcode string placed before the operand list.
//   Pred   - predicate guarding every variant.
// The suffix names the operand kinds of $a, $b, $c (r = register,
// i = immediate).
multiclass FPCONTRACT32<string OpcStr, Predicate Pred> {
  def rrr : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                      [(set Float32Regs:$dst, (fadd
                        (fmul Float32Regs:$a, Float32Regs:$b),
                        Float32Regs:$c))]>, Requires<[Pred]>;
  // This is to WAR a weird bug in Tablegen that does not automatically
  // generate the following permuted rule rrr2 from the above rrr.
  // So we explicitly add it here. This happens to FMA32 only.
  // See the comments at FMAD32 and FMA32 for more information.
  def rrr2 : NVPTXInst<(outs Float32Regs:$dst),
                        (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                      [(set Float32Regs:$dst, (fadd Float32Regs:$c,
                        (fmul Float32Regs:$a, Float32Regs:$b)))]>,
                      Requires<[Pred]>;
  def rri : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, Float32Regs:$b, f32imm:$c),
                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                      [(set Float32Regs:$dst, (fadd
                        (fmul Float32Regs:$a, Float32Regs:$b), fpimm:$c))]>,
                      Requires<[Pred]>;
  def rir : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, f32imm:$b, Float32Regs:$c),
                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                      [(set Float32Regs:$dst, (fadd
                        (fmul Float32Regs:$a, fpimm:$b), Float32Regs:$c))]>,
                      Requires<[Pred]>;
  def rii : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, f32imm:$b, f32imm:$c),
                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                      [(set Float32Regs:$dst, (fadd
                        (fmul Float32Regs:$a, fpimm:$b), fpimm:$c))]>,
                      Requires<[Pred]>;
}
// FPCONTRACT64: f64 counterpart of FPCONTRACT32. Each record prints OpcStr
// followed by " \t$dst, $a, $b, $c;" and matches (fadd (fmul $a, $b), $c) on
// Float64Regs / f64imm operands (r = register, i = immediate in the suffix).
// No swapped-operand "rrr2" record is defined here.
883 multiclass FPCONTRACT64<string OpcStr, Predicate Pred> {
884 def rrr : NVPTXInst<(outs Float64Regs:$dst),
885 (ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c),
886 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
887 [(set Float64Regs:$dst, (fadd
888 (fmul Float64Regs:$a, Float64Regs:$b),
889 Float64Regs:$c))]>, Requires<[Pred]>;
890 def rri : NVPTXInst<(outs Float64Regs:$dst),
891 (ins Float64Regs:$a, Float64Regs:$b, f64imm:$c),
892 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
893 [(set Float64Regs:$dst, (fadd (fmul Float64Regs:$a,
894 Float64Regs:$b), fpimm:$c))]>, Requires<[Pred]>;
895 def rir : NVPTXInst<(outs Float64Regs:$dst),
896 (ins Float64Regs:$a, f64imm:$b, Float64Regs:$c),
897 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
898 [(set Float64Regs:$dst, (fadd
899 (fmul Float64Regs:$a, fpimm:$b), Float64Regs:$c))]>,
901 def rii : NVPTXInst<(outs Float64Regs:$dst),
902 (ins Float64Regs:$a, f64imm:$b, f64imm:$c),
903 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
904 [(set Float64Regs:$dst, (fadd
905 (fmul Float64Regs:$a, fpimm:$b), fpimm:$c))]>,
909 // For an unknown reason (most likely a bug in TableGen), TableGen does not
910 // automatically generate the rrr2 rule from
911 // the rrr rule (see FPCONTRACT32) for FMA32, though it does for FMAD32.
912 // If we reverse the order of the following two lines, then the rrr2 rule will be
913 // generated for FMA32, but not for rrr.
914 // Therefore, we manually write the rrr2 rule in FPCONTRACT32.
// Instantiations: "mad[.ftz].f32" and "fma.rn[.ftz].f32" via FPCONTRACT32,
// and "fma.rn.f64" via FPCONTRACT64, each gated by its own predicate.
915 defm FMAD32_ftz : FPCONTRACT32<"mad.ftz.f32", doFMADF32_ftz>;
916 defm FMAD32 : FPCONTRACT32<"mad.f32", doFMADF32>;
917 defm FMA32_ftz : FPCONTRACT32<"fma.rn.ftz.f32", doFMAF32_ftz>;
918 defm FMA32 : FPCONTRACT32<"fma.rn.f32", doFMAF32>;
919 defm FMA64 : FPCONTRACT64<"fma.rn.f64", doFMAF64>;
921 // b*c-a => fmad(b, c, -a)
// Rewrites (fsub (fmul $b, $c), $a) into Inst($b, $c, FNEGf32($a)).
// Only this one direction is provided here; FPCONTRACT32_SUB_PAT also
// handles a - b*c.
922 multiclass FPCONTRACT32_SUB_PAT_MAD<NVPTXInst Inst, Predicate Pred> {
923 def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
924 (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
928 // a-b*c => fmad(-b,c, a)
929 // - legal because a-b*c <=> a+(-b*c) <=> a+(-b)*c
930 // b*c-a => fmad(b, c, -a)
931 // - legal because b*c-a <=> b*c+(-a)
// Two anonymous patterns on Float32Regs: the first negates $b with FNEGf32,
// the second negates $a, before handing the operands to Inst.
932 multiclass FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
933 def : Pat<(fsub Float32Regs:$a, (fmul Float32Regs:$b, Float32Regs:$c)),
934 (Inst (FNEGf32 Float32Regs:$b), Float32Regs:$c, Float32Regs:$a)>,
936 def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
937 (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
941 // a-b*c => fmad(-b,c, a)
942 // b*c-a => fmad(b, c, -a)
// f64 version of FPCONTRACT32_SUB_PAT: same two rewrites on Float64Regs,
// using FNEGf64 for the negation.
943 multiclass FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
944 def : Pat<(fsub Float64Regs:$a, (fmul Float64Regs:$b, Float64Regs:$c)),
945 (Inst (FNEGf64 Float64Regs:$b), Float64Regs:$c, Float64Regs:$a)>,
948 def : Pat<(fsub (fmul Float64Regs:$b, Float64Regs:$c), Float64Regs:$a),
949 (Inst Float64Regs:$b, Float64Regs:$c, (FNEGf64 Float64Regs:$a))>,
// Instantiate the subtraction rewrites on the rrr records. The FMA variants
// are passed the do*AGG predicates and get both rewrite directions; the FMAD
// variants are passed doFMADF32[_ftz] and get only the b*c-a direction
// (FPCONTRACT32_SUB_PAT_MAD).
953 defm FMAF32ext_ftz : FPCONTRACT32_SUB_PAT<FMA32_ftzrrr, doFMAF32AGG_ftz>;
954 defm FMAF32ext : FPCONTRACT32_SUB_PAT<FMA32rrr, doFMAF32AGG>;
955 defm FMADF32ext_ftz : FPCONTRACT32_SUB_PAT_MAD<FMAD32_ftzrrr, doFMADF32_ftz>;
956 defm FMADF32ext : FPCONTRACT32_SUB_PAT_MAD<FMAD32rrr, doFMADF32>;
957 defm FMAF64ext : FPCONTRACT64_SUB_PAT<FMA64rrr, doFMAF64AGG>;
// SINF / COSF: select fsin / fcos on Float32Regs to "sin.approx.f32" and
// "cos.approx.f32". Neither record carries a Requires<> clause.
959 def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
960 "sin.approx.f32 \t$dst, $src;",
961 [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>;
962 def COSF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
963 "cos.approx.f32 \t$dst, $src;",
964 [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>;
966 //-----------------------------------
967 // Logical Arithmetic
968 //-----------------------------------
// LOG_FORMAT: two-operand bitwise records for OpNode.
//   OpcStr - opcode prefix; a type suffix is appended per record
//            (".pred", ".b16", ".b32", ".b64").
//   OpNode - the SDNode matched by every pattern.
// For each width there is an rr (register, register) and an ri (register,
// immediate) record. The 8-bit records print the ".b16" suffix, the same
// suffix as the 16-bit records.
970 multiclass LOG_FORMAT<string OpcStr, SDNode OpNode> {
971 def b1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
972 !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
973 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
974 def b1ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
975 !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
976 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>;
977 def b8rr: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
978 !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
979 [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
980 def b8ri: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
981 !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
982 [(set Int8Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
983 def b16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
984 !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
985 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
987 def b16ri: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
988 !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
989 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
990 def b32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
991 !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
992 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
994 def b32ri: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
995 !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
996 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
997 def b64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
998 !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
999 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1001 def b64ri: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
1002 !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
1003 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
// Bitwise or / and / xor: LOG_FORMAT instantiated with the opcode prefix and
// the matching SDNode.
1006 defm OR : LOG_FORMAT<"or", or>;
1007 defm AND : LOG_FORMAT<"and", and>;
1008 defm XOR : LOG_FORMAT<"xor", xor>;
// Bitwise NOT for each integer register class. As in LOG_FORMAT, the 8-bit
// record prints "not.b16", the same opcode as the 16-bit record.
1010 def NOT1: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src),
1011 "not.pred \t$dst, $src;",
1012 [(set Int1Regs:$dst, (not Int1Regs:$src))]>;
1013 def NOT8: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
1014 "not.b16 \t$dst, $src;",
1015 [(set Int8Regs:$dst, (not Int8Regs:$src))]>;
1016 def NOT16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
1017 "not.b16 \t$dst, $src;",
1018 [(set Int16Regs:$dst, (not Int16Regs:$src))]>;
1019 def NOT32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
1020 "not.b32 \t$dst, $src;",
1021 [(set Int32Regs:$dst, (not Int32Regs:$src))]>;
1022 def NOT64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
1023 "not.b64 \t$dst, $src;",
1024 [(set Int64Regs:$dst, (not Int64Regs:$src))]>;
1026 // For shifts, the second src operand must be 32-bit value
// LSHIFT_FORMAT: shift records for OpNode.
//   OpcStr - opcode prefix; the width ("64", "32", "16") is appended per
//            record.
// The ri records take the shift amount as i32imm for every value width, and
// i32ii takes both operands as immediates. The 8-bit records print the "16"
// width suffix.
1027 multiclass LSHIFT_FORMAT<string OpcStr, SDNode OpNode> {
1028 def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
1030 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1031 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1033 def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
1034 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1035 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1037 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
1039 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1040 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1042 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1043 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1044 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1046 def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
1047 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1048 [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
1050 def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
1052 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1053 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
1055 def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
1056 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1057 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
1059 def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b),
1060 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1061 [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
1063 def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b),
1064 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1065 [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
// Left shift: LSHIFT_FORMAT appends the width to the "shl.b" prefix.
1069 defm SHL : LSHIFT_FORMAT<"shl.b", shl>;
1071 // For shifts, the second src operand must be 32-bit value
1072 // Need to add cvt for the 8-bits.
// RSHIFT_FORMAT: like LSHIFT_FORMAT, plus a CVTStr parameter used only by the
// 8-bit records. Those print two instructions: CVTStr " \t$dst, $a;" followed
// by the 16-bit shift applied to $dst in place ("$dst, $dst, $b").
1073 multiclass RSHIFT_FORMAT<string OpcStr, SDNode OpNode, string CVTStr> {
1074 def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
1076 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1077 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1079 def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
1080 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1081 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1083 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
1085 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1086 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1088 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1089 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1090 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1092 def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
1093 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1094 [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
1096 def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
1098 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1099 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
1101 def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
1102 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1103 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
1105 def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b),
1106 !strconcat(CVTStr, !strconcat(" \t$dst, $a;\n\t",
1107 !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))),
1108 [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
1110 def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b),
1111 !strconcat(CVTStr, !strconcat(" \t$dst, $a;\n\t",
1112 !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))),
1113 [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
// Right shifts: sra uses the signed opcode prefix and the signed 8->16 cvt,
// srl uses the unsigned opcode prefix and the unsigned cvt.
1117 defm SRA : RSHIFT_FORMAT<"shr.s", sra, "cvt.s16.s8">;
1118 defm SRL : RSHIFT_FORMAT<"shr.u", srl, "cvt.u16.u8">;
// ROT32imm_sw: 32-bit rotate emitted as an instruction sequence with two
// immediate shift amounts. The asm string is a "{{ ... }}" block that declares
// %lhs and %rhs, shifts $src left by $amt1 and right by $amt2, and adds the
// two halves into $dst.
1121 def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst),
1122 (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2),
1123 !strconcat("{{\n\t",
1124 !strconcat(".reg .b32 %lhs;\n\t",
1125 !strconcat(".reg .b32 %rhs;\n\t",
1126 !strconcat("shl.b32 \t%lhs, $src, $amt1;\n\t",
1127 !strconcat("shr.b32 \t%rhs, $src, $amt2;\n\t",
1128 !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
1129 !strconcat("}}", ""))))))),
// SUB_FRM_32: immediate transform that yields the i32 target constant
// (32 - value) for the matched immediate.
1132 def SUB_FRM_32 : SDNodeXForm<imm, [{
1133 return CurDAG->getTargetConstant(32-N->getZExtValue(), MVT::i32);
// 32-bit rotate by an immediate, mapped onto ROT32imm_sw. rotl passes
// (amt, 32-amt) as (amt1, amt2); rotr passes them in the opposite order.
1136 def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)),
1137 (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>;
1138 def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)),
1139 (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>;
// ROTL32reg_sw: 32-bit rotate-left by a register amount, emitted as a
// "{{ ... }}" block: shl by $amt into %lhs, %amt2 = 32 - $amt, shr by %amt2
// into %rhs, then add the halves into $dst.
1141 def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
1143 !strconcat("{{\n\t",
1144 !strconcat(".reg .b32 %lhs;\n\t",
1145 !strconcat(".reg .b32 %rhs;\n\t",
1146 !strconcat(".reg .b32 %amt2;\n\t",
1147 !strconcat("shl.b32 \t%lhs, $src, $amt;\n\t",
1148 !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
1149 !strconcat("shr.b32 \t%rhs, $src, %amt2;\n\t",
1150 !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
1151 !strconcat("}}", ""))))))))),
1152 [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>;
// ROTR32reg_sw: 32-bit rotate-right by a register amount. Same sequence as
// ROTL32reg_sw with the shift directions exchanged: shr by $amt, shl by
// (32 - $amt), then add.
1154 def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
1156 !strconcat("{{\n\t",
1157 !strconcat(".reg .b32 %lhs;\n\t",
1158 !strconcat(".reg .b32 %rhs;\n\t",
1159 !strconcat(".reg .b32 %amt2;\n\t",
1160 !strconcat("shr.b32 \t%lhs, $src, $amt;\n\t",
1161 !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
1162 !strconcat("shl.b32 \t%rhs, $src, %amt2;\n\t",
1163 !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
1164 !strconcat("}}", ""))))))))),
1165 [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>;
// ROT64imm_sw: 64-bit counterpart of ROT32imm_sw. The temporaries are .b64
// and the shifts/add are the 64-bit forms; both shift amounts are still
// i32imm operands.
1168 def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
1169 i32imm:$amt1, i32imm:$amt2),
1170 !strconcat("{{\n\t",
1171 !strconcat(".reg .b64 %lhs;\n\t",
1172 !strconcat(".reg .b64 %rhs;\n\t",
1173 !strconcat("shl.b64 \t%lhs, $src, $amt1;\n\t",
1174 !strconcat("shr.b64 \t%rhs, $src, $amt2;\n\t",
1175 !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
1176 !strconcat("}}", ""))))))),
// SUB_FRM_64: immediate transform that yields the i32 target constant
// (64 - value) for the matched immediate.
1179 def SUB_FRM_64 : SDNodeXForm<imm, [{
1180 return CurDAG->getTargetConstant(64-N->getZExtValue(), MVT::i32);
// 64-bit rotate by an i32 immediate, mapped onto ROT64imm_sw. rotl passes
// (amt, 64-amt) as (amt1, amt2); rotr passes them in the opposite order.
1183 def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)),
1184 (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>;
1185 def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)),
1186 (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>;
// ROTL64reg_sw: 64-bit rotate-left by a 32-bit register amount. %lhs/%rhs are
// .b64 while the complementary amount %amt2 is a .u32 computed as 64 - $amt.
1188 def ROTL64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
1190 !strconcat("{{\n\t",
1191 !strconcat(".reg .b64 %lhs;\n\t",
1192 !strconcat(".reg .b64 %rhs;\n\t",
1193 !strconcat(".reg .u32 %amt2;\n\t",
1194 !strconcat("shl.b64 \t%lhs, $src, $amt;\n\t",
1195 !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
1196 !strconcat("shr.b64 \t%rhs, $src, %amt2;\n\t",
1197 !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
1198 !strconcat("}}", ""))))))))),
1199 [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>;
// ROTR64reg_sw: 64-bit rotate-right by a 32-bit register amount. Same
// sequence as ROTL64reg_sw with the shift directions exchanged.
1201 def ROTR64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
1203 !strconcat("{{\n\t",
1204 !strconcat(".reg .b64 %lhs;\n\t",
1205 !strconcat(".reg .b64 %rhs;\n\t",
1206 !strconcat(".reg .u32 %amt2;\n\t",
1207 !strconcat("shr.b64 \t%lhs, $src, $amt;\n\t",
1208 !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
1209 !strconcat("shl.b64 \t%rhs, $src, %amt2;\n\t",
1210 !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
1211 !strconcat("}}", ""))))))))),
1212 [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>;
1215 //-----------------------------------
1216 // Data Movement (Load / Store, Move)
1217 //-----------------------------------
// Two-operand address patterns rooted at frameindex: ADDRri is the i32 form
// handled by "SelectADDRri", ADDRri64 the i64 form handled by
// "SelectADDRri64".
1219 def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex],
1221 def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri64", [frameindex],
// Memory operands made of a base register plus an immediate, printed by
// printMemOperand. MEMri pairs Int32Regs with i32imm; MEMri64 pairs Int64Regs
// with i64imm.
1224 def MEMri : Operand<i32> {
1225 let PrintMethod = "printMemOperand";
1226 let MIOperandInfo = (ops Int32Regs, i32imm);
1228 def MEMri64 : Operand<i64> {
1229 let PrintMethod = "printMemOperand";
1230 let MIOperandInfo = (ops Int64Regs, i64imm);
// Pointer-typed operands (iPTR and iPTRAny) printed with printOperand, and
// LdStCode, an i32 operand printed with printLdStCode.
1233 def imem : Operand<iPTR> {
1234 let PrintMethod = "printOperand";
1237 def imemAny : Operand<iPTRAny> {
1238 let PrintMethod = "printOperand";
1241 def LdStCode : Operand<i32> {
1242 let PrintMethod = "printLdStCode";
// NVPTXISD::Wrapper node: one result and one operand of the same type, with
// the result constrained to the pointer type.
1245 def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
1246 def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
// Materialize a wrapped global address (Wrapper tglobaladdr) into a register:
// "mov.u32" into Int32Regs, "mov.u64" into Int64Regs. Both take an imem
// operand.
1248 def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a),
1249 "mov.u32 \t$dst, $a;",
1250 [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>;
1252 def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
1253 "mov.u64 \t$dst, $a;",
1254 [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>;
1256 // copyPhysreg is hard-coded in NVPTXInstrInfo.cpp
// Register-to-register move records. They have empty pattern lists, so they
// are not chosen by pattern matching. The `let` below sets IsSimpleMove=1 on
// the records inside its braces. The 8-bit move prints "mov.u16", the same
// opcode as the 16-bit move.
1257 let IsSimpleMove=1 in {
1258 def IMOV1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
1259 "mov.pred \t$dst, $sss;", []>;
1260 def IMOV8rr: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$sss),
1261 "mov.u16 \t$dst, $sss;", []>;
1262 def IMOV16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
1263 "mov.u16 \t$dst, $sss;", []>;
1264 def IMOV32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
1265 "mov.u32 \t$dst, $sss;", []>;
1266 def IMOV64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
1267 "mov.u64 \t$dst, $sss;", []>;
1269 def FMOV32rr: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
1270 "mov.f32 \t$dst, $src;", []>;
1271 def FMOV64rr: NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
1272 "mov.f64 \t$dst, $src;", []>;
// Integer immediate materialization: each record matches a bare `imm` of its
// width and prints a "mov" into the corresponding register class. The 8-bit
// record prints "mov.u16". Note the 64-bit record is named IMOV64i, without
// the "r" used by its siblings.
1274 def IMOV1ri: NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
1275 "mov.pred \t$dst, $src;",
1276 [(set Int1Regs:$dst, imm:$src)]>;
1277 def IMOV8ri: NVPTXInst<(outs Int8Regs:$dst), (ins i8imm:$src),
1278 "mov.u16 \t$dst, $src;",
1279 [(set Int8Regs:$dst, imm:$src)]>;
1280 def IMOV16ri: NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
1281 "mov.u16 \t$dst, $src;",
1282 [(set Int16Regs:$dst, imm:$src)]>;
1283 def IMOV32ri: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
1284 "mov.u32 \t$dst, $src;",
1285 [(set Int32Regs:$dst, imm:$src)]>;
1286 def IMOV64i: NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
1287 "mov.u64 \t$dst, $src;",
1288 [(set Int64Regs:$dst, imm:$src)]>;
// Floating-point immediate materialization: match a bare fpimm and print
// "mov.f32" / "mov.f64".
1290 def FMOV32ri: NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
1291 "mov.f32 \t$dst, $src;",
1292 [(set Float32Regs:$dst, fpimm:$src)]>;
1293 def FMOV64ri: NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
1294 "mov.f64 \t$dst, $src;",
1295 [(set Float64Regs:$dst, fpimm:$src)]>;
// A wrapped external symbol of type i32 is materialized with IMOV32ri.
1297 def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;
1299 //---- Copy Frame Index ----
// Compute an ADDRri / ADDRri64 address into a register with "add.u32" /
// "add.u64". The memory operand is printed with the "add" modifier
// (${addr:add}).
1300 def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
1301 "add.u32 \t$dst, ${addr:add};",
1302 [(set Int32Regs:$dst, ADDRri:$addr)]>;
1303 def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr),
1304 "add.u64 \t$dst, ${addr:add};",
1305 [(set Int64Regs:$dst, ADDRri64:$addr)]>;
1307 //-----------------------------------
1308 // Comparison and Selection
1309 //-----------------------------------
1311 // Generate string block like
1314 // setp.gt.s16 p, %a, %b;
1315 // selp.s16 %dst, -1, 0, p;
1317 // when OpcStr=setp.gt.s sz1=16 sz2=16 d=%dst a=%a b=%b
// The generated text is wrapped in "{{" ... "}}" and starts by declaring the
// predicate ".reg .pred p;" (see t1 and s). sz1 is appended to OpcStr for the
// compare; sz2 is appended to "selp.s" for the select. The result is
// available as the field `s`.
1318 class Set_Str<string OpcStr, string sz1, string sz2, string d, string a,
1320 string t1 = "{{\n\t.reg .pred p;\n\t";
1321 string t2 = !strconcat(t1 , OpcStr);
1322 string t3 = !strconcat(t2 , sz1);
1323 string t4 = !strconcat(t3 , " \tp, ");
1324 string t5 = !strconcat(t4 , a);
1325 string t6 = !strconcat(t5 , ", ");
1326 string t7 = !strconcat(t6 , b);
1327 string t8 = !strconcat(t7 , ";\n\tselp.s");
1328 string t9 = !strconcat(t8 , sz2);
1329 string t10 = !strconcat(t9, " \t");
1330 string t11 = !strconcat(t10, d);
1331 string s = !strconcat(t11, ", -1, 0, p;\n\t}}");
1334 // Generate string block like
1337 // .reg .s16 %temp1;
1338 // .reg .s16 %temp2;
1339 // cvt.s16.s8 %temp1, %a;
1340 // cvt.s16.s8 %temp2, %b;
1341 // setp.gt.s16 p, %temp1, %temp2;
1342 // selp.s16 %dst, -1, 0, p;
1344 // when OpcStr=setp.gt.s d=%dst a=%a b=%b type=s16 cvt=cvt.s16.s8
// The generated text is wrapped in "{{" ... "}}" and starts by declaring the
// predicate ".reg .pred p;" (see t1 and s). The compare always gets the "16"
// suffix and the select is always "selp.s16", regardless of `type`. The
// result is available as the field `s`.
1345 class Set_Stri8<string OpcStr, string d, string a, string b, string type,
1347 string t1 = "{{\n\t.reg .pred p;\n\t";
1348 string t2 = !strconcat(t1, ".reg .");
1349 string t3 = !strconcat(t2, type);
1350 string t4 = !strconcat(t3, " %temp1;\n\t");
1351 string t5 = !strconcat(t4, ".reg .");
1352 string t6 = !strconcat(t5, type);
1353 string t7 = !strconcat(t6, " %temp2;\n\t");
1354 string t8 = !strconcat(t7, cvt);
1355 string t9 = !strconcat(t8, " \t%temp1, ");
1356 string t10 = !strconcat(t9, a);
1357 string t11 = !strconcat(t10, ";\n\t");
1358 string t12 = !strconcat(t11, cvt);
1359 string t13 = !strconcat(t12, " \t%temp2, ");
1360 string t14 = !strconcat(t13, b);
1361 string t15 = !strconcat(t14, ";\n\t");
1362 string t16 = !strconcat(t15, OpcStr);
1363 string t17 = !strconcat(t16, "16");
1364 string t18 = !strconcat(t17, " \tp, %temp1, %temp2;\n\t");
1365 string t19 = !strconcat(t18, "selp.s16 \t");
1366 string t20 = !strconcat(t19, d);
1367 string s = !strconcat(t20, ", -1, 0, p;\n\t}}");
// ISET_FORMAT: integer comparison records for OpNode.
//   OpcStr     - opcode prefix for the predicate-producing form; the operand
//                width is appended per record.
//   OpcStr_u32 - opcode prefix for the form that writes an Int32Regs result.
//   OpNode     - the comparison PatFrag matched by the _p and _u32 records.
//   TypeStr,
//   CVTStr     - passed through to the 8-bit helper string classes
//                (Set_Stri8 and Handle_i8rr/ri/ir).
// Record families:
//   *_toiN - result has the same width as the operands; asm text comes from
//            Set_Str / Set_Stri8.
//   *_p    - result in Int1Regs, with rr / ri / ir operand forms.
//   *_u32  - result in Int32Regs, with rr / ri / ir operand forms.
// NOTE(review): Handle_i8rr / Handle_i8ri / Handle_i8ir are defined outside
// this section; their generated text is not visible here.
1370 multiclass ISET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode,
1371 string TypeStr, string CVTStr> {
1372 def i8rr_toi8: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
1373 Set_Stri8<OpcStr, "$dst", "$a", "$b", TypeStr, CVTStr>.s,
1375 def i16rr_toi16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
1377 Set_Str<OpcStr, "16", "16", "$dst", "$a", "$b">.s,
1379 def i32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
1381 Set_Str<OpcStr, "32", "32", "$dst", "$a", "$b">.s,
1383 def i64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
1385 Set_Str<OpcStr, "64", "64", "$dst", "$a", "$b">.s,
1388 def i8rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
1389 Handle_i8rr<OpcStr, TypeStr, CVTStr>.s,
1390 [(set Int1Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
1391 def i8ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
1392 Handle_i8ri<OpcStr, TypeStr, CVTStr>.s,
1393 [(set Int1Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
1394 def i8ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i8imm:$a, Int8Regs:$b),
1395 Handle_i8ir<OpcStr, TypeStr, CVTStr>.s,
1396 [(set Int1Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>;
1397 def i16rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
1398 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1399 [(set Int1Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
1400 def i16ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
1401 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1402 [(set Int1Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
1403 def i16ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i16imm:$a, Int16Regs:$b),
1404 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1405 [(set Int1Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>;
1406 def i32rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
1407 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1408 [(set Int1Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
1409 def i32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1410 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1411 [(set Int1Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
1412 def i32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i32imm:$a, Int32Regs:$b),
1413 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1414 [(set Int1Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>;
1415 def i64rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
1416 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1417 [(set Int1Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
1418 def i64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
1419 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1420 [(set Int1Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
1421 def i64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i64imm:$a, Int64Regs:$b),
1422 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1423 [(set Int1Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>;
// Forms producing an Int32Regs result; these use OpcStr_u32.
1425 def i8rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
1426 Handle_i8rr<OpcStr_u32, TypeStr, CVTStr>.s,
1427 [(set Int32Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
1428 def i8ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
1429 Handle_i8ri<OpcStr_u32, TypeStr, CVTStr>.s,
1430 [(set Int32Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
1431 def i8ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i8imm:$a, Int8Regs:$b),
1432 Handle_i8ir<OpcStr_u32, TypeStr, CVTStr>.s,
1433 [(set Int32Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>;
1434 def i16rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a,
1436 !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
1437 [(set Int32Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
1438 def i16ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
1439 !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
1440 [(set Int32Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
1441 def i16ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i16imm:$a, Int16Regs:$b),
1442 !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
1443 [(set Int32Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>;
1444 def i32rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
1446 !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
1447 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
1448 def i32ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1449 !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
1450 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
1451 def i32ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, Int32Regs:$b),
1452 !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
1453 [(set Int32Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>;
1454 def i64rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a,
1456 !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
1457 [(set Int32Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
1458 def i64ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
1459 !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
1460 [(set Int32Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
1461 def i64ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i64imm:$a, Int64Regs:$b),
1462 !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
1463 [(set Int32Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>;
// FSET_FORMAT: floating-point comparison records for OpNode.
//   OpcStr     - opcode prefix for the predicate-producing form; the type
//                suffix ("f32", "ftz.f32", "f64") is appended per record.
//   OpcStr_u32 - opcode prefix for the form that writes an Int32Regs result.
//   OpNode     - the comparison PatFrag matched by the _p and _u32 records.
// Record families:
//   *_toiN - asm text from Set_Str.
//   *_p    - result in Int1Regs, with rr / ri / ir operand forms.
//   *_u32  - result in Int32Regs, with rr / ri / ir operand forms.
// Every record that prints the ".ftz" modifier is gated by doF32FTZ. The
// _ftz record and its plain sibling share the same pattern, so the predicate
// is what keeps the flush-to-zero form from being used when FTZ is off.
1466 multiclass FSET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode> {
1467 def f32rr_toi32_ftz: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a,
1469 Set_Str<OpcStr, "ftz.f32", "32", "$dst", "$a", "$b">.s,
1470 []>, Requires<[doF32FTZ]>;
1471 def f32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a,
1473 Set_Str<OpcStr, "f32", "32", "$dst", "$a", "$b">.s,
1475 def f64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Float64Regs:$a,
1477 Set_Str<OpcStr, "f64", "64", "$dst", "$a", "$b">.s,
1479 def f64rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float64Regs:$a,
1481 Set_Str<OpcStr, "f64", "32", "$dst", "$a", "$b">.s,
1484 def f32rr_p_ftz: NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a
1486 !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
1487 [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>
1488 , Requires<[doF32FTZ]>;
1489 def f32rr_p: NVPTXInst<(outs Int1Regs:$dst),
1490 (ins Float32Regs:$a, Float32Regs:$b),
1491 !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
1492 [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>;
1493 def f32ri_p_ftz: NVPTXInst<(outs Int1Regs:$dst),
1494 (ins Float32Regs:$a, f32imm:$b),
1495 !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
1496 [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
1497 Requires<[doF32FTZ]>;
1498 def f32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a, f32imm:$b),
1499 !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
1500 [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
1501 def f32ir_p_ftz: NVPTXInst<(outs Int1Regs:$dst),
1502 (ins f32imm:$a, Float32Regs:$b),
1503 !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
1504 [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>,
1505 Requires<[doF32FTZ]>;
1506 def f32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f32imm:$a, Float32Regs:$b),
1507 !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
1508 [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>;
1509 def f64rr_p: NVPTXInst<(outs Int1Regs:$dst),
1510 (ins Float64Regs:$a, Float64Regs:$b),
1511 !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
1512 [(set Int1Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>;
1513 def f64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float64Regs:$a, f64imm:$b),
1514 !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
1515 [(set Int1Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>;
1516 def f64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f64imm:$a, Float64Regs:$b),
1517 !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
1518 [(set Int1Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>;
// Forms producing an Int32Regs result. The _ftz records are gated by doF32FTZ
// exactly like the *_p_ftz records above; without the predicate they would
// have the same pattern and predicates as the plain records that follow them.
1520 def f32rr_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
1521 (ins Float32Regs:$a, Float32Regs:$b),
1522 !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
1523 [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>, Requires<[doF32FTZ]>;
1524 def f32rr_u32: NVPTXInst<(outs Int32Regs:$dst),
1525 (ins Float32Regs:$a, Float32Regs:$b),
1526 !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
1527 [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>;
1528 def f32ri_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
1529 (ins Float32Regs:$a, f32imm:$b),
1530 !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
1531 [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, Requires<[doF32FTZ]>;
1532 def f32ri_u32: NVPTXInst<(outs Int32Regs:$dst),
1533 (ins Float32Regs:$a, f32imm:$b),
1534 !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
1535 [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
1536 def f32ir_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
1537 (ins f32imm:$a, Float32Regs:$b),
1538 !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
1539 [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>, Requires<[doF32FTZ]>;
1540 def f32ir_u32: NVPTXInst<(outs Int32Regs:$dst),
1541 (ins f32imm:$a, Float32Regs:$b),
1542 !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
1543 [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>;
1544 def f64rr_u32: NVPTXInst<(outs Int32Regs:$dst),
1545 (ins Float64Regs:$a, Float64Regs:$b),
1546 !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
1547 [(set Int32Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>;
1548 def f64ri_u32: NVPTXInst<(outs Int32Regs:$dst),
1549 (ins Float64Regs:$a, f64imm:$b),
1550 !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
1551 [(set Int32Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>;
1552 def f64ir_u32: NVPTXInst<(outs Int32Regs:$dst),
1553 (ins f64imm:$a, Float64Regs:$b),
1554 !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
1555 [(set Int32Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>;
// ISET_FORMAT instantiations, one per integer comparison. Arguments in order:
// predicate opcode prefix, u32-result opcode prefix, comparison PatFrag,
// 16-bit type string, and the 8->16 cvt string. Signed comparisons use the
// ".s" opcodes with "s16" / "cvt.s16.s8"; unsigned ones use ".u" with
// "u16" / "cvt.u16.u8".
1559 : ISET_FORMAT<"setp.gt.s", "set.gt.u32.s", setgt, "s16", "cvt.s16.s8">;
1561 : ISET_FORMAT<"setp.gt.u", "set.gt.u32.u", setugt, "u16", "cvt.u16.u8">;
1563 : ISET_FORMAT<"setp.lt.s", "set.lt.u32.s", setlt, "s16", "cvt.s16.s8">;
1565 : ISET_FORMAT<"setp.lt.u", "set.lt.u32.u", setult, "u16", "cvt.u16.u8">;
1567 : ISET_FORMAT<"setp.ge.s", "set.ge.u32.s", setge, "s16", "cvt.s16.s8">;
1569 : ISET_FORMAT<"setp.ge.u", "set.ge.u32.u", setuge, "u16", "cvt.u16.u8">;
1571 : ISET_FORMAT<"setp.le.s", "set.le.u32.s", setle, "s16", "cvt.s16.s8">;
1573 : ISET_FORMAT<"setp.le.u", "set.le.u32.u", setule, "u16", "cvt.u16.u8">;
1575 : ISET_FORMAT<"setp.eq.s", "set.eq.u32.s", seteq, "s16", "cvt.s16.s8">;
1577 : ISET_FORMAT<"setp.eq.u", "set.eq.u32.u", setueq, "u16", "cvt.u16.u8">;
1579 : ISET_FORMAT<"setp.ne.s", "set.ne.u32.s", setne, "s16", "cvt.s16.s8">;
1581 : ISET_FORMAT<"setp.ne.u", "set.ne.u32.u", setune, "u16", "cvt.u16.u8">;
// Comparisons of two i1 predicates producing a predicate. Inequality
// (setne / setune) is a single "xor.pred". Equality (seteq / setueq) is a
// "{{ ... }}" block that xors into a temporary predicate and then applies
// "not.pred".
1583 def ISetSNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
1584 (ins Int1Regs:$a, Int1Regs:$b),
1585 "xor.pred \t$dst, $a, $b;",
1586 [(set Int1Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>;
1587 def ISetUNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
1588 (ins Int1Regs:$a, Int1Regs:$b),
1589 "xor.pred \t$dst, $a, $b;",
1590 [(set Int1Regs:$dst, (setune Int1Regs:$a, Int1Regs:$b))]>;
1591 def ISetSEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
1592 (ins Int1Regs:$a, Int1Regs:$b),
1593 !strconcat("{{\n\t",
1594 !strconcat(".reg .pred temp;\n\t",
1595 !strconcat("xor.pred \ttemp, $a, $b;\n\t",
1596 !strconcat("not.pred \t$dst, temp;\n\t}}","")))),
1597 [(set Int1Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>;
1598 def ISetUEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
1599 (ins Int1Regs:$a, Int1Regs:$b),
1600 !strconcat("{{\n\t",
1601 !strconcat(".reg .pred temp;\n\t",
1602 !strconcat("xor.pred \ttemp, $a, $b;\n\t",
1603 !strconcat("not.pred \t$dst, temp;\n\t}}","")))),
1604 [(set Int1Regs:$dst, (setueq Int1Regs:$a, Int1Regs:$b))]>;
// Compare two i1 predicates and produce a u32: the XOR of the inputs is
// computed into a scoped temporary predicate, and selp turns it into -1 or 0.
def ISETSNEi1rr_u32 : NVPTXInst<
    (outs Int32Regs:$dst),
    (ins Int1Regs:$a, Int1Regs:$b),
    !strconcat("{{\n\t",
               ".reg .pred temp;\n\t",
               "xor.pred \ttemp, $a, $b;\n\t",
               "selp.u32 \t$dst, -1, 0, temp;\n\t}}"),
    [(set Int32Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>;
// Same sequence with the selp operands swapped, so equal inputs yield -1.
def ISETSEQi1rr_u32 : NVPTXInst<
    (outs Int32Regs:$dst),
    (ins Int1Regs:$a, Int1Regs:$b),
    !strconcat("{{\n\t",
               ".reg .pred temp;\n\t",
               "xor.pred \ttemp, $a, $b;\n\t",
               "selp.u32 \t$dst, 0, -1, temp;\n\t}}"),
    [(set Int32Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>;
// Floating-point comparisons, one FSET_FORMAT instantiation per condition.

// Ordered conditions (seto* nodes).
defm FSetGT  : FSET_FORMAT<"setp.gt.",  "set.gt.u32.",  setogt>;
defm FSetLT  : FSET_FORMAT<"setp.lt.",  "set.lt.u32.",  setolt>;
defm FSetGE  : FSET_FORMAT<"setp.ge.",  "set.ge.u32.",  setoge>;
defm FSetLE  : FSET_FORMAT<"setp.le.",  "set.le.u32.",  setole>;
defm FSetEQ  : FSET_FORMAT<"setp.eq.",  "set.eq.u32.",  setoeq>;
defm FSetNE  : FSET_FORMAT<"setp.ne.",  "set.ne.u32.",  setone>;

// Unordered conditions (setu* nodes), printed with the "u"-suffixed operators.
defm FSetUGT : FSET_FORMAT<"setp.gtu.", "set.gtu.u32.", setugt>;
defm FSetULT : FSET_FORMAT<"setp.ltu.", "set.ltu.u32.", setult>;
defm FSetUGE : FSET_FORMAT<"setp.geu.", "set.geu.u32.", setuge>;
defm FSetULE : FSET_FORMAT<"setp.leu.", "set.leu.u32.", setule>;
defm FSetUEQ : FSET_FORMAT<"setp.equ.", "set.equ.u32.", setueq>;
defm FSetUNE : FSET_FORMAT<"setp.neu.", "set.neu.u32.", setune>;

// seto / setuo map to the "num" / "nan" operators.
defm FSetNUM : FSET_FORMAT<"setp.num.", "set.num.u32.", seto>;
defm FSetNAN : FSET_FORMAT<"setp.nan.", "set.nan.u32.", setuo>;
// i1 select is expanded into predicate logic: (p & a) | (!p & b).
def SELECTi1rr
  : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)),
        (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a),
                (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>;
// 8-bit select.  Every operand form prints selp.b16; the pattern picks $a
// when predicate $p is true and $b otherwise.  Suffix letters give the kinds
// of $a and $b (r = register, i = immediate).
def SELECTi8rr : NVPTXInst<
    (outs Int8Regs:$dst),
    (ins Int8Regs:$a, Int8Regs:$b, Int1Regs:$p),
    "selp.b16 \t$dst, $a, $b, $p;",
    [(set Int8Regs:$dst, (select Int1Regs:$p, Int8Regs:$a, Int8Regs:$b))]>;
def SELECTi8ri : NVPTXInst<
    (outs Int8Regs:$dst),
    (ins Int8Regs:$a, i8imm:$b, Int1Regs:$p),
    "selp.b16 \t$dst, $a, $b, $p;",
    [(set Int8Regs:$dst, (select Int1Regs:$p, Int8Regs:$a, imm:$b))]>;
def SELECTi8ir : NVPTXInst<
    (outs Int8Regs:$dst),
    (ins i8imm:$a, Int8Regs:$b, Int1Regs:$p),
    "selp.b16 \t$dst, $a, $b, $p;",
    [(set Int8Regs:$dst, (select Int1Regs:$p, imm:$a, Int8Regs:$b))]>;
def SELECTi8ii : NVPTXInst<
    (outs Int8Regs:$dst),
    (ins i8imm:$a, i8imm:$b, Int1Regs:$p),
    "selp.b16 \t$dst, $a, $b, $p;",
    [(set Int8Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
// 16-bit select (selp.b16).  The pattern picks $a when predicate $p is true
// and $b otherwise; suffix letters give the kinds of $a and $b.
def SELECTi16rr : NVPTXInst<
    (outs Int16Regs:$dst),
    (ins Int16Regs:$a, Int16Regs:$b, Int1Regs:$p),
    "selp.b16 \t$dst, $a, $b, $p;",
    [(set Int16Regs:$dst,
       (select Int1Regs:$p, Int16Regs:$a, Int16Regs:$b))]>;
def SELECTi16ri : NVPTXInst<
    (outs Int16Regs:$dst),
    (ins Int16Regs:$a, i16imm:$b, Int1Regs:$p),
    "selp.b16 \t$dst, $a, $b, $p;",
    [(set Int16Regs:$dst, (select Int1Regs:$p, Int16Regs:$a, imm:$b))]>;
def SELECTi16ir : NVPTXInst<
    (outs Int16Regs:$dst),
    (ins i16imm:$a, Int16Regs:$b, Int1Regs:$p),
    "selp.b16 \t$dst, $a, $b, $p;",
    [(set Int16Regs:$dst, (select Int1Regs:$p, imm:$a, Int16Regs:$b))]>;
def SELECTi16ii : NVPTXInst<
    (outs Int16Regs:$dst),
    (ins i16imm:$a, i16imm:$b, Int1Regs:$p),
    "selp.b16 \t$dst, $a, $b, $p;",
    [(set Int16Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
// 32-bit select (selp.b32).  The pattern picks $a when predicate $p is true
// and $b otherwise; suffix letters give the kinds of $a and $b.
def SELECTi32rr : NVPTXInst<
    (outs Int32Regs:$dst),
    (ins Int32Regs:$a, Int32Regs:$b, Int1Regs:$p),
    "selp.b32 \t$dst, $a, $b, $p;",
    [(set Int32Regs:$dst,
       (select Int1Regs:$p, Int32Regs:$a, Int32Regs:$b))]>;
def SELECTi32ri : NVPTXInst<
    (outs Int32Regs:$dst),
    (ins Int32Regs:$a, i32imm:$b, Int1Regs:$p),
    "selp.b32 \t$dst, $a, $b, $p;",
    [(set Int32Regs:$dst, (select Int1Regs:$p, Int32Regs:$a, imm:$b))]>;
def SELECTi32ir : NVPTXInst<
    (outs Int32Regs:$dst),
    (ins i32imm:$a, Int32Regs:$b, Int1Regs:$p),
    "selp.b32 \t$dst, $a, $b, $p;",
    [(set Int32Regs:$dst, (select Int1Regs:$p, imm:$a, Int32Regs:$b))]>;
def SELECTi32ii : NVPTXInst<
    (outs Int32Regs:$dst),
    (ins i32imm:$a, i32imm:$b, Int1Regs:$p),
    "selp.b32 \t$dst, $a, $b, $p;",
    [(set Int32Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
// 64-bit select (selp.b64).  The pattern picks $a when predicate $p is true
// and $b otherwise; suffix letters give the kinds of $a and $b.
def SELECTi64rr : NVPTXInst<
    (outs Int64Regs:$dst),
    (ins Int64Regs:$a, Int64Regs:$b, Int1Regs:$p),
    "selp.b64 \t$dst, $a, $b, $p;",
    [(set Int64Regs:$dst,
       (select Int1Regs:$p, Int64Regs:$a, Int64Regs:$b))]>;
def SELECTi64ri : NVPTXInst<
    (outs Int64Regs:$dst),
    (ins Int64Regs:$a, i64imm:$b, Int1Regs:$p),
    "selp.b64 \t$dst, $a, $b, $p;",
    [(set Int64Regs:$dst, (select Int1Regs:$p, Int64Regs:$a, imm:$b))]>;
def SELECTi64ir : NVPTXInst<
    (outs Int64Regs:$dst),
    (ins i64imm:$a, Int64Regs:$b, Int1Regs:$p),
    "selp.b64 \t$dst, $a, $b, $p;",
    [(set Int64Regs:$dst, (select Int1Regs:$p, imm:$a, Int64Regs:$b))]>;
def SELECTi64ii : NVPTXInst<
    (outs Int64Regs:$dst),
    (ins i64imm:$a, i64imm:$b, Int1Regs:$p),
    "selp.b64 \t$dst, $a, $b, $p;",
    [(set Int64Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
// Single-precision select (selp.f32).  The pattern picks $a when predicate $p
// is true and $b otherwise; immediates are matched with fpimm.
def SELECTf32rr : NVPTXInst<
    (outs Float32Regs:$dst),
    (ins Float32Regs:$a, Float32Regs:$b, Int1Regs:$p),
    "selp.f32 \t$dst, $a, $b, $p;",
    [(set Float32Regs:$dst,
       (select Int1Regs:$p, Float32Regs:$a, Float32Regs:$b))]>;
def SELECTf32ri : NVPTXInst<
    (outs Float32Regs:$dst),
    (ins Float32Regs:$a, f32imm:$b, Int1Regs:$p),
    "selp.f32 \t$dst, $a, $b, $p;",
    [(set Float32Regs:$dst,
       (select Int1Regs:$p, Float32Regs:$a, fpimm:$b))]>;
def SELECTf32ir : NVPTXInst<
    (outs Float32Regs:$dst),
    (ins f32imm:$a, Float32Regs:$b, Int1Regs:$p),
    "selp.f32 \t$dst, $a, $b, $p;",
    [(set Float32Regs:$dst,
       (select Int1Regs:$p, fpimm:$a, Float32Regs:$b))]>;
def SELECTf32ii : NVPTXInst<
    (outs Float32Regs:$dst),
    (ins f32imm:$a, f32imm:$b, Int1Regs:$p),
    "selp.f32 \t$dst, $a, $b, $p;",
    [(set Float32Regs:$dst, (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>;
// Double-precision select (selp.f64).  The pattern picks $a when predicate $p
// is true and $b otherwise; immediates are matched with fpimm.
def SELECTf64rr : NVPTXInst<
    (outs Float64Regs:$dst),
    (ins Float64Regs:$a, Float64Regs:$b, Int1Regs:$p),
    "selp.f64 \t$dst, $a, $b, $p;",
    [(set Float64Regs:$dst,
       (select Int1Regs:$p, Float64Regs:$a, Float64Regs:$b))]>;
def SELECTf64ri : NVPTXInst<
    (outs Float64Regs:$dst),
    (ins Float64Regs:$a, f64imm:$b, Int1Regs:$p),
    "selp.f64 \t$dst, $a, $b, $p;",
    [(set Float64Regs:$dst,
       (select Int1Regs:$p, Float64Regs:$a, fpimm:$b))]>;
def SELECTf64ir : NVPTXInst<
    (outs Float64Regs:$dst),
    (ins f64imm:$a, Float64Regs:$b, Int1Regs:$p),
    "selp.f64 \t$dst, $a, $b, $p;",
    [(set Float64Regs:$dst,
       (select Int1Regs:$p, fpimm:$a, Float64Regs:$b))]>;
// NOTE(review): this asm string has an extra space after the tab, unlike its
// siblings; kept as-is so the printed text does not change.
def SELECTf64ii : NVPTXInst<
    (outs Float64Regs:$dst),
    (ins f64imm:$a, f64imm:$b, Int1Regs:$p),
    "selp.f64 \t $dst, $a, $b, $p;",
    [(set Float64Regs:$dst, (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>;
1746 //def ld_param : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad,
1747 // [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
// Type profile for DeclareParam / DeclareRetParam: no results, three integer
// operands (the instruction patterns that use it pass three i32 immediates).
def SDTDeclareParamProfile
  : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>;
// Type profiles for the NVPTX call/return lowering nodes.  The template
// arguments are <number of results, number of operands, type constraints>.
def SDTDeclareScalarParamProfile
  : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>;
def SDTLoadParamProfile
  : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDTPrintCallProfile      : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDTPrintCallUniProfile   : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDTStoreParamProfile
  : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTStoreParam32Profile
  : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTCallArgProfile        : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
// Profiles without type constraints.
def SDTCallArgMarkProfile    : SDTypeProfile<0, 0, []>;
def SDTCallVoidProfile       : SDTypeProfile<0, 1, []>;
def SDTCallValProfile        : SDTypeProfile<1, 0, []>;
def SDTMoveParamProfile      : SDTypeProfile<1, 1, []>;
def SDTMoveRetvalProfile     : SDTypeProfile<0, 1, []>;
def SDTStoreRetvalProfile    : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>;
// Nearly all call-lowering nodes below share one property list: chained,
// glued on both sides, and side-effecting.  This helper spells it out once;
// each def still derives from SDNode with the same opcode, profile and
// properties as before.
class NVPTXGluedSideEffectNode<string opcode, SDTypeProfile profile>
  : SDNode<opcode, profile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;

// Parameter / return-value declarations.
def DeclareParam
  : NVPTXGluedSideEffectNode<"NVPTXISD::DeclareParam",
                             SDTDeclareParamProfile>;
def DeclareScalarParam
  : NVPTXGluedSideEffectNode<"NVPTXISD::DeclareScalarParam",
                             SDTDeclareScalarParamProfile>;
def DeclareRetParam
  : NVPTXGluedSideEffectNode<"NVPTXISD::DeclareRetParam",
                             SDTDeclareParamProfile>;
def DeclareRet
  : NVPTXGluedSideEffectNode<"NVPTXISD::DeclareRet",
                             SDTDeclareScalarParamProfile>;

// LoadParam is the one node here with a different property list: it may load
// and is not marked as side-effecting.
def LoadParam
  : SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile,
           [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;

// Call header.
def PrintCall
  : NVPTXGluedSideEffectNode<"NVPTXISD::PrintCall", SDTPrintCallProfile>;
def PrintCallUni
  : NVPTXGluedSideEffectNode<"NVPTXISD::PrintCallUni",
                             SDTPrintCallUniProfile>;

// Outgoing parameter stores / moves.
def StoreParam
  : NVPTXGluedSideEffectNode<"NVPTXISD::StoreParam", SDTStoreParamProfile>;
def StoreParamU32
  : NVPTXGluedSideEffectNode<"NVPTXISD::StoreParamU32",
                             SDTStoreParam32Profile>;
def StoreParamS32
  : NVPTXGluedSideEffectNode<"NVPTXISD::StoreParamS32",
                             SDTStoreParam32Profile>;
def MoveToParam
  : NVPTXGluedSideEffectNode<"NVPTXISD::MoveToParam", SDTStoreParamProfile>;

// Call argument list, callee and prototype.
def CallArgBegin
  : NVPTXGluedSideEffectNode<"NVPTXISD::CallArgBegin",
                             SDTCallArgMarkProfile>;
def CallArg
  : NVPTXGluedSideEffectNode<"NVPTXISD::CallArg", SDTCallArgProfile>;
def LastCallArg
  : NVPTXGluedSideEffectNode<"NVPTXISD::LastCallArg", SDTCallArgProfile>;
def CallArgEnd
  : NVPTXGluedSideEffectNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile>;
def CallVoid
  : NVPTXGluedSideEffectNode<"NVPTXISD::CallVoid", SDTCallVoidProfile>;
def Prototype
  : NVPTXGluedSideEffectNode<"NVPTXISD::Prototype", SDTCallVoidProfile>;
def CallVal
  : NVPTXGluedSideEffectNode<"NVPTXISD::CallVal", SDTCallValProfile>;
// NOTE(review): the property-list line of this def was missing, leaving the
// definition unterminated.  It is closed here with an empty list (SDNode's
// default); confirm against the upstream definition.
def MoveParam : SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile,
                       []>;
// Return-value nodes: chained and side-effecting, but not glued.
def MoveRetval
  : SDNode<"NVPTXISD::MoveRetval", SDTMoveRetvalProfile,
           [SDNPHasChain, SDNPSideEffect]>;
def StoreRetval
  : SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile,
           [SDNPHasChain, SDNPSideEffect]>;
def MoveToRetval
  : SDNode<"NVPTXISD::MoveToRetval", SDTStoreRetvalProfile,
           [SDNPHasChain, SDNPSideEffect]>;

// Pseudo use of a parameter: chained, glued on both sides, side-effecting.
def PseudoUseParam
  : SDNode<"NVPTXISD::PseudoUseParam", SDTPseudoUseParamProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;

// Function return.
def RETURNNode
  : SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
           [SDNPHasChain, SDNPSideEffect]>;
// Reads a call result from the retval0 parameter at offset $b using
// ld.param<opstr>.  Matches LoadParam whose first operand is 1.
class LoadParamMemInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
              !strconcat("ld.param", opstr, "\t$dst, [retval0+$b];"),
              [(set regclass:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
// Reads a call result from the register-style return value retval<$b> using
// mov<opstr>.  Matches LoadParam whose first operand is 0.
class LoadParamRegInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
              !strconcat("mov", opstr, "\t$dst, retval$b;"),
              [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
// Stores $val into outgoing parameter param<$a> at offset $b using
// st.param<opstr>.
class StoreParamInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
              !strconcat("st.param", opstr, "\t[param$a+$b], $val;"),
              [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>;
// Moves $val into param<$a> using mov<opstr>.  Operand $b is matched by the
// pattern but does not appear in the printed instruction.
class MoveToParamInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
              !strconcat("mov", opstr, "\tparam$a, $val;"),
              [(MoveToParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>;
// Stores $val into func_retval0 at offset $a using st.param<opstr>.
class StoreRetvalInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs), (ins regclass:$val, i32imm:$a),
              !strconcat("st.param", opstr, "\t[func_retval0+$a], $val;"),
              [(StoreRetval (i32 imm:$a), regclass:$val)]>;
// Moves $val into func_retval<$num> using mov<opstr>.
class MoveToRetvalInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs), (ins i32imm:$num, regclass:$val),
              !strconcat("mov", opstr, "\tfunc_retval$num, $val;"),
              [(MoveToRetval (i32 imm:$num), regclass:$val)]>;
// Moves $val into func_retval0 using mov<opstr>.
class MoveRetvalInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs), (ins regclass:$val),
              !strconcat("mov", opstr, "\tfunc_retval0, $val;"),
              [(MoveRetval regclass:$val)]>;
// Call header for a call with one return value.  The asm string follows the
// same shape as PrintCallRetInst2..8 and PrintCallUniRetInst1.
def PrintCallRetInst1 : NVPTXInst<(outs), (ins),
                                  "call (retval0), ",
                                  [(PrintCall (i32 1))]>;
// Call headers for calls with 2..8 return values.  The PrintCall operand is
// the number of retval names listed in the printed header.
def PrintCallRetInst2 : NVPTXInst<
    (outs), (ins),
    "call (retval0, retval1), ",
    [(PrintCall (i32 2))]>;
def PrintCallRetInst3 : NVPTXInst<
    (outs), (ins),
    "call (retval0, retval1, retval2), ",
    [(PrintCall (i32 3))]>;
def PrintCallRetInst4 : NVPTXInst<
    (outs), (ins),
    "call (retval0, retval1, retval2, retval3), ",
    [(PrintCall (i32 4))]>;
def PrintCallRetInst5 : NVPTXInst<
    (outs), (ins),
    "call (retval0, retval1, retval2, retval3, retval4), ",
    [(PrintCall (i32 5))]>;
def PrintCallRetInst6 : NVPTXInst<
    (outs), (ins),
    "call (retval0, retval1, retval2, retval3, retval4, retval5), ",
    [(PrintCall (i32 6))]>;
def PrintCallRetInst7 : NVPTXInst<
    (outs), (ins),
    "call (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
    [(PrintCall (i32 7))]>;
def PrintCallRetInst8 : NVPTXInst<
    (outs), (ins),
    !strconcat("call (retval0, retval1, retval2, retval3, ",
               "retval4, retval5, retval6, retval7), "),
    [(PrintCall (i32 8))]>;

// Call header for a call without return values.
def PrintCallNoRetInst : NVPTXInst<
    (outs), (ins),
    "call ",
    [(PrintCall (i32 0))]>;
// call.uni headers for calls with 1..8 return values.  The PrintCallUni
// operand is the number of retval names listed in the printed header.
def PrintCallUniRetInst1 : NVPTXInst<
    (outs), (ins),
    "call.uni (retval0), ",
    [(PrintCallUni (i32 1))]>;
def PrintCallUniRetInst2 : NVPTXInst<
    (outs), (ins),
    "call.uni (retval0, retval1), ",
    [(PrintCallUni (i32 2))]>;
def PrintCallUniRetInst3 : NVPTXInst<
    (outs), (ins),
    "call.uni (retval0, retval1, retval2), ",
    [(PrintCallUni (i32 3))]>;
def PrintCallUniRetInst4 : NVPTXInst<
    (outs), (ins),
    "call.uni (retval0, retval1, retval2, retval3), ",
    [(PrintCallUni (i32 4))]>;
def PrintCallUniRetInst5 : NVPTXInst<
    (outs), (ins),
    "call.uni (retval0, retval1, retval2, retval3, retval4), ",
    [(PrintCallUni (i32 5))]>;
def PrintCallUniRetInst6 : NVPTXInst<
    (outs), (ins),
    "call.uni (retval0, retval1, retval2, retval3, retval4, retval5), ",
    [(PrintCallUni (i32 6))]>;
def PrintCallUniRetInst7 : NVPTXInst<
    (outs), (ins),
    "call.uni (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
    [(PrintCallUni (i32 7))]>;
def PrintCallUniRetInst8 : NVPTXInst<
    (outs), (ins),
    !strconcat("call.uni (retval0, retval1, retval2, retval3, ",
               "retval4, retval5, retval6, retval7), "),
    [(PrintCallUni (i32 8))]>;

// call.uni header for a call without return values.
def PrintCallUniNoRetInst : NVPTXInst<
    (outs), (ins),
    "call.uni ",
    [(PrintCallUni (i32 0))]>;
// Integer loads of a call result from retval0 (ld.param with the given type
// suffix), widest register class first.
def LoadParamMemI64 : LoadParamMemInst<Int64Regs, ".b64">;
def LoadParamMemI32 : LoadParamMemInst<Int32Regs, ".b32">;
def LoadParamMemI16 : LoadParamMemInst<Int16Regs, ".b16">;
def LoadParamMemI8  : LoadParamMemInst<Int8Regs,  ".b8">;
1924 //def LoadParamMemI16 : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b),
1925 // !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t",
1926 // "cvt.u16.u32\t$dst, temp_param_reg;"),
1927 // [(set Int16Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
1928 //def LoadParamMemI8 : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b),
1929 // !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t",
1930 // "cvt.u16.u32\t$dst, temp_param_reg;"),
1931 // [(set Int8Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
// Floating-point loads of a call result from retval0.
def LoadParamMemF32 : LoadParamMemInst<Float32Regs, ".f32">;
def LoadParamMemF64 : LoadParamMemInst<Float64Regs, ".f64">;
// Reads of a call result from the register-style return value retval<$b>.
// 64- and 32-bit integers use a plain mov through LoadParamRegInst.
def LoadParamRegI64 : LoadParamRegInst<Int64Regs, ".b64">;
def LoadParamRegI32 : LoadParamRegInst<Int32Regs, ".b32">;

// 16- and 8-bit integers are read with cvt.u16.u32 instead of a mov.
def LoadParamRegI16 : NVPTXInst<
    (outs Int16Regs:$dst), (ins i32imm:$b),
    "cvt.u16.u32\t$dst, retval$b;",
    [(set Int16Regs:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
def LoadParamRegI8 : NVPTXInst<
    (outs Int8Regs:$dst), (ins i32imm:$b),
    "cvt.u16.u32\t$dst, retval$b;",
    [(set Int8Regs:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;

// Floating-point variants.
def LoadParamRegF32 : LoadParamRegInst<Float32Regs, ".f32">;
def LoadParamRegF64 : LoadParamRegInst<Float64Regs, ".f64">;
// Stores of an outgoing argument into param<$a> at offset $b.
def StoreParamI64 : StoreParamInst<Int64Regs, ".b64">;
def StoreParamI32 : StoreParamInst<Int32Regs, ".b32">;

// The 16-bit store is written out explicitly rather than via StoreParamInst.
def StoreParamI16 : NVPTXInst<
    (outs),
    (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
    "st.param.b16\t[param$a+$b], $val;",
    [(StoreParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
// 8-bit store of an outgoing argument into param<$a> at offset $b.  The
// pattern mirrors StoreParamI16 with Int8Regs.
def StoreParamI8 : NVPTXInst<
    (outs),
    (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
    "st.param.b8\t[param$a+$b], $val;",
    [(StoreParam (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
// Widening stores: the narrow value is first converted into temp_param_reg
// (sign-extended for StoreParamS32, zero-extended for StoreParamU32) and then
// stored as a 32-bit parameter.
def StoreParamS32I16 : NVPTXInst<
    (outs),
    (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
    !strconcat("cvt.s32.s16\ttemp_param_reg, $val;\n\t",
               "st.param.b32\t[param$a+$b], temp_param_reg;"),
    [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
def StoreParamU32I16 : NVPTXInst<
    (outs),
    (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
    !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
               "st.param.b32\t[param$a+$b], temp_param_reg;"),
    [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;

def StoreParamU32I8 : NVPTXInst<
    (outs),
    (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
    !strconcat("cvt.u32.u8\ttemp_param_reg, $val;\n\t",
               "st.param.b32\t[param$a+$b], temp_param_reg;"),
    [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
def StoreParamS32I8 : NVPTXInst<
    (outs),
    (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
    !strconcat("cvt.s32.s8\ttemp_param_reg, $val;\n\t",
               "st.param.b32\t[param$a+$b], temp_param_reg;"),
    [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;

// Floating-point parameter stores.
def StoreParamF32 : StoreParamInst<Float32Regs, ".f32">;
def StoreParamF64 : StoreParamInst<Float64Regs, ".f64">;
// Moves of an outgoing argument into param<$a>.
def MoveToParamI64 : MoveToParamInst<Int64Regs,   ".b64">;
def MoveToParamI32 : MoveToParamInst<Int32Regs,   ".b32">;
def MoveToParamF64 : MoveToParamInst<Float64Regs, ".f64">;
def MoveToParamF32 : MoveToParamInst<Float32Regs, ".f32">;

// 16- and 8-bit values are zero-extended into temp_param_reg and moved as
// 32-bit.  Both variants print cvt.u32.u16.
def MoveToParamI16 : NVPTXInst<
    (outs),
    (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
    !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
               "mov.b32\tparam$a, temp_param_reg;"),
    [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
def MoveToParamI8 : NVPTXInst<
    (outs),
    (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
    !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
               "mov.b32\tparam$a, temp_param_reg;"),
    [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
// Integer stores of the function result into func_retval0.
def StoreRetvalI64 : StoreRetvalInst<Int64Regs, ".b64">;
def StoreRetvalI32 : StoreRetvalInst<Int32Regs, ".b32">;
def StoreRetvalI16 : StoreRetvalInst<Int16Regs, ".b16">;
def StoreRetvalI8  : StoreRetvalInst<Int8Regs,  ".b8">;
2009 //def StoreRetvalI16 : NVPTXInst<(outs), (ins Int16Regs:$val, i32imm:$a),
2010 // !strconcat("\{\n\t",
2011 // !strconcat(".reg .b32 temp_retval_reg;\n\t",
2012 // !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t",
2013 // "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))),
2014 // [(StoreRetval (i32 imm:$a), Int16Regs:$val)]>;
2015 //def StoreRetvalI8 : NVPTXInst<(outs), (ins Int8Regs:$val, i32imm:$a),
2016 // !strconcat("\{\n\t",
2017 // !strconcat(".reg .b32 temp_retval_reg;\n\t",
2018 // !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t",
2019 // "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))),
2020 // [(StoreRetval (i32 imm:$a), Int8Regs:$val)]>;
// Floating-point stores of the function result into func_retval0.
def StoreRetvalF64 : StoreRetvalInst<Float64Regs, ".f64">;
def StoreRetvalF32 : StoreRetvalInst<Float32Regs, ".f32">;
// Moves of the function result into func_retval0, one per register class.
def MoveRetvalI64 : MoveRetvalInst<Int64Regs,   ".b64">;
def MoveRetvalI32 : MoveRetvalInst<Int32Regs,   ".b32">;
def MoveRetvalI16 : MoveRetvalInst<Int16Regs,   ".b16">;
def MoveRetvalI8  : MoveRetvalInst<Int8Regs,    ".b8">;
def MoveRetvalF64 : MoveRetvalInst<Float64Regs, ".f64">;
def MoveRetvalF32 : MoveRetvalInst<Float32Regs, ".f32">;
// Moves of a result value into func_retval<$num>.
def MoveToRetvalI64 : MoveToRetvalInst<Int64Regs,   ".b64">;
def MoveToRetvalI32 : MoveToRetvalInst<Int32Regs,   ".b32">;
def MoveToRetvalF64 : MoveToRetvalInst<Float64Regs, ".f64">;
def MoveToRetvalF32 : MoveToRetvalInst<Float32Regs, ".f32">;

// 16- and 8-bit values are written with cvt.u32.u16 instead of a mov.
def MoveToRetvalI16 : NVPTXInst<
    (outs), (ins i32imm:$num, Int16Regs:$val),
    "cvt.u32.u16\tfunc_retval$num, $val;",
    [(MoveToRetval (i32 imm:$num), Int16Regs:$val)]>;
def MoveToRetvalI8 : NVPTXInst<
    (outs), (ins i32imm:$num, Int8Regs:$val),
    "cvt.u32.u16\tfunc_retval$num, $val;",
    [(MoveToRetval (i32 imm:$num), Int8Regs:$val)]>;
// Delimiters of the printed call argument list.  CallArgEnd with operand 1
// also prints the terminating semicolon; with operand 0 it prints only the
// closing parenthesis.
def CallArgBeginInst : NVPTXInst<(outs), (ins), "(",
                                 [(CallArgBegin)]>;
def CallArgEndInst1  : NVPTXInst<(outs), (ins), ");",
                                 [(CallArgEnd (i32 1))]>;
def CallArgEndInst0  : NVPTXInst<(outs), (ins), ")",
                                 [(CallArgEnd (i32 0))]>;

// Function return.
def RETURNInst       : NVPTXInst<(outs), (ins), "ret;",
                                 [(RETURNNode)]>;
// Prints a register call argument followed by ", " (i.e. not the last one).
class CallArgInst<NVPTXRegClass regclass>
  : NVPTXInst<(outs), (ins regclass:$a),
              "$a, ",
              [(CallArg (i32 0), regclass:$a)]>;
// Prints the final register call argument, with no trailing separator.
class LastCallArgInst<NVPTXRegClass regclass>
  : NVPTXInst<(outs), (ins regclass:$a),
              "$a",
              [(LastCallArg (i32 0), regclass:$a)]>;
// Non-final register call arguments, one per register class.
def CallArgI64     : CallArgInst<Int64Regs>;
def CallArgI32     : CallArgInst<Int32Regs>;
def CallArgI16     : CallArgInst<Int16Regs>;
def CallArgI8      : CallArgInst<Int8Regs>;

def CallArgF64     : CallArgInst<Float64Regs>;
def CallArgF32     : CallArgInst<Float32Regs>;

// Final register call arguments, one per register class.
def LastCallArgI64 : LastCallArgInst<Int64Regs>;
def LastCallArgI32 : LastCallArgInst<Int32Regs>;
def LastCallArgI16 : LastCallArgInst<Int16Regs>;
def LastCallArgI8  : LastCallArgInst<Int8Regs>;

def LastCallArgF64 : LastCallArgInst<Float64Regs>;
def LastCallArgF32 : LastCallArgInst<Float32Regs>;
// Immediate call arguments (first CallArg operand is 0): the value itself is
// printed.
def CallArgI32imm : NVPTXInst<
    (outs), (ins i32imm:$a),
    "$a, ",
    [(CallArg (i32 0), (i32 imm:$a))]>;
def LastCallArgI32imm : NVPTXInst<
    (outs), (ins i32imm:$a),
    "$a",
    [(LastCallArg (i32 0), (i32 imm:$a))]>;

// Parameter call arguments (first CallArg operand is 1): the immediate is the
// index printed after "param".
def CallArgParam : NVPTXInst<
    (outs), (ins i32imm:$a),
    "param$a, ",
    [(CallArg (i32 1), (i32 imm:$a))]>;
def LastCallArgParam : NVPTXInst<
    (outs), (ins i32imm:$a),
    "param$a",
    [(LastCallArg (i32 1), (i32 imm:$a))]>;
// Callee operand of a call.  It is printed between the call header and the
// "(" that opens the argument list, so the callee is followed by ", ".
// NOTE(review): the asm-string operand was missing from all three defs and is
// reconstructed from the surrounding call-printing pieces; confirm.

// Direct call: the callee is a global address.
def CallVoidInst : NVPTXInst<
    (outs), (ins imem:$addr),
    "$addr, ",
    [(CallVoid (Wrapper tglobaladdr:$addr))]>;
// Indirect call through a 32-bit register.
def CallVoidInstReg : NVPTXInst<
    (outs), (ins Int32Regs:$addr),
    "$addr, ",
    [(CallVoid Int32Regs:$addr)]>;
// Indirect call through a 64-bit register.
def CallVoidInstReg64 : NVPTXInst<
    (outs), (ins Int64Regs:$addr),
    "$addr, ",
    [(CallVoid Int64Regs:$addr)]>;
// Prints the trailing ", prototype_<$val>;" of a call.
def PrototypeInst : NVPTXInst<
    (outs), (ins i32imm:$val),
    ", prototype_$val;",
    [(Prototype (i32 imm:$val))]>;
// Declarations of the call-site return value retval<$num>.

// Aligned byte-array form.
def DeclareRetMemInst : NVPTXInst<
    (outs),
    (ins i32imm:$align, i32imm:$size, i32imm:$num),
    ".param .align $align .b8 retval$num[$size];",
    [(DeclareRetParam (i32 imm:$align), (i32 imm:$size), (i32 imm:$num))]>;
// Scalar .param form (first DeclareRet operand is 1).
def DeclareRetScalarInst : NVPTXInst<
    (outs),
    (ins i32imm:$size, i32imm:$num),
    ".param .b$size retval$num;",
    [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>;
// Scalar .reg form (first DeclareRet operand is 2).
def DeclareRetRegInst : NVPTXInst<
    (outs),
    (ins i32imm:$size, i32imm:$num),
    ".reg .b$size retval$num;",
    [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>;
// Declarations of the outgoing call parameter param<$a>.

// Aligned byte-array form.
def DeclareParamInst : NVPTXInst<
    (outs),
    (ins i32imm:$align, i32imm:$a, i32imm:$size),
    ".param .align $align .b8 param$a[$size];",
    [(DeclareParam (i32 imm:$align), (i32 imm:$a), (i32 imm:$size))]>;
// Scalar .param form (last DeclareScalarParam operand is 0).
def DeclareScalarParamInst : NVPTXInst<
    (outs),
    (ins i32imm:$a, i32imm:$size),
    ".param .b$size param$a;",
    [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>;
// Scalar .reg form (last DeclareScalarParam operand is 1).
def DeclareScalarRegInst : NVPTXInst<
    (outs),
    (ins i32imm:$a, i32imm:$size),
    ".reg .b$size param$a;",
    [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>;
// Register-to-register copy selected for the MoveParam node: mov<asmstr>.
class MoveParamInst<NVPTXRegClass regclass, string asmstr>
  : NVPTXInst<(outs regclass:$dst), (ins regclass:$src),
              !strconcat("mov", asmstr, "\t$dst, $src;"),
              [(set regclass:$dst, (MoveParam regclass:$src))]>;
// MoveParam instructions, one per register class.
def MoveParamI64 : MoveParamInst<Int64Regs, ".b64">;
def MoveParamI32 : MoveParamInst<Int32Regs, ".b32">;

// 16- and 8-bit variants print cvt.u16.u32 instead of a mov.
def MoveParamI16 : NVPTXInst<
    (outs Int16Regs:$dst), (ins Int16Regs:$src),
    "cvt.u16.u32\t$dst, $src;",
    [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>;
def MoveParamI8 : NVPTXInst<
    (outs Int8Regs:$dst), (ins Int8Regs:$src),
    "cvt.u16.u32\t$dst, $src;",
    [(set Int8Regs:$dst, (MoveParam Int8Regs:$src))]>;

def MoveParamF64 : MoveParamInst<Float64Regs, ".f64">;
def MoveParamF32 : MoveParamInst<Float32Regs, ".f32">;
// Pseudo use of a register: the printed text is only a "//" comment naming
// $src.
class PseudoUseParamInst<NVPTXRegClass regclass>
  : NVPTXInst<(outs), (ins regclass:$src),
              "// Pseudo use of $src",
              [(PseudoUseParam regclass:$src)]>;
// Pseudo-use instructions, one per register class.
def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs>;
def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs>;
def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs>;
def PseudoUseParamI8  : PseudoUseParamInst<Int8Regs>;
def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>;
def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>;
2147 // Load / Store Handling
// Scalar loads into `regclass`.  All variants print the same mnemonic, built
// from the LdStCode operands (volatile, address space, vector, sign) and
// $fromWidth; they differ only in the form of the address operand.  None has
// a selection pattern.
multiclass LD<NVPTXRegClass regclass> {
  // Address is an imem operand.
  def _avar : NVPTXInst<
      (outs regclass:$dst),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, imem:$addr),
      !strconcat(
          "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
          " \t$dst, [$addr];"),
      []>;
  // Address in a 32-bit register.
  def _areg : NVPTXInst<
      (outs regclass:$dst),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int32Regs:$addr),
      !strconcat(
          "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
          " \t$dst, [$addr];"),
      []>;
  // Address in a 64-bit register.
  def _areg_64 : NVPTXInst<
      (outs regclass:$dst),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int64Regs:$addr),
      !strconcat(
          "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
          " \t$dst, [$addr];"),
      []>;
  // 32-bit register plus immediate offset.
  def _ari : NVPTXInst<
      (outs regclass:$dst),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
      !strconcat(
          "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
          " \t$dst, [$addr+$offset];"),
      []>;
  // 64-bit register plus immediate offset.
  def _ari_64 : NVPTXInst<
      (outs regclass:$dst),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
      !strconcat(
          "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
          " \t$dst, [$addr+$offset];"),
      []>;
  // imem operand plus immediate offset.
  def _asi : NVPTXInst<
      (outs regclass:$dst),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, imem:$addr, i32imm:$offset),
      !strconcat(
          "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
          " \t$dst, [$addr+$offset];"),
      []>;
} // end multiclass LD
// Instantiate the scalar loads for every register class.  They carry no
// patterns, so mayLoad and neverHasSideEffects are set explicitly.
let mayLoad = 1, neverHasSideEffects = 1 in {
  defm LD_i8  : LD<Int8Regs>;
  defm LD_i16 : LD<Int16Regs>;
  defm LD_i32 : LD<Int32Regs>;
  defm LD_i64 : LD<Int64Regs>;
  defm LD_f32 : LD<Float32Regs>;
  defm LD_f64 : LD<Float64Regs>;
} // end let mayLoad
// Scalar stores from `regclass`.  All variants print the same mnemonic, built
// from the LdStCode operands (volatile, address space, vector, sign) and
// $toWidth; they differ only in the form of the address operand.  None has a
// selection pattern.
multiclass ST<NVPTXRegClass regclass> {
  // Address is an imem operand.
  def _avar : NVPTXInst<
      (outs),
      (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
           LdStCode:$Sign, i32imm:$toWidth, imem:$addr),
      !strconcat(
          "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
          " \t[$addr], $src;"),
      []>;
  // Address in a 32-bit register.
  def _areg : NVPTXInst<
      (outs),
      (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
           LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr),
      !strconcat(
          "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
          " \t[$addr], $src;"),
      []>;
  // Address in a 64-bit register.
  def _areg_64 : NVPTXInst<
      (outs),
      (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
           LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr),
      !strconcat(
          "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
          " \t[$addr], $src;"),
      []>;
  // 32-bit register plus immediate offset.
  def _ari : NVPTXInst<
      (outs),
      (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
           LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset),
      !strconcat(
          "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
          " \t[$addr+$offset], $src;"),
      []>;
  // 64-bit register plus immediate offset.
  def _ari_64 : NVPTXInst<
      (outs),
      (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
           LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset),
      !strconcat(
          "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
          " \t[$addr+$offset], $src;"),
      []>;
  // imem operand plus immediate offset.
  def _asi : NVPTXInst<
      (outs),
      (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
           LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset),
      !strconcat(
          "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
          " \t[$addr+$offset], $src;"),
      []>;
} // end multiclass ST
// Instantiate the scalar stores for every register class.  They carry no
// patterns, so mayStore and neverHasSideEffects are set explicitly.
let mayStore = 1, neverHasSideEffects = 1 in {
  defm ST_i8  : ST<Int8Regs>;
  defm ST_i16 : ST<Int16Regs>;
  defm ST_i32 : ST<Int32Regs>;
  defm ST_i64 : ST<Int64Regs>;
  defm ST_f32 : ST<Float32Regs>;
  defm ST_f64 : ST<Float64Regs>;
} // end let mayStore
2233 // The following is used only in and after vector elementizations.
2234 // Vector elementization happens at the machine instruction level, so the
2235 // following instruction
2236 // never appears in the DAG.
2237 multiclass LD_VEC<NVPTXRegClass regclass> {
2238 def _v2_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2239 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2240 i32imm:$fromWidth, imem:$addr),
2241 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2242 "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
2243 def _v2_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2244 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2245 i32imm:$fromWidth, Int32Regs:$addr),
2246 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2247 "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
2248 def _v2_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2249 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2250 i32imm:$fromWidth, Int64Regs:$addr),
2251 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2252 "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
2253 def _v2_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2254 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2255 i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
2256 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2257 "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
2258 def _v2_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2259 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2260 i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
2261 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2262 "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
2263 def _v2_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2264 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2265 i32imm:$fromWidth, imem:$addr, i32imm:$offset),
2266 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2267 "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
2268 def _v4_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
2269 regclass:$dst3, regclass:$dst4),
2270 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2271 i32imm:$fromWidth, imem:$addr),
2272 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2273 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
2274 def _v4_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2276 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2277 i32imm:$fromWidth, Int32Regs:$addr),
2278 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2279 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
2280 def _v4_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
2281 regclass:$dst3, regclass:$dst4),
2282 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2283 i32imm:$fromWidth, Int64Regs:$addr),
2284 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2285 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
2286 def _v4_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2288 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2289 i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
2290 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2291 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
2293 def _v4_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
2294 regclass:$dst3, regclass:$dst4),
2295 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2296 i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
2297 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2298 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
2300 def _v4_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2302 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2303 i32imm:$fromWidth, imem:$addr, i32imm:$offset),
2304 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2305 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
// Instantiate the LD_VEC multiclass (the _v2_* and _v4_* vector load forms)
// once per scalar register class. Every def produced here is flagged
// mayLoad=1 and neverHasSideEffects=1.
2308 let mayLoad=1, neverHasSideEffects=1 in {
2309 defm LDV_i8 : LD_VEC<Int8Regs>;
2310 defm LDV_i16 : LD_VEC<Int16Regs>;
2311 defm LDV_i32 : LD_VEC<Int32Regs>;
2312 defm LDV_i64 : LD_VEC<Int64Regs>;
2313 defm LDV_f32 : LD_VEC<Float32Regs>;
2314 defm LDV_f64 : LD_VEC<Float64Regs>;
2317 multiclass ST_VEC<NVPTXRegClass regclass> {
2318 def _v2_avar : NVPTXInst<(outs),
2319 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2320 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
2321 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2322 "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
2323 def _v2_areg : NVPTXInst<(outs),
2324 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2325 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
2326 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2327 "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
2328 def _v2_areg_64 : NVPTXInst<(outs),
2329 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2330 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
2331 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2332 "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
2333 def _v2_ari : NVPTXInst<(outs),
2334 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2335 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr,
2337 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2338 "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
2339 def _v2_ari_64 : NVPTXInst<(outs),
2340 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2341 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr,
2343 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2344 "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
2345 def _v2_asi : NVPTXInst<(outs),
2346 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2347 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr,
2349 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2350 "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
2351 def _v4_avar : NVPTXInst<(outs),
2352 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2353 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2354 i32imm:$fromWidth, imem:$addr),
2355 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2356 "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
2357 def _v4_areg : NVPTXInst<(outs),
2358 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2359 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2360 i32imm:$fromWidth, Int32Regs:$addr),
2361 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2362 "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
2363 def _v4_areg_64 : NVPTXInst<(outs),
2364 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2365 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2366 i32imm:$fromWidth, Int64Regs:$addr),
2367 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2368 "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
2369 def _v4_ari : NVPTXInst<(outs),
2370 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2371 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2372 i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
2373 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2374 "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
2376 def _v4_ari_64 : NVPTXInst<(outs),
2377 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2378 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2379 i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
2380 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2381 "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
2383 def _v4_asi : NVPTXInst<(outs),
2384 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2385 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2386 i32imm:$fromWidth, imem:$addr, i32imm:$offset),
2387 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2388 "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
// Instantiate the ST_VEC multiclass (the _v2_* and _v4_* vector store forms)
// once per scalar register class. Every def produced here is flagged
// mayStore=1 and neverHasSideEffects=1.
2391 let mayStore=1, neverHasSideEffects=1 in {
2392 defm STV_i8 : ST_VEC<Int8Regs>;
2393 defm STV_i16 : ST_VEC<Int16Regs>;
2394 defm STV_i32 : ST_VEC<Int32Regs>;
2395 defm STV_i64 : ST_VEC<Int64Regs>;
2396 defm STV_f32 : ST_VEC<Float32Regs>;
2397 defm STV_f64 : ST_VEC<Float64Regs>;
2401 //---- Conversion ----
2403 multiclass CVT_INT_TO_FP <string OpStr, SDNode OpNode> {
2404 // FIXME: need to add f16 support
2406 // NVPTXInst<(outs Float16Regs:$d), (ins Int8Regs:$a),
2407 // !strconcat(!strconcat("cvt.rn.f16.", OpStr), "8 \t$d, $a;"),
2408 // [(set Float16Regs:$d, (OpNode Int8Regs:$a))]>;
2410 // NVPTXInst<(outs Float16Regs:$d), (ins Int16Regs:$a),
2411 // !strconcat(!strconcat("cvt.rn.f16.", OpStr), "16 \t$d, $a;"),
2412 // [(set Float16Regs:$d, (OpNode Int16Regs:$a))]>;
2414 // NVPTXInst<(outs Float16Regs:$d), (ins Int32Regs:$a),
2415 // !strconcat(!strconcat("cvt.rn.f16.", OpStr), "32 \t$d, $a;"),
2416 // [(set Float16Regs:$d, (OpNode Int32Regs:$a))]>;
2418 // NVPTXInst<(outs Float16Regs:$d), (ins Int64Regs:$a),
2419 // !strconcat(!strconcat("cvt.rn.f16.", OpStr), "64 \t$d, $a;"),
2420 // [(set Float16Regs:$d, (OpNode Int64Regs:$a))]>;
2423 NVPTXInst<(outs Float32Regs:$d), (ins Int1Regs:$a),
2424 "selp.f32 \t$d, 1.0, 0.0, $a;",
2425 [(set Float32Regs:$d, (OpNode Int1Regs:$a))]>;
2427 NVPTXInst<(outs Float32Regs:$d), (ins Int8Regs:$a),
2428 !strconcat(!strconcat("cvt.rn.f32.", OpStr), "8 \t$d, $a;"),
2429 [(set Float32Regs:$d, (OpNode Int8Regs:$a))]>;
2431 NVPTXInst<(outs Float32Regs:$d), (ins Int16Regs:$a),
2432 !strconcat(!strconcat("cvt.rn.f32.", OpStr), "16 \t$d, $a;"),
2433 [(set Float32Regs:$d, (OpNode Int16Regs:$a))]>;
2435 NVPTXInst<(outs Float32Regs:$d), (ins Int32Regs:$a),
2436 !strconcat(!strconcat("cvt.rn.f32.", OpStr), "32 \t$d, $a;"),
2437 [(set Float32Regs:$d, (OpNode Int32Regs:$a))]>;
2439 NVPTXInst<(outs Float32Regs:$d), (ins Int64Regs:$a),
2440 !strconcat(!strconcat("cvt.rn.f32.", OpStr), "64 \t$d, $a;"),
2441 [(set Float32Regs:$d, (OpNode Int64Regs:$a))]>;
2444 NVPTXInst<(outs Float64Regs:$d), (ins Int1Regs:$a),
2445 "selp.f64 \t$d, 1.0, 0.0, $a;",
2446 [(set Float64Regs:$d, (OpNode Int1Regs:$a))]>;
2448 NVPTXInst<(outs Float64Regs:$d), (ins Int8Regs:$a),
2449 !strconcat(!strconcat("cvt.rn.f64.", OpStr), "8 \t$d, $a;"),
2450 [(set Float64Regs:$d, (OpNode Int8Regs:$a))]>;
2452 NVPTXInst<(outs Float64Regs:$d), (ins Int16Regs:$a),
2453 !strconcat(!strconcat("cvt.rn.f64.", OpStr), "16 \t$d, $a;"),
2454 [(set Float64Regs:$d, (OpNode Int16Regs:$a))]>;
2456 NVPTXInst<(outs Float64Regs:$d), (ins Int32Regs:$a),
2457 !strconcat(!strconcat("cvt.rn.f64.", OpStr), "32 \t$d, $a;"),
2458 [(set Float64Regs:$d, (OpNode Int32Regs:$a))]>;
2460 NVPTXInst<(outs Float64Regs:$d), (ins Int64Regs:$a),
2461 !strconcat(!strconcat("cvt.rn.f64.", OpStr), "64 \t$d, $a;"),
2462 [(set Float64Regs:$d, (OpNode Int64Regs:$a))]>;
// Integer-to-floating-point conversions. OpStr ("s" or "u") becomes the
// source-type letter in the generated "cvt.rn.f<NN>.<OpStr><MM>" mnemonics;
// OpNode is the DAG node each generated pattern matches.
2465 defm Sint_to_fp : CVT_INT_TO_FP <"s", sint_to_fp>;
2466 defm Uint_to_fp : CVT_INT_TO_FP <"u", uint_to_fp>;
2468 multiclass CVT_FP_TO_INT <string OpStr, SDNode OpNode> {
2469 // FIXME: need to add f16 support
2471 // NVPTXInst<(outs Int8Regs:$d), (ins Float16Regs:$a),
2472 // !strconcat(!strconcat("cvt.rzi.", OpStr), "8.f16 $d, $a;"),
2473 // [(set Int8Regs:$d, (OpNode Float16Regs:$a))]>;
2475 NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a),
2476 !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"),
2477 [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
2479 NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a),
2480 !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"),
2481 [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>;
2483 NVPTXInst<(outs Int8Regs:$d), (ins Float64Regs:$a),
2484 !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"),
2485 [(set Int8Regs:$d, (OpNode Float64Regs:$a))]>;
2487 // FIXME: need to add f16 support
2489 // NVPTXInst<(outs Int16Regs:$d), (ins Float16Regs:$a),
2490 // !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f16 \t$d, $a;"),
2491 // [(set Int16Regs:$d, (OpNode Float16Regs:$a))]>;
2493 NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a),
2494 !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"),
2495 [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
2497 NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a),
2498 !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"),
2499 [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>;
2501 NVPTXInst<(outs Int16Regs:$d), (ins Float64Regs:$a),
2502 !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"),
2503 [(set Int16Regs:$d, (OpNode Float64Regs:$a))]>;
2505 // FIXME: need to add f16 support
2506 // def CVTi32f16:
2507 // NVPTXInst<(outs Int32Regs:$d), (ins Float16Regs:$a),
2508 // !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f16 \t$d, $a;"),
2509 // [(set Int32Regs:$d, (OpNode Float16Regs:$a))]>;
2511 NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a),
2512 !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "32.f32 \t$d, $a;"),
2513 [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
2515 NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a),
2516 !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f32 \t$d, $a;"),
2517 [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>;
2519 NVPTXInst<(outs Int32Regs:$d), (ins Float64Regs:$a),
2520 !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f64 \t$d, $a;"),
2521 [(set Int32Regs:$d, (OpNode Float64Regs:$a))]>;
2523 // FIXME: need to add f16 support
2525 // NVPTXInst<(outs Int64Regs:$d), (ins Float16Regs:$a),
2526 // !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f16 \t$d, $a;"),
2527 // [(set Int64Regs:$d, (OpNode Float16Regs:$a))]>;
2529 NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a),
2530 !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "64.f32 \t$d, $a;"),
2531 [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
2533 NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a),
2534 !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f32 \t$d, $a;"),
2535 [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>;
2537 NVPTXInst<(outs Int64Regs:$d), (ins Float64Regs:$a),
2538 !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f64 \t$d, $a;"),
2539 [(set Int64Regs:$d, (OpNode Float64Regs:$a))]>;
// Floating-point-to-integer conversions. OpStr ("s" or "u") becomes the
// destination-type letter in the generated "cvt.rzi[.ftz].<OpStr><NN>.f<MM>"
// mnemonics; OpNode is the DAG node each generated pattern matches.
2542 defm Fp_to_sint : CVT_FP_TO_INT <"s", fp_to_sint>;
2543 defm Fp_to_uint : CVT_FP_TO_INT <"u", fp_to_uint>;
2545 multiclass INT_EXTEND_UNSIGNED_1 <SDNode OpNode> {
2547 NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a),
2548 "selp.u16 \t$d, 1, 0, $a;",
2549 [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>;
2551 NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a),
2552 "selp.u16 \t$d, 1, 0, $a;",
2553 [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>;
2555 NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a),
2556 "selp.u32 \t$d, 1, 0, $a;",
2557 [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>;
2559 NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a),
2560 "selp.u64 \t$d, 1, 0, $a;",
2561 [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>;
2564 multiclass INT_EXTEND_SIGNED_1 <SDNode OpNode> {
2566 NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a),
2567 "selp.s16 \t$d, -1, 0, $a;",
2568 [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>;
2570 NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a),
2571 "selp.s16 \t$d, -1, 0, $a;",
2572 [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>;
2574 NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a),
2575 "selp.s32 \t$d, -1, 0, $a;",
2576 [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>;
2578 NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a),
2579 "selp.s64 \t$d, -1, 0, $a;",
2580 [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>;
2583 multiclass INT_EXTEND <string OpStr, SDNode OpNode> {
2584 // All Int8Regs are emitted as 16-bit registers in PTX,
2585 // and there is no selp.u8 in PTX.
2587 NVPTXInst<(outs Int16Regs:$d), (ins Int8Regs:$a),
2588 !strconcat("cvt.", !strconcat(OpStr, !strconcat("16.",
2589 !strconcat(OpStr, "8 \t$d, $a;")))),
2590 [(set Int16Regs:$d, (OpNode Int8Regs:$a))]>;
2592 NVPTXInst<(outs Int32Regs:$d), (ins Int8Regs:$a),
2593 !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.",
2594 !strconcat(OpStr, "8 \t$d, $a;")))),
2595 [(set Int32Regs:$d, (OpNode Int8Regs:$a))]>;
2597 NVPTXInst<(outs Int64Regs:$d), (ins Int8Regs:$a),
2598 !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
2599 !strconcat(OpStr, "8 \t$d, $a;")))),
2600 [(set Int64Regs:$d, (OpNode Int8Regs:$a))]>;
2602 NVPTXInst<(outs Int32Regs:$d), (ins Int16Regs:$a),
2603 !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.",
2604 !strconcat(OpStr, "16 \t$d, $a;")))),
2605 [(set Int32Regs:$d, (OpNode Int16Regs:$a))]>;
2607 NVPTXInst<(outs Int64Regs:$d), (ins Int16Regs:$a),
2608 !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
2609 !strconcat(OpStr, "16 \t$d, $a;")))),
2610 [(set Int64Regs:$d, (OpNode Int16Regs:$a))]>;
2612 NVPTXInst<(outs Int64Regs:$d), (ins Int32Regs:$a),
2613 !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
2614 !strconcat(OpStr, "32 \t$d, $a;")))),
2615 [(set Int64Regs:$d, (OpNode Int32Regs:$a))]>;
// Extensions from Int1Regs. sext uses the signed multiclass (selp.sNN with
// -1/0); zext and anyext both use the unsigned multiclass (selp.uNN with 1/0).
2618 defm Sint_extend_1 : INT_EXTEND_SIGNED_1<sext>;
2619 defm Zint_extend_1 : INT_EXTEND_UNSIGNED_1<zext>;
2620 defm Aint_extend_1 : INT_EXTEND_UNSIGNED_1<anyext>;
// Extensions between the wider integer classes via cvt. anyext is given the
// same "u" type letter as zext, so both print identical instructions.
2622 defm Sint_extend : INT_EXTEND <"s", sext>;
2623 defm Zint_extend : INT_EXTEND <"u", zext>;
2624 defm Aint_extend : INT_EXTEND <"u", anyext>;
2626 class TRUNC_to1_asm<string sz> {
2627 string s = !strconcat("{{\n\t",
2630 !strconcat(" temp;\n\t",
2633 !strconcat("\t temp, $a, 1;\n\t",
2635 !strconcat(sz, ".eq \t $d, temp, 1;\n\t}}")))))))));
// Integer truncations between register classes. Each def matches
// (trunc <src>) and prints a single cvt.u<dst>.u<src> instruction; the def
// name encodes <src>to<dst> bit widths.
2638 def TRUNC_64to32 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
2639 "cvt.u32.u64 \t$d, $a;",
2640 [(set Int32Regs:$d, (trunc Int64Regs:$a))]>;
2641 def TRUNC_64to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int64Regs:$a),
2642 "cvt.u16.u64 \t$d, $a;",
2643 [(set Int16Regs:$d, (trunc Int64Regs:$a))]>;
2644 def TRUNC_64to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int64Regs:$a),
2645 "cvt.u8.u64 \t$d, $a;",
2646 [(set Int8Regs:$d, (trunc Int64Regs:$a))]>;
2647 def TRUNC_32to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int32Regs:$a),
2648 "cvt.u16.u32 \t$d, $a;",
2649 [(set Int16Regs:$d, (trunc Int32Regs:$a))]>;
2650 def TRUNC_32to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int32Regs:$a),
2651 "cvt.u8.u32 \t$d, $a;",
2652 [(set Int8Regs:$d, (trunc Int32Regs:$a))]>;
2653 def TRUNC_16to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int16Regs:$a),
2654 "cvt.u8.u16 \t$d, $a;",
2655 [(set Int8Regs:$d, (trunc Int16Regs:$a))]>;
// Truncations to Int1Regs. The asm text is taken from TRUNC_to1_asm<sz>.s,
// where sz is the bit-size suffix passed for the source operand.
// NOTE(review): from the string fragments in TRUNC_to1_asm this looks like a
// test of the low bit of $a against 1 - confirm against the full template.
// TRUNC_8to1 passes ".b16", the same suffix as TRUNC_16to1.
2656 def TRUNC_64to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2657 TRUNC_to1_asm<".b64">.s,
2658 [(set Int1Regs:$d, (trunc Int64Regs:$a))]>;
2659 def TRUNC_32to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2660 TRUNC_to1_asm<".b32">.s,
2661 [(set Int1Regs:$d, (trunc Int32Regs:$a))]>;
2662 def TRUNC_16to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int16Regs:$a),
2663 TRUNC_to1_asm<".b16">.s,
2664 [(set Int1Regs:$d, (trunc Int16Regs:$a))]>;
2665 def TRUNC_8to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int8Regs:$a),
2666 TRUNC_to1_asm<".b16">.s,
2667 [(set Int1Regs:$d, (trunc Int8Regs:$a))]>;
2669 // Select instructions
// Patterns for select whose condition lives in Int32Regs: the condition is
// narrowed to Int1Regs with TRUNC_32to1 and passed as the last operand of the
// SELECT<type>rr instruction for the value type being selected. One pattern
// per value register class (i8, i16, i32, i64, f32, f64).
2670 def : Pat<(select Int32Regs:$pred, Int8Regs:$a, Int8Regs:$b),
2671 (SELECTi8rr Int8Regs:$a, Int8Regs:$b, (TRUNC_32to1 Int32Regs:$pred))>;
2672 def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b),
2673 (SELECTi16rr Int16Regs:$a, Int16Regs:$b,
2674 (TRUNC_32to1 Int32Regs:$pred))>;
2675 def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b),
2676 (SELECTi32rr Int32Regs:$a, Int32Regs:$b,
2677 (TRUNC_32to1 Int32Regs:$pred))>;
2678 def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b),
2679 (SELECTi64rr Int64Regs:$a, Int64Regs:$b,
2680 (TRUNC_32to1 Int32Regs:$pred))>;
2681 def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b),
2682 (SELECTf32rr Float32Regs:$a, Float32Regs:$b,
2683 (TRUNC_32to1 Int32Regs:$pred))>;
2684 def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b),
2685 (SELECTf64rr Float64Regs:$a, Float64Regs:$b,
2686 (TRUNC_32to1 Int32Regs:$pred))>;
// Bit-preserving move between two register classes of the same width.
//   SzStr       - bit width as text; appended to "mov.b" to form the mnemonic.
//   regclassIn  - register class of the source operand $a.
//   regclassOut - register class of the result $d.
// Matches (bitconvert regclassIn:$a).
2688 class F_BITCONVERT<string SzStr, NVPTXRegClass regclassIn,
2689 NVPTXRegClass regclassOut> :
2690 NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a),
2691 !strconcat("mov.b", !strconcat(SzStr, " \t $d, $a;")),
2692 [(set regclassOut:$d, (bitconvert regclassIn:$a))]>;
// Concrete bitconvert instructions: integer<->float at 32 bits (mov.b32) and
// at 64 bits (mov.b64). I2F/F2I give the direction.
2694 def BITCONVERT_32_I2F : F_BITCONVERT<"32", Int32Regs, Float32Regs>;
2695 def BITCONVERT_32_F2I : F_BITCONVERT<"32", Float32Regs, Int32Regs>;
2696 def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs, Float64Regs>;
2697 def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>;
2699 // pack a set of smaller int registers to a larger int register
2700 def V4I8toI32 : NVPTXInst<(outs Int32Regs:$d),
2701 (ins Int8Regs:$s1, Int8Regs:$s2,
2702 Int8Regs:$s3, Int8Regs:$s4),
2703 !strconcat("{{\n\t.reg .b8\t%t<4>;",
2704 !strconcat("\n\tcvt.u8.u8\t%t0, $s1;",
2705 !strconcat("\n\tcvt.u8.u8\t%t1, $s2;",
2706 !strconcat("\n\tcvt.u8.u8\t%t2, $s3;",
2707 !strconcat("\n\tcvt.u8.u8\t%t3, $s4;",
2708 "\n\tmov.b32\t$d, {%t0, %t1, %t2, %t3};\n\t}}"))))),
2710 def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d),
2711 (ins Int16Regs:$s1, Int16Regs:$s2,
2712 Int16Regs:$s3, Int16Regs:$s4),
2713 "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};",
2715 def V2I8toI16 : NVPTXInst<(outs Int16Regs:$d),
2716 (ins Int8Regs:$s1, Int8Regs:$s2),
2717 !strconcat("{{\n\t.reg .b8\t%t<2>;",
2718 !strconcat("\n\tcvt.u8.u8\t%t0, $s1;",
2719 !strconcat("\n\tcvt.u8.u8\t%t1, $s2;",
2720 "\n\tmov.b16\t$d, {%t0, %t1};\n\t}}"))),
2722 def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d),
2723 (ins Int16Regs:$s1, Int16Regs:$s2),
2724 "mov.b32\t$d, {{$s1, $s2}};",
2726 def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d),
2727 (ins Int32Regs:$s1, Int32Regs:$s2),
2728 "mov.b64\t$d, {{$s1, $s2}};",
2730 def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d),
2731 (ins Float32Regs:$s1, Float32Regs:$s2),
2732 "mov.b64\t$d, {{$s1, $s2}};",
2735 // unpack a larger int register to a set of smaller int registers
2736 def I32toV4I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2,
2737 Int8Regs:$d3, Int8Regs:$d4),
2739 !strconcat("{{\n\t.reg .b8\t%t<4>;",
2740 !strconcat("\n\tmov.b32\t{%t0, %t1, %t2, %t3}, $s;",
2741 !strconcat("\n\tcvt.u8.u8\t$d1, %t0;",
2742 !strconcat("\n\tcvt.u8.u8\t$d2, %t1;",
2743 !strconcat("\n\tcvt.u8.u8\t$d3, %t2;",
2744 "\n\tcvt.u8.u8\t$d4, %t3;\n\t}}"))))),
2746 def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2,
2747 Int16Regs:$d3, Int16Regs:$d4),
2749 "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;",
2751 def I16toV2I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2),
2753 !strconcat("{{\n\t.reg .b8\t%t<2>;",
2754 !strconcat("\n\tmov.b16\t{%t0, %t1}, $s;",
2755 !strconcat("\n\tcvt.u8.u8\t$d1, %t0;",
2756 "\n\tcvt.u8.u8\t$d2, %t1;\n\t}}"))),
2758 def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2),
2760 "mov.b32\t{{$d1, $d2}}, $s;",
2762 def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2),
2764 "mov.b64\t{{$d1, $d2}}, $s;",
2766 def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2),
2767 (ins Float64Regs:$s),
2768 "mov.b64\t{{$d1, $d2}}, $s;",
// f64 -> f32 rounding (fround). The _ftz def prints the .ftz form and is only
// available when the doF32FTZ predicate holds; the plain def has no predicate.
2771 def FPRound_ftz : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a),
2772 "cvt.rn.ftz.f32.f64 \t$d, $a;",
2773 [(set Float32Regs:$d, (fround Float64Regs:$a))]>, Requires<[doF32FTZ]>;
2775 def FPRound : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a),
2776 "cvt.rn.f32.f64 \t$d, $a;",
2777 [(set Float32Regs:$d, (fround Float64Regs:$a))]>;
// f32 -> f64 extension (fextend), with the same ftz / non-ftz split as above.
2779 def FPExtend_ftz : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a),
2780 "cvt.ftz.f64.f32 \t$d, $a;",
2781 [(set Float64Regs:$d, (fextend Float32Regs:$a))]>, Requires<[doF32FTZ]>;
2783 def FPExtend : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a),
2784 "cvt.f64.f32 \t$d, $a;",
2785 [(set Float64Regs:$d, (fextend Float32Regs:$a))]>;
// DAG node for NVPTXISD::RET_FLAG. It uses the SDTNone type profile and
// carries a chain plus an optional incoming glue. Matched by the Return
// instruction's pattern.
2787 def retflag : SDNode<"NVPTXISD::RET_FLAG", SDTNone,
2788 [SDNPHasChain, SDNPOptInGlue]>;
2790 //-----------------------------------
2792 //-----------------------------------
2794 let isTerminator=1 in {
2795 let isReturn=1, isBarrier=1 in
2796 def Return : NVPTXInst<(outs), (ins), "ret;", [(retflag)]>;
// Conditional branch guarded by the Int1Regs predicate $a; printed as
// "@$a bra $target". Matches brcond on an Int1Regs condition.
2799 def CBranch : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
2800 "@$a bra \t$target;",
2801 [(brcond Int1Regs:$a, bb:$target)]>;
2803 def CBranchOther : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
2804 "@!$a bra \t$target;",
2807 let isBranch=1, isBarrier=1 in
2808 def GOTO : NVPTXInst<(outs), (ins brtarget:$target),
2809 "bra.uni \t$target;",
// brcond whose condition is in Int32Regs: derive the branch predicate with
// ISetUNEi32ri_p against the immediate 0 and branch with CBranch.
// NOTE(review): presumably ISetUNEi32ri_p is a "not equal" compare producing
// an Int1Regs value - confirm at its definition.
2813 def : Pat<(brcond Int32Regs:$a, bb:$target), (CBranch
2814 (ISetUNEi32ri_p Int32Regs:$a, 0), bb:$target)>;
2816 // SelectionDAGBuilder::visitSwitchCase() will invert the condition of a
2817 // conditional branch if
2818 // the target block is the next block so that the code can fall through to it.
2820 // The inversion is done by 'xor condition, 1', which will be translated to
2821 // (setne condition, -1).
2822 // Since PTX supports '@!pred bra target', we should use it.
// Branch on (setne $a, -1) with $a in Int1Regs: select CBranchOther, which
// prints the negated-predicate form "@!$a bra", instead of materializing the
// comparison.
2823 def : Pat<(brcond (i1 (setne Int1Regs:$a, -1)), bb:$target),
2824 (CBranchOther Int1Regs:$a, bb:$target)>;
// Type profiles for the call-sequence markers: the start node has one i32
// operand (operand 0); the end node has two i32 operands (operands 0 and 1).
2827 def SDT_NVPTXCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
2828 def SDT_NVPTXCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
2829 SDTCisVT<1, i32> ]>;
// DAG node for ISD::CALLSEQ_START using the profile above. It has a chain,
// produces glue, and is marked as having side effects.
2831 def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_NVPTXCallSeqStart,
2832 [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
2833 def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_NVPTXCallSeqEnd,
2834 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
// Call node plumbing.
//   SDT_NVPTXCall - type profile: 0 results, 1 operand, operand 0 is i32.
//   call          - DAG node for NVPTXISD::CALL; chained, optional incoming
//                   glue, produces glue.
//   calltarget    - i32 operand type used for the callee of CALL.
2837 def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
2838 def call : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall,
2839 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
2840 def calltarget : Operand<i32>;
// Call instruction. It prints "call $dst, (1);" - the "(1)" is a fixed part
// of the asm string - and carries no pattern of its own.
2842 def CALL : NVPTXInst<(outs), (ins calltarget:$dst),
2843 "call \t$dst, (1);", []>;
// Select CALL for the call node when the callee is a target global address or
// a target external symbol.
2846 def : Pat<(call tglobaladdr:$dst),
2847 (CALL tglobaladdr:$dst)>;
2848 def : Pat<(call texternalsym:$dst),
2849 (CALL texternalsym:$dst)>;
2851 // Pseudo instructions.
// Pseudo forwards its four arguments (outs, ins, asm string, pattern list)
// unchanged to NVPTXInst; it adds no fields of its own.
2852 class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
2853 : NVPTXInst<outs, ins, asmstr, pattern>;
2855 // @TODO: We use some tricks here to emit curly braces. Can we clean this up
2856 // a bit without TableGen modifications?
// Markers printed around a call sequence; they match callseq_start and
// callseq_end respectively. Callseq_Start also prints a declaration of
// ".reg .b32 temp_param_reg;".
// Each asm string contains one "{{" and one "}}". In Callseq_Start the "}}"
// follows "// <end>", and in Callseq_End the "{{" follows "//".
// NOTE(review): presumably those land inside PTX line comments, so that the
// emitted text opens a brace scope at the start marker and closes it at the
// end marker - confirm against generated PTX.
2857 def Callseq_Start : NVPTXInst<(outs), (ins i32imm:$amt),
2858 "// Callseq Start $amt\n\t{{\n\t.reg .b32 temp_param_reg;\n\t// <end>}}",
2859 [(callseq_start timm:$amt)]>;
2860 def Callseq_End : NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
2861 "\n\t//{{\n\t}}// Callseq End $amt1",
2862 [(callseq_end timm:$amt1, timm:$amt2)]>;
2866 def trapinst : NVPTXInst<(outs), (ins),
2870 include "NVPTXIntrinsics.td"
2873 //-----------------------------------
2875 //-----------------------------------
2876 // BSWAP is currently expanded. The following is a more efficient approach:
2877 // - for < sm_20, use vector scalar mov, as Tesla supports native 16-bit register
2878 // - for sm_20, use prmt (use vector scalar mov to get the pack and
2879 // unpack). sm_20 supports native 32-bit register, but not native 16-bit