1 //===- NVPTXVector.td - NVPTX Vector Specific Instruction defs -*- tblgen-*-==//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 //-----------------------------------
12 //-----------------------------------
15 // All vector instructions derive from NVPTXVecInst
18 class NVPTXVecInst<dag outs, dag ins, string asmstr, list<dag> pattern,
20 : NVPTXInst<outs, ins, asmstr, pattern> {
21 NVPTXInst scalarInst=sInst;
24 let isAsCheapAsAMove=1, VecInstType=isVecExtract.Value in {
26 def V2i16Extract : NVPTXVecInst<(outs Int16Regs:$dst),
27 (ins V2I16Regs:$src, i8imm:$c),
28 "mov.u16 \t$dst, $src${c:vecelem};",
29 [(set Int16Regs:$dst, (vector_extract
30 (v2i16 V2I16Regs:$src), imm:$c))],
34 def V4i16Extract : NVPTXVecInst<(outs Int16Regs:$dst),
35 (ins V4I16Regs:$src, i8imm:$c),
36 "mov.u16 \t$dst, $src${c:vecelem};",
37 [(set Int16Regs:$dst, (vector_extract
38 (v4i16 V4I16Regs:$src), imm:$c))],
42 def V2i8Extract : NVPTXVecInst<(outs Int8Regs:$dst),
43 (ins V2I8Regs:$src, i8imm:$c),
44 "mov.u16 \t$dst, $src${c:vecelem};",
45 [(set Int8Regs:$dst, (vector_extract
46 (v2i8 V2I8Regs:$src), imm:$c))],
50 def V4i8Extract : NVPTXVecInst<(outs Int8Regs:$dst),
51 (ins V4I8Regs:$src, i8imm:$c),
52 "mov.u16 \t$dst, $src${c:vecelem};",
53 [(set Int8Regs:$dst, (vector_extract
54 (v4i8 V4I8Regs:$src), imm:$c))],
58 def V2i32Extract : NVPTXVecInst<(outs Int32Regs:$dst),
59 (ins V2I32Regs:$src, i8imm:$c),
60 "mov.u32 \t$dst, $src${c:vecelem};",
61 [(set Int32Regs:$dst, (vector_extract
62 (v2i32 V2I32Regs:$src), imm:$c))],
66 def V2f32Extract : NVPTXVecInst<(outs Float32Regs:$dst),
67 (ins V2F32Regs:$src, i8imm:$c),
68 "mov.f32 \t$dst, $src${c:vecelem};",
69 [(set Float32Regs:$dst, (vector_extract
70 (v2f32 V2F32Regs:$src), imm:$c))],
74 def V2i64Extract : NVPTXVecInst<(outs Int64Regs:$dst),
75 (ins V2I64Regs:$src, i8imm:$c),
76 "mov.u64 \t$dst, $src${c:vecelem};",
77 [(set Int64Regs:$dst, (vector_extract
78 (v2i64 V2I64Regs:$src), imm:$c))],
82 def V2f64Extract : NVPTXVecInst<(outs Float64Regs:$dst),
83 (ins V2F64Regs:$src, i8imm:$c),
84 "mov.f64 \t$dst, $src${c:vecelem};",
85 [(set Float64Regs:$dst, (vector_extract
86 (v2f64 V2F64Regs:$src), imm:$c))],
90 def V4i32Extract : NVPTXVecInst<(outs Int32Regs:$dst),
91 (ins V4I32Regs:$src, i8imm:$c),
92 "mov.u32 \t$dst, $src${c:vecelem};",
93 [(set Int32Regs:$dst, (vector_extract
94 (v4i32 V4I32Regs:$src), imm:$c))],
98 def V4f32Extract : NVPTXVecInst<(outs Float32Regs:$dst),
99 (ins V4F32Regs:$src, i8imm:$c),
100 "mov.f32 \t$dst, $src${c:vecelem};",
101 [(set Float32Regs:$dst, (vector_extract
102 (v4f32 V4F32Regs:$src), imm:$c))],
106 let isAsCheapAsAMove=1, VecInstType=isVecInsert.Value in {
108 def V2i8Insert : NVPTXVecInst<(outs V2I8Regs:$dst),
109 (ins V2I8Regs:$src, Int8Regs:$val, i8imm:$c),
110 "mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};"
111 "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
113 (vector_insert V2I8Regs:$src, Int8Regs:$val, imm:$c))],
117 def V4i8Insert : NVPTXVecInst<(outs V4I8Regs:$dst),
118 (ins V4I8Regs:$src, Int8Regs:$val, i8imm:$c),
119 "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};"
120 "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
122 (vector_insert V4I8Regs:$src, Int8Regs:$val, imm:$c))],
126 def V2i16Insert : NVPTXVecInst<(outs V2I16Regs:$dst),
127 (ins V2I16Regs:$src, Int16Regs:$val, i8imm:$c),
128 "mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};"
129 "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
130 [(set V2I16Regs:$dst,
131 (vector_insert V2I16Regs:$src, Int16Regs:$val, imm:$c))],
135 def V4i16Insert : NVPTXVecInst<(outs V4I16Regs:$dst),
136 (ins V4I16Regs:$src, Int16Regs:$val, i8imm:$c),
137 "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};"
138 "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
139 [(set V4I16Regs:$dst,
140 (vector_insert V4I16Regs:$src, Int16Regs:$val, imm:$c))],
144 def V2i32Insert : NVPTXVecInst<(outs V2I32Regs:$dst),
145 (ins V2I32Regs:$src, Int32Regs:$val, i8imm:$c),
146 "mov.v2.u32 \t${dst:vecfull}, ${src:vecfull};"
147 "\n\tmov.u32 \t$dst${c:vecelem}, $val;",
148 [(set V2I32Regs:$dst,
149 (vector_insert V2I32Regs:$src, Int32Regs:$val, imm:$c))],
153 def V2f32Insert : NVPTXVecInst<(outs V2F32Regs:$dst),
154 (ins V2F32Regs:$src, Float32Regs:$val, i8imm:$c),
155 "mov.v2.f32 \t${dst:vecfull}, ${src:vecfull};"
156 "\n\tmov.f32 \t$dst${c:vecelem}, $val;",
157 [(set V2F32Regs:$dst,
158 (vector_insert V2F32Regs:$src, Float32Regs:$val, imm:$c))],
162 def V2i64Insert : NVPTXVecInst<(outs V2I64Regs:$dst),
163 (ins V2I64Regs:$src, Int64Regs:$val, i8imm:$c),
164 "mov.v2.u64 \t${dst:vecfull}, ${src:vecfull};"
165 "\n\tmov.u64 \t$dst${c:vecelem}, $val;",
166 [(set V2I64Regs:$dst,
167 (vector_insert V2I64Regs:$src, Int64Regs:$val, imm:$c))],
171 def V2f64Insert : NVPTXVecInst<(outs V2F64Regs:$dst),
172 (ins V2F64Regs:$src, Float64Regs:$val, i8imm:$c),
173 "mov.v2.f64 \t${dst:vecfull}, ${src:vecfull};"
174 "\n\tmov.f64 \t$dst${c:vecelem}, $val;",
175 [(set V2F64Regs:$dst,
176 (vector_insert V2F64Regs:$src, Float64Regs:$val, imm:$c))],
180 def V4i32Insert : NVPTXVecInst<(outs V4I32Regs:$dst),
181 (ins V4I32Regs:$src, Int32Regs:$val, i8imm:$c),
182 "mov.v4.u32 \t${dst:vecfull}, ${src:vecfull};"
183 "\n\tmov.u32 \t$dst${c:vecelem}, $val;",
184 [(set V4I32Regs:$dst,
185 (vector_insert V4I32Regs:$src, Int32Regs:$val, imm:$c))],
189 def V4f32Insert : NVPTXVecInst<(outs V4F32Regs:$dst),
190 (ins V4F32Regs:$src, Float32Regs:$val, i8imm:$c),
191 "mov.v4.f32 \t${dst:vecfull}, ${src:vecfull};"
192 "\n\tmov.f32 \t$dst${c:vecelem}, $val;",
193 [(set V4F32Regs:$dst,
194 (vector_insert V4F32Regs:$src, Float32Regs:$val, imm:$c))],
198 class BinOpAsmString<string c> {
// Assembly text for a four-lane, two-source operation. `opcode` is emitted
// once per lane with operands ${dst}_N, ${a}_N, ${b}_N for N = 0..3; the
// lanes are joined with "\n\t". The result is passed to BinOpAsmString.
202 class V4AsmStr<string opcode> : BinOpAsmString<
203 !strconcat(!strconcat(!strconcat(!strconcat(
204 !strconcat(!strconcat(!strconcat(
205 opcode, " \t${dst}_0, ${a}_0, ${b}_0;\n\t"),
206 opcode), " \t${dst}_1, ${a}_1, ${b}_1;\n\t"),
207 opcode), " \t${dst}_2, ${a}_2, ${b}_2;\n\t"),
208 opcode), " \t${dst}_3, ${a}_3, ${b}_3;")>;
// Assembly text for a two-lane, two-source operation. `opcode` is emitted
// for lane 0 and lane 1 with operands ${dst}_N, ${a}_N, ${b}_N, joined with
// "\n\t".
210 class V2AsmStr<string opcode> : BinOpAsmString<
211 !strconcat(!strconcat(!strconcat(
212 opcode, " \t${dst}_0, ${a}_0, ${b}_0;\n\t"),
213 opcode), " \t${dst}_1, ${a}_1, ${b}_1;")>;
// Assembly text for a four-lane, three-source operation (multiply-add
// shape). Each lane N = 0..3 is printed as
//   opcode ${dst}_N, ${a}_N, ${b}_N, ${c}_N;
// with lanes joined by "\n\t".
215 class V4MADStr<string opcode> : BinOpAsmString<
216 !strconcat(!strconcat(!strconcat(!strconcat(
217 !strconcat(!strconcat(!strconcat(
218 opcode, " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t"),
219 opcode), " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;\n\t"),
220 opcode), " \t${dst}_2, ${a}_2, ${b}_2, ${c}_2;\n\t"),
221 opcode), " \t${dst}_3, ${a}_3, ${b}_3, ${c}_3;")>;
// Two-lane counterpart of V4MADStr: prints
//   opcode ${dst}_N, ${a}_N, ${b}_N, ${c}_N;
// for N = 0 and N = 1, joined by "\n\t".
223 class V2MADStr<string opcode> : BinOpAsmString<
224 !strconcat(!strconcat(!strconcat(
225 opcode, " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t"),
226 opcode), " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;")>;
// Assembly text for a four-lane, single-source operation. Each lane
// N = 0..3 is printed as `opcode ${dst}_N, ${a}_N;`, joined by "\n\t".
228 class V4UnaryStr<string opcode> : BinOpAsmString<
229 !strconcat(!strconcat(!strconcat(!strconcat(
230 !strconcat(!strconcat(!strconcat(
231 opcode, " \t${dst}_0, ${a}_0;\n\t"),
232 opcode), " \t${dst}_1, ${a}_1;\n\t"),
233 opcode), " \t${dst}_2, ${a}_2;\n\t"),
234 opcode), " \t${dst}_3, ${a}_3;")>;
// Two-lane counterpart of V4UnaryStr: prints `opcode ${dst}_N, ${a}_N;` for
// N = 0 and N = 1, joined by "\n\t".
236 class V2UnaryStr<string opcode> : BinOpAsmString<
237 !strconcat(!strconcat(!strconcat(
238 opcode, " \t${dst}_0, ${a}_0;\n\t"),
239 opcode), " \t${dst}_1, ${a}_1;")>;
241 class VecBinaryOp<BinOpAsmString asmstr, SDNode OpNode, NVPTXRegClass regclass,
242 NVPTXInst sInst=NOP> :
243 NVPTXVecInst<(outs regclass:$dst), (ins regclass:$a, regclass:$b),
245 [(set regclass:$dst, (OpNode regclass:$a, regclass:$b))],
248 class VecShiftOp<BinOpAsmString asmstr, SDNode OpNode, NVPTXRegClass regclass1,
249 NVPTXRegClass regclass2, NVPTXInst sInst=NOP> :
250 NVPTXVecInst<(outs regclass1:$dst), (ins regclass1:$a, regclass2:$b),
252 [(set regclass1:$dst, (OpNode regclass1:$a, regclass2:$b))],
255 class VecUnaryOp<BinOpAsmString asmstr, PatFrag OpNode, NVPTXRegClass regclass,
256 NVPTXInst sInst=NOP> :
257 NVPTXVecInst<(outs regclass:$dst), (ins regclass:$a),
259 [(set regclass:$dst, (OpNode regclass:$a))], sInst>;
261 multiclass IntBinVOp<string asmstr, SDNode OpNode,
262 NVPTXInst i64op=NOP, NVPTXInst i32op=NOP, NVPTXInst
263 i16op=NOP, NVPTXInst i8op=NOP> {
264 def V2I64 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "64")>, OpNode, V2I64Regs,
266 def V4I32 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "32")>, OpNode, V4I32Regs,
268 def V2I32 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "32")>, OpNode, V2I32Regs,
270 def V4I16 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "16")>, OpNode, V4I16Regs,
272 def V2I16 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "16")>, OpNode, V2I16Regs,
274 def V4I8 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "16")>, OpNode, V4I8Regs,
276 def V2I8 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "16")>, OpNode, V2I8Regs,
280 multiclass FloatBinVOp<string asmstr, SDNode OpNode,
281 NVPTXInst f64=NOP, NVPTXInst f32=NOP,
282 NVPTXInst f32_ftz=NOP> {
283 def V2F64 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "f64")>, OpNode,
285 def V4F32_ftz : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "ftz.f32")>, OpNode,
286 V4F32Regs, f32_ftz>, Requires<[doF32FTZ]>;
287 def V2F32_ftz : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "ftz.f32")>, OpNode,
288 V2F32Regs, f32_ftz>, Requires<[doF32FTZ]>;
289 def V4F32 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "f32")>, OpNode,
291 def V2F32 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "f32")>, OpNode,
295 multiclass IntUnaryVOp<string asmstr, PatFrag OpNode,
296 NVPTXInst i64op=NOP, NVPTXInst i32op=NOP,
297 NVPTXInst i16op=NOP, NVPTXInst i8op=NOP> {
298 def V2I64 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "64")>, OpNode,
300 def V4I32 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "32")>, OpNode,
302 def V2I32 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "32")>, OpNode,
304 def V4I16 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "16")>, OpNode,
306 def V2I16 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "16")>, OpNode,
308 def V4I8 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "16")>, OpNode,
310 def V2I8 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "16")>, OpNode,
315 // Integer Arithmetic
316 let VecInstType=isVecOther.Value in {
// Element-wise integer add and subtract (opcode prefixes "add.s" / "sub.s"),
// instantiated through IntBinVOp. The four trailing arguments fill the
// multiclass's i64op, i32op, i16op and i8op parameters, in that order.
317 defm VAdd : IntBinVOp<"add.s", add, ADDi64rr, ADDi32rr, ADDi16rr, ADDi8rr>;
318 defm VSub : IntBinVOp<"sub.s", sub, SUBi64rr, SUBi32rr, SUBi16rr, SUBi8rr>;
320 def AddCCV4I32 : VecBinaryOp<V4AsmStr<"add.cc.s32">, addc, V4I32Regs,
322 def AddCCV2I32 : VecBinaryOp<V2AsmStr<"add.cc.s32">, addc, V2I32Regs,
324 def SubCCV4I32 : VecBinaryOp<V4AsmStr<"sub.cc.s32">, subc, V4I32Regs,
326 def SubCCV2I32 : VecBinaryOp<V2AsmStr<"sub.cc.s32">, subc, V2I32Regs,
328 def AddCCCV4I32 : VecBinaryOp<V4AsmStr<"addc.cc.s32">, adde, V4I32Regs,
330 def AddCCCV2I32 : VecBinaryOp<V2AsmStr<"addc.cc.s32">, adde, V2I32Regs,
332 def SubCCCV4I32 : VecBinaryOp<V4AsmStr<"subc.cc.s32">, sube, V4I32Regs,
334 def SubCCCV2I32 : VecBinaryOp<V2AsmStr<"subc.cc.s32">, sube, V2I32Regs,
337 def ShiftLV2I64 : VecShiftOp<V2AsmStr<"shl.b64">, shl, V2I64Regs, V2I32Regs,
339 def ShiftLV2I32 : VecShiftOp<V2AsmStr<"shl.b32">, shl, V2I32Regs, V2I32Regs,
341 def ShiftLV4I32 : VecShiftOp<V4AsmStr<"shl.b32">, shl, V4I32Regs, V4I32Regs,
343 def ShiftLV2I16 : VecShiftOp<V2AsmStr<"shl.b16">, shl, V2I16Regs, V2I32Regs,
345 def ShiftLV4I16 : VecShiftOp<V4AsmStr<"shl.b16">, shl, V4I16Regs, V4I32Regs,
347 def ShiftLV2I8 : VecShiftOp<V2AsmStr<"shl.b16">, shl, V2I8Regs, V2I32Regs,
349 def ShiftLV4I8 : VecShiftOp<V4AsmStr<"shl.b16">, shl, V4I8Regs, V4I32Regs,
353 // cvt to v*i32, helpers for shift
// Conversion instruction with one `inclass` source ($s) and one `outclass`
// result ($d). The selection pattern list is empty, so instances are only
// reachable when named explicitly in the output side of a Pat.
//   asmstr - full assembly text for the instruction
//   sInst  - instruction forwarded as NVPTXVecInst's last argument
//            (defaults to NOP)
354 class CVTtoVeci32<NVPTXRegClass inclass, NVPTXRegClass outclass, string asmstr,
355 NVPTXInst sInst=NOP> :
356 NVPTXVecInst<(outs outclass:$d), (ins inclass:$s), asmstr, [], sInst>;
358 class VecCVTStrHelper<string op, string dest, string src> {
359 string s=!strconcat(op, !strconcat("\t",
360 !strconcat(dest, !strconcat(", ", !strconcat(src, ";")))));
363 class Vec2CVTStr<string op> {
364 string s=!strconcat(VecCVTStrHelper<op, "${d}_0", "${s}_0">.s,
365 !strconcat("\n\t", VecCVTStrHelper<op, "${d}_1", "${s}_1">.s));
368 class Vec4CVTStr<string op> {
369 string s=!strconcat(VecCVTStrHelper<op, "${d}_0", "${s}_0">.s,
371 !strconcat(VecCVTStrHelper<op, "${d}_1", "${s}_1">.s,
373 !strconcat(VecCVTStrHelper<op, "${d}_2", "${s}_2">.s,
374 !strconcat("\n\t", VecCVTStrHelper<op, "${d}_3", "${s}_3">.s))))));
377 let VecInstType=isVecOther.Value in {
// Conversions from an integer vector to the i32 vector of the same lane
// count. Sources with 8- and 16-bit elements are printed with cvt.u32.u16;
// the 64-bit source is printed with cvt.u32.u64.
// NOTE(review): the i8 variants also use the u16 source type - presumably
// because i8 values are held in 16-bit registers; confirm against the
// register definitions.
378 def CVTv2i8tov2i32 : CVTtoVeci32<V2I8Regs, V2I32Regs,
379 Vec2CVTStr<"cvt.u32.u16">.s, Zint_extendext8to32>;
380 def CVTv2i16tov2i32 : CVTtoVeci32<V2I16Regs, V2I32Regs,
381 Vec2CVTStr<"cvt.u32.u16">.s, Zint_extendext16to32>;
382 def CVTv4i8tov4i32 : CVTtoVeci32<V4I8Regs, V4I32Regs,
383 Vec4CVTStr<"cvt.u32.u16">.s, Zint_extendext8to32>;
384 def CVTv4i16tov4i32 : CVTtoVeci32<V4I16Regs, V4I32Regs,
385 Vec4CVTStr<"cvt.u32.u16">.s, Zint_extendext16to32>;
386 def CVTv2i64tov2i32 : CVTtoVeci32<V2I64Regs, V2I32Regs,
387 Vec2CVTStr<"cvt.u32.u64">.s, TRUNC_64to32>;
// Left shifts where the shift-amount vector has the same type as the value
// being shifted. The ShiftL* instructions take their amount in an i32 vector
// register class, so the amount is first converted with the matching
// CVTv*tov*i32 instruction and then fed to the shift.
390 def : Pat<(shl V2I16Regs:$src1, V2I16Regs:$src2),
391 (ShiftLV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>;
392 def : Pat<(shl V2I8Regs:$src1, V2I8Regs:$src2),
393 (ShiftLV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>;
394 def : Pat<(shl V2I64Regs:$src1, V2I64Regs:$src2),
395 (ShiftLV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>;
397 def : Pat<(shl V4I16Regs:$src1, V4I16Regs:$src2),
398 (ShiftLV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>;
399 def : Pat<(shl V4I8Regs:$src1, V4I8Regs:$src2),
400 (ShiftLV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>;
402 let VecInstType=isVecOther.Value in {
403 def ShiftRAV2I64 : VecShiftOp<V2AsmStr<"shr.s64">, sra, V2I64Regs, V2I32Regs,
405 def ShiftRAV2I32 : VecShiftOp<V2AsmStr<"shr.s32">, sra, V2I32Regs, V2I32Regs,
407 def ShiftRAV4I32 : VecShiftOp<V4AsmStr<"shr.s32">, sra, V4I32Regs, V4I32Regs,
409 def ShiftRAV2I16 : VecShiftOp<V2AsmStr<"shr.s16">, sra, V2I16Regs, V2I32Regs,
411 def ShiftRAV4I16 : VecShiftOp<V4AsmStr<"shr.s16">, sra, V4I16Regs, V4I32Regs,
413 def ShiftRAV2I8 : VecShiftOp<V2AsmStr<"shr.s16">, sra, V2I8Regs, V2I32Regs,
415 def ShiftRAV4I8 : VecShiftOp<V4AsmStr<"shr.s16">, sra, V4I8Regs, V4I32Regs,
418 def ShiftRLV2I64 : VecShiftOp<V2AsmStr<"shr.u64">, srl, V2I64Regs, V2I32Regs,
420 def ShiftRLV2I32 : VecShiftOp<V2AsmStr<"shr.u32">, srl, V2I32Regs, V2I32Regs,
422 def ShiftRLV4I32 : VecShiftOp<V4AsmStr<"shr.u32">, srl, V4I32Regs, V4I32Regs,
424 def ShiftRLV2I16 : VecShiftOp<V2AsmStr<"shr.u16">, srl, V2I16Regs, V2I32Regs,
426 def ShiftRLV4I16 : VecShiftOp<V4AsmStr<"shr.u16">, srl, V4I16Regs, V4I32Regs,
428 def ShiftRLV2I8 : VecShiftOp<V2AsmStr<"shr.u16">, srl, V2I8Regs, V2I32Regs,
430 def ShiftRLV4I8 : VecShiftOp<V4AsmStr<"shr.u16">, srl, V4I8Regs, V4I32Regs,
433 defm VMult : IntBinVOp<"mul.lo.s", mul, MULTi64rr, MULTi32rr, MULTi16rr,
435 defm VMultHS : IntBinVOp<"mul.hi.s", mulhs, MULTHSi64rr, MULTHSi32rr,
438 defm VMultHU : IntBinVOp<"mul.hi.u", mulhu, MULTHUi64rr, MULTHUi32rr,
441 defm VSDiv : IntBinVOp<"div.s", sdiv, SDIVi64rr, SDIVi32rr, SDIVi16rr,
443 defm VUDiv : IntBinVOp<"div.u", udiv, UDIVi64rr, UDIVi32rr, UDIVi16rr,
445 defm VSRem : IntBinVOp<"rem.s", srem, SREMi64rr, SREMi32rr, SREMi16rr,
447 defm VURem : IntBinVOp<"rem.u", urem, UREMi64rr, UREMi32rr, UREMi16rr,
// Right shifts where the shift-amount vector has the same type as the value
// being shifted. As with the shl patterns, the amount is converted to an i32
// vector with the matching CVTv*tov*i32 instruction before being passed to
// the shift instruction.

// Arithmetic right shift (sra -> ShiftRA*).
451 def : Pat<(sra V2I16Regs:$src1, V2I16Regs:$src2),
452 (ShiftRAV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>;
453 def : Pat<(sra V2I8Regs:$src1, V2I8Regs:$src2),
454 (ShiftRAV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>;
455 def : Pat<(sra V2I64Regs:$src1, V2I64Regs:$src2),
456 (ShiftRAV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>;
458 def : Pat<(sra V4I16Regs:$src1, V4I16Regs:$src2),
459 (ShiftRAV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>;
460 def : Pat<(sra V4I8Regs:$src1, V4I8Regs:$src2),
461 (ShiftRAV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>;
// Logical right shift (srl -> ShiftRL*).
463 def : Pat<(srl V2I16Regs:$src1, V2I16Regs:$src2),
464 (ShiftRLV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>;
465 def : Pat<(srl V2I8Regs:$src1, V2I8Regs:$src2),
466 (ShiftRLV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>;
467 def : Pat<(srl V2I64Regs:$src1, V2I64Regs:$src2),
468 (ShiftRLV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>;
470 def : Pat<(srl V4I16Regs:$src1, V4I16Regs:$src2),
471 (ShiftRLV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>;
472 def : Pat<(srl V4I8Regs:$src1, V4I8Regs:$src2),
473 (ShiftRLV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>;
475 multiclass VMAD<string asmstr, NVPTXRegClass regclassv4,
476 NVPTXRegClass regclassv2,
477 SDNode an=add, SDNode mn=mul, NVPTXInst sop=NOP,
479 def V4 : NVPTXVecInst<(outs regclassv4:$dst),
480 (ins regclassv4:$a, regclassv4:$b, regclassv4:$c),
482 [(set regclassv4:$dst,
483 (an (mn regclassv4:$a, regclassv4:$b), regclassv4:$c))],
486 def V2 : NVPTXVecInst<(outs regclassv2:$dst),
487 (ins regclassv2:$a, regclassv2:$b, regclassv2:$c),
489 [(set regclassv2:$dst,
490 (an (mn regclassv2:$a, regclassv2:$b), regclassv2:$c))],
495 multiclass VMADV2Only<string asmstr, NVPTXRegClass regclass, NVPTXInst sop=NOP,
497 def V2 : NVPTXVecInst<(outs regclass:$dst),
498 (ins regclass:$a, regclass:$b, regclass:$c),
500 [(set regclass:$dst, (add
501 (mul regclass:$a, regclass:$b), regclass:$c))], sop>,
504 multiclass VFMADV2Only<string asmstr, NVPTXRegClass regclass, NVPTXInst sop=NOP,
506 def V2 : NVPTXVecInst<(outs regclass:$dst),
507 (ins regclass:$a, regclass:$b, regclass:$c),
509 [(set regclass:$dst, (fadd
510 (fmul regclass:$a, regclass:$b), regclass:$c))], sop>,
514 let VecInstType=isVecOther.Value in {
515 defm I8MAD : VMAD<"mad.lo.s16", V4I8Regs, V2I8Regs, add, mul, MAD8rrr, true>;
516 defm I16MAD : VMAD<"mad.lo.s16", V4I16Regs, V2I16Regs, add, mul, MAD16rrr,
518 defm I32MAD : VMAD<"mad.lo.s32", V4I32Regs, V2I32Regs, add, mul, MAD32rrr,
520 defm I64MAD : VMADV2Only<"mad.lo.s64", V2I64Regs, MAD64rrr, true>;
// Integer negate, instantiated through IntUnaryVOp with the ineg fragment;
// the trailing arguments fill the i64op/i32op/i16op/i8op parameters.
522 defm VNeg : IntUnaryVOp<"neg.s", ineg, INEG64, INEG32, INEG16, INEG8>;
// Floating-point add, subtract and multiply, instantiated through
// FloatBinVOp; the trailing arguments fill its f64, f32 and f32_ftz
// parameters, in that order.
524 defm VAddf : FloatBinVOp<"add.", fadd, FADDf64rr, FADDf32rr, FADDf32rr_ftz>;
525 defm VSubf : FloatBinVOp<"sub.", fsub, FSUBf64rr, FSUBf32rr, FSUBf32rr_ftz>;
526 defm VMulf : FloatBinVOp<"mul.", fmul, FMULf64rr, FMULf32rr, FMULf32rr_ftz>;
528 defm F32MAD_ftz : VMAD<"mad.ftz.f32", V4F32Regs, V2F32Regs, fadd, fmul,
529 FMAD32_ftzrrr, doFMADF32_ftz>;
530 defm F32FMA_ftz : VMAD<"fma.rn.ftz.f32", V4F32Regs, V2F32Regs, fadd, fmul,
531 FMA32_ftzrrr, doFMAF32_ftz>;
532 defm F32MAD : VMAD<"mad.f32", V4F32Regs, V2F32Regs, fadd, fmul, FMAD32rrr,
534 defm F32FMA : VMAD<"fma.rn.f32", V4F32Regs, V2F32Regs, fadd, fmul, FMA32rrr,
536 defm F64FMA : VFMADV2Only<"fma.rn.f64", V2F64Regs, FMA64rrr, doFMAF64>;
539 let VecInstType=isVecOther.Value in {
// Vector fdiv instructions. All of them match the same fdiv node and differ
// in the opcode printed and in their Requires<> predicates:
//   div.rn.ftz.f32   - requires doF32FTZ and reqPTX20
//   div.rn.f32       - requires reqPTX20
//   div.full.ftz.f32 - requires doF32FTZ
//   div.full.f32     - no predicate
//   div.rn.f64       - no predicate (v2f64 only)
// The last template argument of each def is passed as VecBinaryOp's sInst.
540 def V4F32Div_prec_ftz : VecBinaryOp<V4AsmStr<"div.rn.ftz.f32">, fdiv, V4F32Regs,
541 FDIV32rr_prec_ftz>, Requires<[doF32FTZ, reqPTX20]>;
542 def V2F32Div_prec_ftz : VecBinaryOp<V2AsmStr<"div.rn.ftz.f32">, fdiv, V2F32Regs,
543 FDIV32rr_prec_ftz>, Requires<[doF32FTZ, reqPTX20]>;
544 def V4F32Div_prec : VecBinaryOp<V4AsmStr<"div.rn.f32">, fdiv, V4F32Regs,
545 FDIV32rr_prec>, Requires<[reqPTX20]>;
546 def V2F32Div_prec : VecBinaryOp<V2AsmStr<"div.rn.f32">, fdiv, V2F32Regs,
547 FDIV32rr_prec>, Requires<[reqPTX20]>;
548 def V2F32Div_ftz : VecBinaryOp<V2AsmStr<"div.full.ftz.f32">, fdiv, V2F32Regs,
549 FDIV32rr_ftz>, Requires<[doF32FTZ]>;
550 def V4F32Div_ftz : VecBinaryOp<V4AsmStr<"div.full.ftz.f32">, fdiv, V4F32Regs,
551 FDIV32rr_ftz>, Requires<[doF32FTZ]>;
552 def V2F32Div : VecBinaryOp<V2AsmStr<"div.full.f32">, fdiv, V2F32Regs, FDIV32rr>;
553 def V4F32Div : VecBinaryOp<V4AsmStr<"div.full.f32">, fdiv, V4F32Regs, FDIV32rr>;
554 def V2F64Div : VecBinaryOp<V2AsmStr<"div.rn.f64">, fdiv, V2F64Regs, FDIV64rr>;
// Wraps fneg in a PatFrag so it can be passed to VecUnaryOp, whose OpNode
// parameter is declared as a PatFrag.
557 def fnegpat : PatFrag<(ops node:$in), (fneg node:$in)>;
559 let VecInstType=isVecOther.Value in {
// Floating-point vector negate. The neg.ftz.f32 forms are only available
// under the doF32FTZ predicate; the neg.f32 and neg.f64 forms carry no
// predicate. The last template argument is passed as VecUnaryOp's sInst.
560 def VNegv2f32_ftz : VecUnaryOp<V2UnaryStr<"neg.ftz.f32">, fnegpat, V2F32Regs,
561 FNEGf32_ftz>, Requires<[doF32FTZ]>;
562 def VNegv4f32_ftz : VecUnaryOp<V4UnaryStr<"neg.ftz.f32">, fnegpat, V4F32Regs,
563 FNEGf32_ftz>, Requires<[doF32FTZ]>;
564 def VNegv2f32 : VecUnaryOp<V2UnaryStr<"neg.f32">, fnegpat, V2F32Regs, FNEGf32>;
565 def VNegv4f32 : VecUnaryOp<V4UnaryStr<"neg.f32">, fnegpat, V4F32Regs, FNEGf32>;
566 def VNegv2f64 : VecUnaryOp<V2UnaryStr<"neg.f64">, fnegpat, V2F64Regs, FNEGf64>;
568 // Logical Arithmetic
// Bitwise and / or / xor via IntBinVOp and bitwise not via IntUnaryVOp. The
// opcode prefix ends in ".b"; each multiclass appends the element width. The
// trailing arguments fill the i64op/i32op/i16op/i8op parameters.
569 defm VAnd : IntBinVOp<"and.b", and, ANDb64rr, ANDb32rr, ANDb16rr, ANDb8rr>;
570 defm VOr : IntBinVOp<"or.b", or, ORb64rr, ORb32rr, ORb16rr, ORb8rr>;
571 defm VXor : IntBinVOp<"xor.b", xor, XORb64rr, XORb32rr, XORb16rr, XORb8rr>;
573 defm VNot : IntUnaryVOp<"not.b", not, NOT64, NOT32, NOT16, NOT8>;
577 multiclass V2FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
578 def : Pat<(fsub V2F32Regs:$a, (fmul V2F32Regs:$b, V2F32Regs:$c)),
579 (Inst (VNegv2f32 V2F32Regs:$b), V2F32Regs:$c, V2F32Regs:$a)>,
582 def : Pat<(fsub (fmul V2F32Regs:$a, V2F32Regs:$b), V2F32Regs:$c),
583 (Inst V2F32Regs:$a, V2F32Regs:$b, (VNegv2f32 V2F32Regs:$c))>,
// Instantiate the v2f32 fsub/fmul contraction patterns once per two-lane
// FMA/MAD instruction. The first argument is the instruction emitted by the
// patterns; the second is the multiclass's Predicate parameter.
587 defm V2FMAF32ext_ftz : V2FPCONTRACT32_SUB_PAT<F32FMA_ftzV2, doFMAF32AGG_ftz>;
588 defm V2FMADF32ext_ftz : V2FPCONTRACT32_SUB_PAT<F32MAD_ftzV2, doFMADF32_ftz>;
589 defm V2FMAF32ext : V2FPCONTRACT32_SUB_PAT<F32FMAV2, doFMAF32AGG>;
590 defm V2FMADF32ext : V2FPCONTRACT32_SUB_PAT<F32MADV2, doFMADF32>;
592 multiclass V4FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
593 def : Pat<(fsub V4F32Regs:$a, (fmul V4F32Regs:$b, V4F32Regs:$c)),
594 (Inst (VNegv4f32 V4F32Regs:$b), V4F32Regs:$c, V4F32Regs:$a)>,
597 def : Pat<(fsub (fmul V4F32Regs:$a, V4F32Regs:$b), V4F32Regs:$c),
598 (Inst V4F32Regs:$a, V4F32Regs:$b, (VNegv4f32 V4F32Regs:$c))>,
// Instantiate the v4f32 fsub/fmul contraction patterns once per four-lane
// FMA/MAD instruction. The first argument is the instruction emitted by the
// patterns; the second is the multiclass's Predicate parameter.
602 defm V4FMAF32ext_ftz : V4FPCONTRACT32_SUB_PAT<F32FMA_ftzV4, doFMAF32AGG_ftz>;
603 defm V4FMADF32ext_ftz : V4FPCONTRACT32_SUB_PAT<F32MAD_ftzV4, doFMADF32_ftz>;
604 defm V4FMAF32ext : V4FPCONTRACT32_SUB_PAT<F32FMAV4, doFMAF32AGG>;
605 defm V4FMADF32ext : V4FPCONTRACT32_SUB_PAT<F32MADV4, doFMADF32>;
607 multiclass V2FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
608 def : Pat<(fsub V2F64Regs:$a, (fmul V2F64Regs:$b, V2F64Regs:$c)),
609 (Inst (VNegv2f64 V2F64Regs:$b), V2F64Regs:$c, V2F64Regs:$a)>,
612 def : Pat<(fsub (fmul V2F64Regs:$a, V2F64Regs:$b), V2F64Regs:$c),
613 (Inst V2F64Regs:$a, V2F64Regs:$b, (VNegv2f64 V2F64Regs:$c))>,
// Instantiate the v2f64 fsub/fmul contraction patterns for the two-lane f64
// FMA instruction; doFMAF64AGG is the multiclass's Predicate argument.
617 defm V2FMAF64ext : V2FPCONTRACT64_SUB_PAT<F64FMAV2, doFMAF64AGG>;
619 class VecModStr<string vecsize, string elem, string extra, string l="">
621 string t1 = !strconcat("${c", elem);
622 string t2 = !strconcat(t1, ":vecv");
623 string t3 = !strconcat(t2, vecsize);
624 string t4 = !strconcat(t3, extra);
625 string t5 = !strconcat(t4, l);
626 string s = !strconcat(t5, "}");
628 class ShuffleOneLine<string vecsize, string elem, string type>
630 string t1 = VecModStr<vecsize, elem, "comm", "1">.s;
631 string t2 = !strconcat(t1, "mov.");
632 string t3 = !strconcat(t2, type);
633 string t4 = !strconcat(t3, " \t${dst}_");
634 string t5 = !strconcat(t4, elem);
635 string t6 = !strconcat(t5, ", $src1");
636 string t7 = !strconcat(t6, VecModStr<vecsize, elem, "pos">.s);
637 string t8 = !strconcat(t7, ";\n\t");
638 string t9 = !strconcat(t8, VecModStr<vecsize, elem, "comm", "2">.s);
639 string t10 = !strconcat(t9, "mov.");
640 string t11 = !strconcat(t10, type);
641 string t12 = !strconcat(t11, " \t${dst}_");
642 string t13 = !strconcat(t12, elem);
643 string t14 = !strconcat(t13, ", $src2");
644 string t15 = !strconcat(t14, VecModStr<vecsize, elem, "pos">.s);
645 string s = !strconcat(t15, ";");
647 class ShuffleAsmStr2<string type>
649 string t1 = ShuffleOneLine<"2", "0", type>.s;
650 string t2 = !strconcat(t1, "\n\t");
651 string s = !strconcat(t2, ShuffleOneLine<"2", "1", type>.s);
653 class ShuffleAsmStr4<string type>
655 string t1 = ShuffleOneLine<"4", "0", type>.s;
656 string t2 = !strconcat(t1, "\n\t");
657 string t3 = !strconcat(t2, ShuffleOneLine<"4", "1", type>.s);
658 string t4 = !strconcat(t3, "\n\t");
659 string t5 = !strconcat(t4, ShuffleOneLine<"4", "2", type>.s);
660 string t6 = !strconcat(t5, "\n\t");
661 string s = !strconcat(t6, ShuffleOneLine<"4", "3", type>.s);
664 let neverHasSideEffects=1, VecInstType=isVecShuffle.Value in {
665 def VecShuffle_v4f32 : NVPTXVecInst<(outs V4F32Regs:$dst),
666 (ins V4F32Regs:$src1, V4F32Regs:$src2,
667 i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
668 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
669 ShuffleAsmStr4<"f32">.s),
672 def VecShuffle_v4i32 : NVPTXVecInst<(outs V4I32Regs:$dst),
673 (ins V4I32Regs:$src1, V4I32Regs:$src2,
674 i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
675 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
676 ShuffleAsmStr4<"u32">.s),
679 def VecShuffle_v4i16 : NVPTXVecInst<(outs V4I16Regs:$dst),
680 (ins V4I16Regs:$src1, V4I16Regs:$src2,
681 i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
682 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
683 ShuffleAsmStr4<"u16">.s),
686 def VecShuffle_v4i8 : NVPTXVecInst<(outs V4I8Regs:$dst),
687 (ins V4I8Regs:$src1, V4I8Regs:$src2,
688 i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
689 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
690 ShuffleAsmStr4<"u16">.s),
693 def VecShuffle_v2f32 : NVPTXVecInst<(outs V2F32Regs:$dst),
694 (ins V2F32Regs:$src1, V2F32Regs:$src2,
695 i8imm:$c0, i8imm:$c1),
696 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
697 ShuffleAsmStr2<"f32">.s),
700 def VecShuffle_v2i32 : NVPTXVecInst<(outs V2I32Regs:$dst),
701 (ins V2I32Regs:$src1, V2I32Regs:$src2,
702 i8imm:$c0, i8imm:$c1),
703 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
704 ShuffleAsmStr2<"u32">.s),
707 def VecShuffle_v2i8 : NVPTXVecInst<(outs V2I8Regs:$dst),
708 (ins V2I8Regs:$src1, V2I8Regs:$src2,
709 i8imm:$c0, i8imm:$c1),
710 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
711 ShuffleAsmStr2<"u16">.s),
714 def VecShuffle_v2i16 : NVPTXVecInst<(outs V2I16Regs:$dst),
715 (ins V2I16Regs:$src1, V2I16Regs:$src2,
716 i8imm:$c0, i8imm:$c1),
717 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
718 ShuffleAsmStr2<"u16">.s),
721 def VecShuffle_v2f64 : NVPTXVecInst<(outs V2F64Regs:$dst),
722 (ins V2F64Regs:$src1, V2F64Regs:$src2,
723 i8imm:$c0, i8imm:$c1),
724 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
725 ShuffleAsmStr2<"f64">.s),
728 def VecShuffle_v2i64 : NVPTXVecInst<(outs V2I64Regs:$dst),
729 (ins V2I64Regs:$src1, V2I64Regs:$src2,
730 i8imm:$c0, i8imm:$c1),
731 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
732 ShuffleAsmStr2<"u64">.s),
736 def ShuffleMask0 : SDNodeXForm<vector_shuffle, [{
737 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
738 return CurDAG->getTargetConstant(SVOp->getMaskElt(0), MVT::i32);
740 def ShuffleMask1 : SDNodeXForm<vector_shuffle, [{
741 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
742 return CurDAG->getTargetConstant(SVOp->getMaskElt(1), MVT::i32);
744 def ShuffleMask2 : SDNodeXForm<vector_shuffle, [{
745 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
746 return CurDAG->getTargetConstant(SVOp->getMaskElt(2), MVT::i32);
748 def ShuffleMask3 : SDNodeXForm<vector_shuffle, [{
749 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
750 return CurDAG->getTargetConstant(SVOp->getMaskElt(3), MVT::i32);
753 // The spurious call is here to silence a compiler warning about N being unused.
755 def vec_shuf : PatFrag<(ops node:$lhs, node:$rhs),
756 (vector_shuffle node:$lhs, node:$rhs),
757 [{ N->getGluedNode(); return true; }]>;
// Selection patterns for vector_shuffle, one per vector type. The matched
// shuffle node is bound to $op, and each ShuffleMaskN transform is applied to
// it to turn mask element N into a target constant operand. Two-element
// types pass mask elements 0-1; four-element types pass mask elements 0-3.
759 def : Pat<(v2f64 (vec_shuf:$op V2F64Regs:$src1, V2F64Regs:$src2)),
760 (VecShuffle_v2f64 V2F64Regs:$src1, V2F64Regs:$src2,
761 (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
763 def : Pat<(v4f32 (vec_shuf:$op V4F32Regs:$src1, V4F32Regs:$src2)),
764 (VecShuffle_v4f32 V4F32Regs:$src1, V4F32Regs:$src2,
765 (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
766 (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;
768 def : Pat<(v2f32 (vec_shuf:$op V2F32Regs:$src1, V2F32Regs:$src2)),
769 (VecShuffle_v2f32 V2F32Regs:$src1, V2F32Regs:$src2,
770 (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
772 def : Pat<(v2i64 (vec_shuf:$op V2I64Regs:$src1, V2I64Regs:$src2)),
773 (VecShuffle_v2i64 V2I64Regs:$src1, V2I64Regs:$src2,
774 (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
776 def : Pat<(v4i32 (vec_shuf:$op V4I32Regs:$src1, V4I32Regs:$src2)),
777 (VecShuffle_v4i32 V4I32Regs:$src1, V4I32Regs:$src2,
778 (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
779 (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;
781 def : Pat<(v2i32 (vec_shuf:$op V2I32Regs:$src1, V2I32Regs:$src2)),
782 (VecShuffle_v2i32 V2I32Regs:$src1, V2I32Regs:$src2,
783 (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
785 def : Pat<(v4i16 (vec_shuf:$op V4I16Regs:$src1, V4I16Regs:$src2)),
786 (VecShuffle_v4i16 V4I16Regs:$src1, V4I16Regs:$src2,
787 (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
788 (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;
790 def : Pat<(v2i16 (vec_shuf:$op V2I16Regs:$src1, V2I16Regs:$src2)),
791 (VecShuffle_v2i16 V2I16Regs:$src1, V2I16Regs:$src2,
792 (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
794 def : Pat<(v4i8 (vec_shuf:$op V4I8Regs:$src1, V4I8Regs:$src2)),
795 (VecShuffle_v4i8 V4I8Regs:$src1, V4I8Regs:$src2,
796 (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
797 (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;
799 def : Pat<(v2i8 (vec_shuf:$op V2I8Regs:$src1, V2I8Regs:$src2)),
800 (VecShuffle_v2i8 V2I8Regs:$src1, V2I8Regs:$src2,
801 (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
803 class Build_Vector2<string asmstr, NVPTXRegClass vclass, NVPTXRegClass sclass,
805 : NVPTXVecInst<(outs vclass:$dst),
806 (ins sclass:$a1, sclass:$a2),
807 !strconcat(asmstr, "\t${dst:vecfull}, {{$a1, $a2}};"),
808 [(set vclass:$dst, (build_vector sclass:$a1, sclass:$a2))],
810 class Build_Vector4<string asmstr, NVPTXRegClass vclass, NVPTXRegClass sclass,
812 : NVPTXVecInst<(outs vclass:$dst),
813 (ins sclass:$a1, sclass:$a2, sclass:$a3, sclass:$a4),
814 !strconcat(asmstr, "\t${dst:vecfull}, {{$a1, $a2, $a3, $a4}};"),
816 (build_vector sclass:$a1, sclass:$a2,
817 sclass:$a3, sclass:$a4))], si>;
819 let isAsCheapAsAMove=1, VecInstType=isVecBuild.Value in {
820 def Build_Vector2_f32 : Build_Vector2<"mov.v2.f32", V2F32Regs, Float32Regs,
822 def Build_Vector2_f64 : Build_Vector2<"mov.v2.f64", V2F64Regs, Float64Regs,
825 def Build_Vector2_i32 : Build_Vector2<"mov.v2.u32", V2I32Regs, Int32Regs,
827 def Build_Vector2_i64 : Build_Vector2<"mov.v2.u64", V2I64Regs, Int64Regs,
829 def Build_Vector2_i16 : Build_Vector2<"mov.v2.u16", V2I16Regs, Int16Regs,
831 def Build_Vector2_i8 : Build_Vector2<"mov.v2.u16", V2I8Regs, Int8Regs,
834 def Build_Vector4_f32 : Build_Vector4<"mov.v4.f32", V4F32Regs, Float32Regs,
837 def Build_Vector4_i32 : Build_Vector4<"mov.v4.u32", V4I32Regs, Int32Regs,
839 def Build_Vector4_i16 : Build_Vector4<"mov.v4.u16", V4I16Regs, Int16Regs,
841 def Build_Vector4_i8 : Build_Vector4<"mov.v4.u16", V4I8Regs, Int8Regs,
845 class Vec_Move<string asmstr, NVPTXRegClass vclass, NVPTXInst sop=NOP>
846 : NVPTXVecInst<(outs vclass:$dst), (ins vclass:$src),
847 !strconcat(asmstr, "\t${dst:vecfull}, ${src:vecfull};"),
850 let isAsCheapAsAMove=1, neverHasSideEffects=1, IsSimpleMove=1,
851 VecInstType=isVecOther.Value in {
// Whole-vector register moves, one per vector register class. The first
// argument is the mov opcode that Vec_Move prints ahead of the
// ${dst:vecfull}, ${src:vecfull} operands; the last is Vec_Move's `sop`
// argument. Both i8 vector classes are printed with the .u16 opcodes.
852 def V4f32Mov : Vec_Move<"mov.v4.f32", V4F32Regs, FMOV32rr>;
853 def V2f32Mov : Vec_Move<"mov.v2.f32", V2F32Regs, FMOV32rr>;
855 def V4i32Mov : Vec_Move<"mov.v4.u32", V4I32Regs, IMOV32rr>;
856 def V2i32Mov : Vec_Move<"mov.v2.u32", V2I32Regs, IMOV32rr>;
858 def V4i16Mov : Vec_Move<"mov.v4.u16", V4I16Regs, IMOV16rr>;
859 def V2i16Mov : Vec_Move<"mov.v2.u16", V2I16Regs, IMOV16rr>;
861 def V4i8Mov : Vec_Move<"mov.v4.u16", V4I8Regs, IMOV8rr>;
862 def V2i8Mov : Vec_Move<"mov.v2.u16", V2I8Regs, IMOV8rr>;
864 def V2f64Mov : Vec_Move<"mov.v2.f64", V2F64Regs, FMOV64rr>;
865 def V2i64Mov : Vec_Move<"mov.v2.u64", V2I64Regs, IMOV64rr>;
868 // extract subvector patterns
// SDNode for ISD::EXTRACT_SUBVECTOR with one result and two operands. The
// SDTCisPtrTy<2> constraint requires value number 2 to have the pointer type
// (presumably the start index - confirm against the SDTypeProfile numbering).
869 def extract_subvec : SDNode<"ISD::EXTRACT_SUBVECTOR",
870 SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>>;
// Each pattern below takes the two-element subvector starting at element 0
// or element 2 of a four-element vector. It extracts the two elements
// individually with V4*Extract and rebuilds them with Build_Vector2_*.
872 def : Pat<(v2f32 (extract_subvec V4F32Regs:$src, 0)),
873 (Build_Vector2_f32 (V4f32Extract V4F32Regs:$src, 0),
874 (V4f32Extract V4F32Regs:$src, 1))>;
875 def : Pat<(v2f32 (extract_subvec V4F32Regs:$src, 2)),
876 (Build_Vector2_f32 (V4f32Extract V4F32Regs:$src, 2),
877 (V4f32Extract V4F32Regs:$src, 3))>;
878 def : Pat<(v2i32 (extract_subvec V4I32Regs:$src, 0)),
879 (Build_Vector2_i32 (V4i32Extract V4I32Regs:$src, 0),
880 (V4i32Extract V4I32Regs:$src, 1))>;
881 def : Pat<(v2i32 (extract_subvec V4I32Regs:$src, 2)),
882 (Build_Vector2_i32 (V4i32Extract V4I32Regs:$src, 2),
883 (V4i32Extract V4I32Regs:$src, 3))>;
884 def : Pat<(v2i16 (extract_subvec V4I16Regs:$src, 0)),
885 (Build_Vector2_i16 (V4i16Extract V4I16Regs:$src, 0),
886 (V4i16Extract V4I16Regs:$src, 1))>;
887 def : Pat<(v2i16 (extract_subvec V4I16Regs:$src, 2)),
888 (Build_Vector2_i16 (V4i16Extract V4I16Regs:$src, 2),
889 (V4i16Extract V4I16Regs:$src, 3))>;
890 def : Pat<(v2i8 (extract_subvec V4I8Regs:$src, 0)),
891 (Build_Vector2_i8 (V4i8Extract V4I8Regs:$src, 0),
892 (V4i8Extract V4I8Regs:$src, 1))>;
893 def : Pat<(v2i8 (extract_subvec V4I8Regs:$src, 2)),
894 (Build_Vector2_i8 (V4i8Extract V4I8Regs:$src, 2),
895 (V4i8Extract V4I8Regs:$src, 3))>;
897 // Select instructions
// String helper: builds the assembly text of one per-element select,
//   selp.<type> \t${dst}_<pos>, ${src1}_<pos>, ${src2}_<pos>, $p;
// `type` is appended after "selp." and `pos` is appended after the "_" that
// follows each of the dst/src1/src2 operand references.
// t1..t7 are intermediate concatenations; the finished string is field `s`.
898 class Select_OneLine<string type, string pos> {
899 string t1 = !strconcat("selp.", type);
900 string t2 = !strconcat(t1, " \t${dst}_");
901 string t3 = !strconcat(t2, pos);
902 string t4 = !strconcat(t3, ", ${src1}_");
903 string t5 = !strconcat(t4, pos);
904 string t6 = !strconcat(t5, ", ${src2}_");
905 string t7 = !strconcat(t6, pos);
906 string s = !strconcat(t7, ", $p;");
// String helper for 2-element vector selects: field `s` is the
// Select_OneLine text for positions "0" and "1", joined by "\n\t" so the
// two selp instructions are emitted on separate lines.
909 class Select_Str2<string type> {
910 string t1 = Select_OneLine<type, "0">.s;
911 string t2 = !strconcat(t1, "\n\t");
912 string s = !strconcat(t2, Select_OneLine<type, "1">.s);
// String helper for 4-element vector selects: field `s` is the
// Select_OneLine text for positions "0" through "3", in that order, with
// "\n\t" between consecutive lines.
915 class Select_Str4<string type> {
916 string t1 = Select_OneLine<type, "0">.s;
917 string t2 = !strconcat(t1, "\n\t");
918 string t3 = !strconcat(t2, Select_OneLine<type, "1">.s);
919 string t4 = !strconcat(t3, "\n\t");
920 string t5 = !strconcat(t4, Select_OneLine<type, "2">.s);
921 string t6 = !strconcat(t5, "\n\t");
922 string s = !strconcat(t6, Select_OneLine<type, "3">.s);
926 class Vec_Select<NVPTXRegClass vclass, string asmstr, NVPTXInst sop>
927 : NVPTXVecInst<(outs vclass:$dst),
928 (ins vclass:$src1, vclass:$src2, Int1Regs:$p),
930 [(set vclass:$dst, (select Int1Regs:$p, vclass:$src1,
// Vector select instructions, one per vector register class, all classified
// as isVecOther. Vec_Select arguments: vector register class, assembly text
// (Select_Str2 for 2-element vectors, Select_Str4 for 4-element vectors), and
// the scalar select instruction bound to Vec_Select's `sop` parameter.
934 let VecInstType=isVecOther.Value in {
935 def V2I64_Select : Vec_Select<V2I64Regs, Select_Str2<"b64">.s, SELECTi64rr>;
936 def V4I32_Select : Vec_Select<V4I32Regs, Select_Str4<"b32">.s, SELECTi32rr>;
937 def V2I32_Select : Vec_Select<V2I32Regs, Select_Str2<"b32">.s, SELECTi32rr>;
938 def V4I16_Select : Vec_Select<V4I16Regs, Select_Str4<"b16">.s, SELECTi16rr>;
939 def V2I16_Select : Vec_Select<V2I16Regs, Select_Str2<"b16">.s, SELECTi16rr>;
// The i8 vector selects use the "b16" type string, the same one as the i16
// selects; the scalar instruction is SELECTi8rr.
940 def V4I8_Select : Vec_Select<V4I8Regs, Select_Str4<"b16">.s, SELECTi8rr>;
941 def V2I8_Select : Vec_Select<V2I8Regs, Select_Str2<"b16">.s, SELECTi8rr>;
// Floating-point vector selects use the f64/f32 type strings.
943 def V2F64_Select : Vec_Select<V2F64Regs, Select_Str2<"f64">.s, SELECTf64rr>;
944 def V4F32_Select : Vec_Select<V4F32Regs, Select_Str4<"f32">.s, SELECTf32rr>;
945 def V2F32_Select : Vec_Select<V2F32Regs, Select_Str2<"f32">.s, SELECTf32rr>;
948 // Comparison instructions
950 // setcc convenience fragments.
// Each fragment takes two operands ($lhs, $rhs) and expands to a setcc node
// whose third operand is fixed to one condition code. They are the `op`
// argument of the Vec_Compare_All / FVec_Compare_All instantiations that
// follow in this file.
//
// SETO* condition codes (plus SETO itself).
951 def vsetoeq : PatFrag<(ops node:$lhs, node:$rhs),
952 (setcc node:$lhs, node:$rhs, SETOEQ)>;
953 def vsetogt : PatFrag<(ops node:$lhs, node:$rhs),
954 (setcc node:$lhs, node:$rhs, SETOGT)>;
955 def vsetoge : PatFrag<(ops node:$lhs, node:$rhs),
956 (setcc node:$lhs, node:$rhs, SETOGE)>;
957 def vsetolt : PatFrag<(ops node:$lhs, node:$rhs),
958 (setcc node:$lhs, node:$rhs, SETOLT)>;
959 def vsetole : PatFrag<(ops node:$lhs, node:$rhs),
960 (setcc node:$lhs, node:$rhs, SETOLE)>;
961 def vsetone : PatFrag<(ops node:$lhs, node:$rhs),
962 (setcc node:$lhs, node:$rhs, SETONE)>;
963 def vseto : PatFrag<(ops node:$lhs, node:$rhs),
964 (setcc node:$lhs, node:$rhs, SETO)>;
// SETU* condition codes (plus SETUO itself).
965 def vsetuo : PatFrag<(ops node:$lhs, node:$rhs),
966 (setcc node:$lhs, node:$rhs, SETUO)>;
967 def vsetueq : PatFrag<(ops node:$lhs, node:$rhs),
968 (setcc node:$lhs, node:$rhs, SETUEQ)>;
969 def vsetugt : PatFrag<(ops node:$lhs, node:$rhs),
970 (setcc node:$lhs, node:$rhs, SETUGT)>;
971 def vsetuge : PatFrag<(ops node:$lhs, node:$rhs),
972 (setcc node:$lhs, node:$rhs, SETUGE)>;
973 def vsetult : PatFrag<(ops node:$lhs, node:$rhs),
974 (setcc node:$lhs, node:$rhs, SETULT)>;
975 def vsetule : PatFrag<(ops node:$lhs, node:$rhs),
976 (setcc node:$lhs, node:$rhs, SETULE)>;
977 def vsetune : PatFrag<(ops node:$lhs, node:$rhs),
978 (setcc node:$lhs, node:$rhs, SETUNE)>;
// Condition codes without an O/U prefix.
979 def vseteq : PatFrag<(ops node:$lhs, node:$rhs),
980 (setcc node:$lhs, node:$rhs, SETEQ)>;
981 def vsetgt : PatFrag<(ops node:$lhs, node:$rhs),
982 (setcc node:$lhs, node:$rhs, SETGT)>;
983 def vsetge : PatFrag<(ops node:$lhs, node:$rhs),
984 (setcc node:$lhs, node:$rhs, SETGE)>;
985 def vsetlt : PatFrag<(ops node:$lhs, node:$rhs),
986 (setcc node:$lhs, node:$rhs, SETLT)>;
987 def vsetle : PatFrag<(ops node:$lhs, node:$rhs),
988 (setcc node:$lhs, node:$rhs, SETLE)>;
989 def vsetne : PatFrag<(ops node:$lhs, node:$rhs),
990 (setcc node:$lhs, node:$rhs, SETNE)>;
992 class Vec_Compare<PatFrag op, NVPTXRegClass outrclass, NVPTXRegClass inrclass,
994 : NVPTXVecInst<(outs outrclass:$dst),
995 (ins inrclass:$a, inrclass:$b),
997 [(set outrclass:$dst, (op inrclass:$a, inrclass:$b))],
1000 multiclass Vec_Compare_All<PatFrag op,
1006 def V2I8 : Vec_Compare<op, V2I8Regs, V2I8Regs, inst8>;
1007 def V4I8 : Vec_Compare<op, V4I8Regs, V4I8Regs, inst8>;
1008 def V2I16 : Vec_Compare<op, V2I16Regs, V2I16Regs, inst16>;
1009 def V4I16 : Vec_Compare<op, V4I16Regs, V4I16Regs, inst16>;
1010 def V2I32 : Vec_Compare<op, V2I32Regs, V2I32Regs, inst32>;
1011 def V4I32 : Vec_Compare<op, V4I32Regs, V4I32Regs, inst32>;
1012 def V2I64 : Vec_Compare<op, V2I64Regs, V2I64Regs, inst64>;
// Integer vector comparisons, all classified as isVecOther.
// Each defm instantiates Vec_Compare_All with a setcc fragment followed by
// four scalar compare instructions, given in the order i8, i16, i32, i64.
// The S-prefixed names pair with the unprefixed fragments (vsetgt, vsetlt,
// ...) and the U-prefixed names with the vsetu* fragments.
1015 let VecInstType=isVecOther.Value in {
1016 defm VecSGT : Vec_Compare_All<vsetgt, ISetSGTi8rr_toi8, ISetSGTi16rr_toi16,
1017 ISetSGTi32rr_toi32, ISetSGTi64rr_toi64>;
1018 defm VecUGT : Vec_Compare_All<vsetugt, ISetUGTi8rr_toi8, ISetUGTi16rr_toi16,
1019 ISetUGTi32rr_toi32, ISetUGTi64rr_toi64>;
1020 defm VecSLT : Vec_Compare_All<vsetlt, ISetSLTi8rr_toi8, ISetSLTi16rr_toi16,
1021 ISetSLTi32rr_toi32, ISetSLTi64rr_toi64>;
1022 defm VecULT : Vec_Compare_All<vsetult, ISetULTi8rr_toi8, ISetULTi16rr_toi16,
1023 ISetULTi32rr_toi32, ISetULTi64rr_toi64>;
1024 defm VecSGE : Vec_Compare_All<vsetge, ISetSGEi8rr_toi8, ISetSGEi16rr_toi16,
1025 ISetSGEi32rr_toi32, ISetSGEi64rr_toi64>;
1026 defm VecUGE : Vec_Compare_All<vsetuge, ISetUGEi8rr_toi8, ISetUGEi16rr_toi16,
1027 ISetUGEi32rr_toi32, ISetUGEi64rr_toi64>;
1028 defm VecSLE : Vec_Compare_All<vsetle, ISetSLEi8rr_toi8, ISetSLEi16rr_toi16,
1029 ISetSLEi32rr_toi32, ISetSLEi64rr_toi64>;
1030 defm VecULE : Vec_Compare_All<vsetule, ISetULEi8rr_toi8, ISetULEi16rr_toi16,
1031 ISetULEi32rr_toi32, ISetULEi64rr_toi64>;
1032 defm VecSEQ : Vec_Compare_All<vseteq, ISetSEQi8rr_toi8, ISetSEQi16rr_toi16,
1033 ISetSEQi32rr_toi32, ISetSEQi64rr_toi64>;
1034 defm VecUEQ : Vec_Compare_All<vsetueq, ISetUEQi8rr_toi8, ISetUEQi16rr_toi16,
1035 ISetUEQi32rr_toi32, ISetUEQi64rr_toi64>;
1036 defm VecSNE : Vec_Compare_All<vsetne, ISetSNEi8rr_toi8, ISetSNEi16rr_toi16,
1037 ISetSNEi32rr_toi32, ISetSNEi64rr_toi64>;
1038 defm VecUNE : Vec_Compare_All<vsetune, ISetUNEi8rr_toi8, ISetUNEi16rr_toi16,
1039 ISetUNEi32rr_toi32, ISetUNEi64rr_toi64>;
1042 multiclass FVec_Compare_All<PatFrag op,
1046 def V2F32 : Vec_Compare<op, V2I32Regs, V2F32Regs, instf32>;
1047 def V4F32 : Vec_Compare<op, V4I32Regs, V4F32Regs, instf32>;
1048 def V2F64 : Vec_Compare<op, V2I64Regs, V2F64Regs, instf64>;
1051 let VecInstType=isVecOther.Value in {
1052 defm FVecGT : FVec_Compare_All<vsetogt, FSetGTf32rr_toi32,
1054 defm FVecLT : FVec_Compare_All<vsetolt, FSetLTf32rr_toi32,
1056 defm FVecGE : FVec_Compare_All<vsetoge, FSetGEf32rr_toi32,
1058 defm FVecLE : FVec_Compare_All<vsetole, FSetLEf32rr_toi32,
1060 defm FVecEQ : FVec_Compare_All<vsetoeq, FSetEQf32rr_toi32,
1062 defm FVecNE : FVec_Compare_All<vsetone, FSetNEf32rr_toi32,
// Floating-point vector comparisons built on the vsetu* fragments.
// FVec_Compare_All arguments: setcc fragment, scalar f32 compare instruction,
// scalar f64 compare instruction.
1065 defm FVecUGT : FVec_Compare_All<vsetugt, FSetUGTf32rr_toi32,
1066 FSetUGTf64rr_toi64>;
1067 defm FVecULT : FVec_Compare_All<vsetult, FSetULTf32rr_toi32,
1068 FSetULTf64rr_toi64>;
1069 defm FVecUGE : FVec_Compare_All<vsetuge, FSetUGEf32rr_toi32,
1070 FSetUGEf64rr_toi64>;
1071 defm FVecULE : FVec_Compare_All<vsetule, FSetULEf32rr_toi32,
1072 FSetULEf64rr_toi64>;
1073 defm FVecUEQ : FVec_Compare_All<vsetueq, FSetUEQf32rr_toi32,
1074 FSetUEQf64rr_toi64>;
1075 defm FVecUNE : FVec_Compare_All<vsetune, FSetUNEf32rr_toi32,
1076 FSetUNEf64rr_toi64>;
// SETO maps to the *NUM* scalar instructions and SETUO to the *NAN* ones.
1078 defm FVecNUM : FVec_Compare_All<vseto, FSetNUMf32rr_toi32,
1079 FSetNUMf64rr_toi64>;
1080 defm FVecNAN : FVec_Compare_All<vsetuo, FSetNANf32rr_toi32,
1081 FSetNANf64rr_toi64>;
// Four-element parameter load written with scalar registers.
//   regclass - register class of each of the four results $d1..$d4
//   opstr    - suffix appended to "ld.param" (e.g. ".v4.b32")
// Emits: ld.param<opstr> \t{$d1, $d2, $d3, $d4}, [retval0+$b];
// Operand $a is declared as an input but is not referenced by the assembly
// string; only $b is used, as the offset from retval0. No selection pattern
// is attached (empty pattern list).
1084 class LoadParamScalar4Inst<NVPTXRegClass regclass, string opstr> :
1085 NVPTXInst<(outs regclass:$d1, regclass:$d2, regclass:$d3, regclass:$d4),
1086 (ins i32imm:$a, i32imm:$b),
1087 !strconcat(!strconcat("ld.param", opstr),
1088 "\t{{$d1, $d2, $d3, $d4}}, [retval0+$b];"), []>;
// Two-element parameter load written with scalar registers.
//   regclass - register class of each of the two results $d1, $d2
//   opstr    - suffix appended to "ld.param" (e.g. ".v2.b32")
// Emits: ld.param<opstr> \t{$d1, $d2}, [retval0+$b];
// Operand $a is declared as an input but is not referenced by the assembly
// string; only $b is used, as the offset from retval0. No selection pattern
// is attached (empty pattern list).
1090 class LoadParamScalar2Inst<NVPTXRegClass regclass, string opstr> :
1091 NVPTXInst<(outs regclass:$d1, regclass:$d2),
1092 (ins i32imm:$a, i32imm:$b),
1093 !strconcat(!strconcat("ld.param", opstr),
1094 "\t{{$d1, $d2}}, [retval0+$b];"), []>;
1097 class StoreParamScalar4Inst<NVPTXRegClass regclass, string opstr> :
1099 (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4,
1100 i32imm:$a, i32imm:$b),
1101 !strconcat(!strconcat("st.param", opstr),
1102 "\t[param$a+$b], {{$s1, $s2, $s3, $s4}};"), []>;
1104 class StoreParamScalar2Inst<NVPTXRegClass regclass, string opstr> :
1106 (ins regclass:$s1, regclass:$s2, i32imm:$a, i32imm:$b),
1107 !strconcat(!strconcat("st.param", opstr),
1108 "\t[param$a+$b], {{$s1, $s2}};"), []>;
1110 class StoreRetvalScalar4Inst<NVPTXRegClass regclass, string opstr> :
1112 (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4,
1114 !strconcat(!strconcat("st.param", opstr),
1115 "\t[func_retval+$a], {{$s1, $s2, $s3, $s4}};"), []>;
1117 class StoreRetvalScalar2Inst<NVPTXRegClass regclass, string opstr> :
1119 (ins regclass:$s1, regclass:$s2, i32imm:$a),
1120 !strconcat(!strconcat("st.param", opstr),
1121 "\t[func_retval+$a], {{$s1, $s2}};"), []>;
// Scalar-register forms of the vector parameter loads.
// The register class names the registers that receive the loaded elements,
// so it has to match the element width in the opcode suffix, the same way
// the StoreParamScalar* and StoreRetvalScalar* definitions pair them
// (b64 -> Int64Regs, b32 -> Int32Regs, b16 -> Int16Regs, b8 -> Int8Regs).
1123 def LoadParamScalar4I32 : LoadParamScalar4Inst<Int32Regs, ".v4.b32">;
1124 def LoadParamScalar4I16 : LoadParamScalar4Inst<Int16Regs, ".v4.b16">;
1125 def LoadParamScalar4I8 : LoadParamScalar4Inst<Int8Regs, ".v4.b8">;
// The two-element integer loads must not all share Int32Regs: a .v2.b64 load
// cannot target 32-bit registers, and the 16/8-bit forms should use the same
// element classes as their four-element counterparts above.
1127 def LoadParamScalar2I64 : LoadParamScalar2Inst<Int64Regs, ".v2.b64">;
1128 def LoadParamScalar2I32 : LoadParamScalar2Inst<Int32Regs, ".v2.b32">;
1129 def LoadParamScalar2I16 : LoadParamScalar2Inst<Int16Regs, ".v2.b16">;
1130 def LoadParamScalar2I8 : LoadParamScalar2Inst<Int8Regs, ".v2.b8">;
// Floating-point element loads.
1132 def LoadParamScalar4F32 : LoadParamScalar4Inst<Float32Regs, ".v4.f32">;
1133 def LoadParamScalar2F32 : LoadParamScalar2Inst<Float32Regs, ".v2.f32">;
1134 def LoadParamScalar2F64 : LoadParamScalar2Inst<Float64Regs, ".v2.f64">;
// Scalar-register forms of the vector parameter stores. Arguments: the
// register class of each stored element and the suffix appended to
// "st.param" by StoreParamScalar{2,4}Inst.
// Four-element integer stores.
1136 def StoreParamScalar4I32 : StoreParamScalar4Inst<Int32Regs, ".v4.b32">;
1137 def StoreParamScalar4I16 : StoreParamScalar4Inst<Int16Regs, ".v4.b16">;
1138 def StoreParamScalar4I8 : StoreParamScalar4Inst<Int8Regs, ".v4.b8">;
// Two-element integer stores.
1140 def StoreParamScalar2I64 : StoreParamScalar2Inst<Int64Regs, ".v2.b64">;
1141 def StoreParamScalar2I32 : StoreParamScalar2Inst<Int32Regs, ".v2.b32">;
1142 def StoreParamScalar2I16 : StoreParamScalar2Inst<Int16Regs, ".v2.b16">;
1143 def StoreParamScalar2I8 : StoreParamScalar2Inst<Int8Regs, ".v2.b8">;
// Floating-point stores.
1145 def StoreParamScalar4F32 : StoreParamScalar4Inst<Float32Regs, ".v4.f32">;
1146 def StoreParamScalar2F32 : StoreParamScalar2Inst<Float32Regs, ".v2.f32">;
1147 def StoreParamScalar2F64 : StoreParamScalar2Inst<Float64Regs, ".v2.f64">;
// Scalar-register forms of the vector return-value stores. Arguments: the
// register class of each stored element and the suffix appended to
// "st.param" by StoreRetvalScalar{2,4}Inst (which address func_retval+$a).
// Four-element integer stores.
1149 def StoreRetvalScalar4I32 : StoreRetvalScalar4Inst<Int32Regs, ".v4.b32">;
1150 def StoreRetvalScalar4I16 : StoreRetvalScalar4Inst<Int16Regs, ".v4.b16">;
1151 def StoreRetvalScalar4I8 : StoreRetvalScalar4Inst<Int8Regs, ".v4.b8">;
// Two-element integer stores.
1153 def StoreRetvalScalar2I64 : StoreRetvalScalar2Inst<Int64Regs, ".v2.b64">;
1154 def StoreRetvalScalar2I32 : StoreRetvalScalar2Inst<Int32Regs, ".v2.b32">;
1155 def StoreRetvalScalar2I16 : StoreRetvalScalar2Inst<Int16Regs, ".v2.b16">;
1156 def StoreRetvalScalar2I8 : StoreRetvalScalar2Inst<Int8Regs, ".v2.b8">;
// Floating-point stores.
1158 def StoreRetvalScalar4F32 : StoreRetvalScalar4Inst<Float32Regs, ".v4.f32">;
1159 def StoreRetvalScalar2F32 : StoreRetvalScalar2Inst<Float32Regs, ".v2.f32">;
1160 def StoreRetvalScalar2F64 : StoreRetvalScalar2Inst<Float64Regs, ".v2.f64">;
1162 class LoadParamVecInst<NVPTXRegClass regclass, string opstr, NVPTXInst sop=NOP>:
1163 NVPTXVecInst<(outs regclass:$dst), (ins i32imm:$a, i32imm:$b),
1164 "loadparam : $dst <- [$a, $b]",
1165 [(set regclass:$dst, (LoadParam (i32 imm:$a), (i32 imm:$b)))],
// Vector-register parameter store, selected from the StoreParam node.
//   regclass - vector register class of the stored value $val
//   opstr    - accepted but not used by anything in this class body
//   sop      - scalar instruction passed through to NVPTXVecInst
//              (defaults to NOP)
// The assembly string "storeparam : [$a, $b] <- $val" is not a PTX
// instruction; presumably this instruction is replaced by `sop` before
// emission -- confirm against the vector lowering pass.
1168 class StoreParamVecInst<NVPTXRegClass regclass, string opstr, NVPTXInst sop=NOP>
1169 : NVPTXVecInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
1170 "storeparam : [$a, $b] <- $val",
1171 [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)], sop>;
1173 class StoreRetvalVecInst<NVPTXRegClass regclass, string opstr,
1175 : NVPTXVecInst<(outs), (ins regclass:$val, i32imm:$a),
1176 "storeretval : retval[$a] <- $val",
1177 [(StoreRetval (i32 imm:$a), regclass:$val)], sop>;
// Vector-register parameter loads, all classified as isVecLD.
// LoadParamVecInst arguments: vector register class, type suffix string, and
// the LoadParamScalar* instruction with the same element count and type.
// Four-element integer vectors.
1179 let VecInstType=isVecLD.Value in {
1180 def LoadParamV4I32 : LoadParamVecInst<V4I32Regs, ".v4.b32",
1181 LoadParamScalar4I32>;
1182 def LoadParamV4I16 : LoadParamVecInst<V4I16Regs, ".v4.b16",
1183 LoadParamScalar4I16>;
1184 def LoadParamV4I8 : LoadParamVecInst<V4I8Regs, ".v4.b8",
1185 LoadParamScalar4I8>;
// Two-element integer vectors.
1187 def LoadParamV2I64 : LoadParamVecInst<V2I64Regs, ".v2.b64",
1188 LoadParamScalar2I64>;
1189 def LoadParamV2I32 : LoadParamVecInst<V2I32Regs, ".v2.b32",
1190 LoadParamScalar2I32>;
1191 def LoadParamV2I16 : LoadParamVecInst<V2I16Regs, ".v2.b16",
1192 LoadParamScalar2I16>;
1193 def LoadParamV2I8 : LoadParamVecInst<V2I8Regs, ".v2.b8",
1194 LoadParamScalar2I8>;
// Floating-point vectors.
1196 def LoadParamV4F32 : LoadParamVecInst<V4F32Regs, ".v4.f32",
1197 LoadParamScalar4F32>;
1198 def LoadParamV2F32 : LoadParamVecInst<V2F32Regs, ".v2.f32",
1199 LoadParamScalar2F32>;
1200 def LoadParamV2F64 : LoadParamVecInst<V2F64Regs, ".v2.f64",
1201 LoadParamScalar2F64>;
// Vector-register parameter stores and return-value stores, all classified
// as isVecST. Each def names the vector register class, the type suffix
// string, and the scalar store instruction with the same element count and
// type.
1204 let VecInstType=isVecST.Value in {
// --- StoreParam: four-element integer vectors ---
1205 def StoreParamV4I32 : StoreParamVecInst<V4I32Regs, ".v4.b32",
1206 StoreParamScalar4I32>;
1207 def StoreParamV4I16 : StoreParamVecInst<V4I16Regs, ".v4.b16",
1208 StoreParamScalar4I16>;
1209 def StoreParamV4I8 : StoreParamVecInst<V4I8Regs, ".v4.b8",
1210 StoreParamScalar4I8>;
// --- StoreParam: two-element integer vectors ---
1212 def StoreParamV2I64 : StoreParamVecInst<V2I64Regs, ".v2.b64",
1213 StoreParamScalar2I64>;
1214 def StoreParamV2I32 : StoreParamVecInst<V2I32Regs, ".v2.b32",
1215 StoreParamScalar2I32>;
1216 def StoreParamV2I16 : StoreParamVecInst<V2I16Regs, ".v2.b16",
1217 StoreParamScalar2I16>;
1218 def StoreParamV2I8 : StoreParamVecInst<V2I8Regs, ".v2.b8",
1219 StoreParamScalar2I8>;
// --- StoreParam: floating-point vectors ---
1221 def StoreParamV4F32 : StoreParamVecInst<V4F32Regs, ".v4.f32",
1222 StoreParamScalar4F32>;
1223 def StoreParamV2F32 : StoreParamVecInst<V2F32Regs, ".v2.f32",
1224 StoreParamScalar2F32>;
1225 def StoreParamV2F64 : StoreParamVecInst<V2F64Regs, ".v2.f64",
1226 StoreParamScalar2F64>;
// --- StoreRetval: four-element integer vectors ---
1228 def StoreRetvalV4I32 : StoreRetvalVecInst<V4I32Regs, ".v4.b32",
1229 StoreRetvalScalar4I32>;
1230 def StoreRetvalV4I16 : StoreRetvalVecInst<V4I16Regs, ".v4.b16",
1231 StoreRetvalScalar4I16>;
1232 def StoreRetvalV4I8 : StoreRetvalVecInst<V4I8Regs, ".v4.b8",
1233 StoreRetvalScalar4I8>;
// --- StoreRetval: two-element integer vectors ---
1235 def StoreRetvalV2I64 : StoreRetvalVecInst<V2I64Regs, ".v2.b64",
1236 StoreRetvalScalar2I64>;
1237 def StoreRetvalV2I32 : StoreRetvalVecInst<V2I32Regs, ".v2.b32",
1238 StoreRetvalScalar2I32>;
1239 def StoreRetvalV2I16 : StoreRetvalVecInst<V2I16Regs, ".v2.b16",
1240 StoreRetvalScalar2I16>;
1241 def StoreRetvalV2I8 : StoreRetvalVecInst<V2I8Regs, ".v2.b8",
1242 StoreRetvalScalar2I8>;
// --- StoreRetval: floating-point vectors ---
1244 def StoreRetvalV4F32 : StoreRetvalVecInst<V4F32Regs, ".v4.f32",
1245 StoreRetvalScalar4F32>;
1246 def StoreRetvalV2F32 : StoreRetvalVecInst<V2F32Regs, ".v2.f32",
1247 StoreRetvalScalar2F32>;
1248 def StoreRetvalV2F64 : StoreRetvalVecInst<V2F64Regs, ".v2.f64",
1249 StoreRetvalScalar2F64>;
1254 // Int vector to int scalar bit convert
// Each pattern rewrites a bitconvert from an integer vector to an integer
// scalar of the same total width: every element is extracted in index order
// and the scalars are handed to the matching V<n>I<w>toI<W> instruction
// (defined outside this section).
// v4i8 -> i32
1256 def : Pat<(i32 (bitconvert V4I8Regs:$s)),
1257 (V4I8toI32 (V4i8Extract V4I8Regs:$s,0), (V4i8Extract V4I8Regs:$s,1),
1258 (V4i8Extract V4I8Regs:$s,2), (V4i8Extract V4I8Regs:$s,3))>;
// v4i16 -> i64
1260 def : Pat<(i64 (bitconvert V4I16Regs:$s)),
1261 (V4I16toI64 (V4i16Extract V4I16Regs:$s,0),
1262 (V4i16Extract V4I16Regs:$s,1),
1263 (V4i16Extract V4I16Regs:$s,2),
1264 (V4i16Extract V4I16Regs:$s,3))>;
// v2i8 -> i16
1266 def : Pat<(i16 (bitconvert V2I8Regs:$s)),
1267 (V2I8toI16 (V2i8Extract V2I8Regs:$s,0), (V2i8Extract V2I8Regs:$s,1))>;
// v2i16 -> i32
1269 def : Pat<(i32 (bitconvert V2I16Regs:$s)),
1270 (V2I16toI32 (V2i16Extract V2I16Regs:$s,0),
1271 (V2i16Extract V2I16Regs:$s,1))>;
// v2i32 -> i64
1273 def : Pat<(i64 (bitconvert V2I32Regs:$s)),
1274 (V2I32toI64 (V2i32Extract V2I32Regs:$s,0),
1275 (V2i32Extract V2I32Regs:$s,1))>;
1277 // Int scalar to int vector bit convert
1278 let VecInstType=isVecDest.Value in {
1280 def VecI32toV4I8 : NVPTXVecInst<(outs V4I8Regs:$d), (ins Int32Regs:$s),
1282 [(set V4I8Regs:$d, (bitconvert Int32Regs:$s))],
1285 def VecI64toV4I16 : NVPTXVecInst<(outs V4I16Regs:$d), (ins Int64Regs:$s),
1287 [(set V4I16Regs:$d, (bitconvert Int64Regs:$s))],
1290 def VecI16toV2I8 : NVPTXVecInst<(outs V2I8Regs:$d), (ins Int16Regs:$s),
1292 [(set V2I8Regs:$d, (bitconvert Int16Regs:$s))],
1295 def VecI32toV2I16 : NVPTXVecInst<(outs V2I16Regs:$d), (ins Int32Regs:$s),
1297 [(set V2I16Regs:$d, (bitconvert Int32Regs:$s))],
1300 def VecI64toV2I32 : NVPTXVecInst<(outs V2I32Regs:$d), (ins Int64Regs:$s),
1302 [(set V2I32Regs:$d, (bitconvert Int64Regs:$s))],
1306 // Int vector to int vector bit convert
1308 def : Pat<(v2i16 (bitconvert V4I8Regs:$s)),
1310 (V4I8toI32 (V4i8Extract V4I8Regs:$s,0), (V4i8Extract V4I8Regs:$s,1),
1311 (V4i8Extract V4I8Regs:$s,2), (V4i8Extract V4I8Regs:$s,3)))>;
1313 def : Pat<(v2i32 (bitconvert V4I16Regs:$s)),
1315 (V4I16toI64 (V4i16Extract V4I16Regs:$s,0), (V4i16Extract V4I16Regs:$s,1),
1316 (V4i16Extract V4I16Regs:$s,2), (V4i16Extract V4I16Regs:$s,3)))>;
1318 def : Pat<(v4i8 (bitconvert V2I16Regs:$s)),
1320 (V2I16toI32 (V2i16Extract V2I16Regs:$s,0), (V2i16Extract V2I16Regs:$s,1)))>;
1322 def : Pat<(v4i16 (bitconvert V2I32Regs:$s)),
1324 (V2I32toI64 (V2i32Extract V2I32Regs:$s,0), (V2i32Extract V2I32Regs:$s,1)))>;
1326 def : Pat<(v4i32 (bitconvert V2I64Regs:$s)),
1328 (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 0),
1329 (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 1),
1330 (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 0),
1331 (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 1))>;
1333 def : Pat<(v2i64 (bitconvert V4I32Regs:$s)),
1335 (V2I32toI64 (V4i32Extract V4I32Regs:$s,0), (V4i32Extract V4I32Regs:$s,1)),
1336 (V2I32toI64 (V4i32Extract V4I32Regs:$s,2), (V4i32Extract V4I32Regs:$s,3)))>;
1338 // Fp scalar to fp vector convert
1340 let VecInstType=isVecDest.Value in {
1341 def VecF64toV2F32 : NVPTXVecInst<(outs V2F32Regs:$d), (ins Float64Regs:$s),
1343 [(set V2F32Regs:$d, (bitconvert Float64Regs:$s))],
1347 // Fp vector to fp scalar convert
// v2f32 -> f64: extract both elements and combine them with V2F32toF64.
1349 def : Pat<(f64 (bitconvert V2F32Regs:$s)),
1350 (V2F32toF64 (V2f32Extract V2F32Regs:$s,0), (V2f32Extract V2F32Regs:$s,1))>;
1352 // Fp scalar to int vector convert
// These go through an integer scalar of the same width: BITCONVERT_*_F2I
// first, then the integer-scalar-to-vector instruction VecI*toV*.
// f32 -> v4i8
1354 def : Pat<(v4i8 (bitconvert Float32Regs:$s)),
1355 (VecI32toV4I8 (BITCONVERT_32_F2I Float32Regs:$s))>;
// f32 -> v2i16
1357 def : Pat<(v2i16 (bitconvert Float32Regs:$s)),
1358 (VecI32toV2I16 (BITCONVERT_32_F2I Float32Regs:$s))>;
// f64 -> v4i16
1360 def : Pat<(v4i16 (bitconvert Float64Regs:$s)),
1361 (VecI64toV4I16 (BITCONVERT_64_F2I Float64Regs:$s))>;
// f64 -> v2i32
1363 def : Pat<(v2i32 (bitconvert Float64Regs:$s)),
1364 (VecI64toV2I32 (BITCONVERT_64_F2I Float64Regs:$s))>;
1366 // Int vector to fp scalar convert
1368 def : Pat<(f32 (bitconvert V4I8Regs:$s)),
1370 (V4I8toI32 (V4i8Extract V4I8Regs:$s,0), (V4i8Extract V4I8Regs:$s,1),
1371 (V4i8Extract V4I8Regs:$s,2), (V4i8Extract V4I8Regs:$s,3)))>;
1373 def : Pat<(f64 (bitconvert V4I16Regs:$s)),
1375 (V4I16toI64 (V4i16Extract V4I16Regs:$s,0), (V4i16Extract V4I16Regs:$s,1),
1376 (V4i16Extract V4I16Regs:$s,2), (V4i16Extract V4I16Regs:$s,3)))>;
1378 def : Pat<(f32 (bitconvert V2I16Regs:$s)),
1380 (V2I16toI32 (V2i16Extract V2I16Regs:$s,0), (V2i16Extract V2I16Regs:$s,1)))>;
1382 def : Pat<(f64 (bitconvert V2I32Regs:$s)),
1384 (V2I32toI64 (V2i32Extract V2I32Regs:$s,0), (V2i32Extract V2I32Regs:$s,1)))>;
1386 // Int scalar to fp vector convert
// i64 -> v2f32: convert the scalar to f64 with BITCONVERT_64_I2F, then split
// it into a 2 x f32 vector with VecF64toV2F32.
1388 def : Pat<(v2f32 (bitconvert Int64Regs:$s)),
1389 (VecF64toV2F32 (BITCONVERT_64_I2F Int64Regs:$s))>;
1391 // Fp vector to int scalar convert
1393 def : Pat<(i64 (bitconvert V2F32Regs:$s)),
1395 (V2F32toF64 (V2f32Extract V2F32Regs:$s,0), (V2f32Extract V2F32Regs:$s,1)))>;
1397 // Int vector to fp vector convert
1399 def : Pat<(v4f32 (bitconvert V2I64Regs:$s)),
1401 (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32
1402 (V2i64Extract V2I64Regs:$s, 0)), 0)),
1403 (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32
1404 (V2i64Extract V2I64Regs:$s, 0)), 1)),
1405 (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32
1406 (V2i64Extract V2I64Regs:$s, 1)), 0)),
1407 (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32
1408 (V2i64Extract V2I64Regs:$s, 1)), 1)))>;
1410 def : Pat<(v2f64 (bitconvert V2I64Regs:$s)),
1412 (BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$s,0)),
1413 (BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$s,1)))>;
1415 def : Pat<(v2f32 (bitconvert V2I32Regs:$s)),
1417 (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$s,0)),
1418 (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$s,1)))>;
1420 def : Pat<(v2f64 (bitconvert V4I32Regs:$s)),
1422 (BITCONVERT_64_I2F (V2I32toI64 (V4i32Extract V4I32Regs:$s,0),
1423 (V4i32Extract V4I32Regs:$s,1))),
1424 (BITCONVERT_64_I2F (V2I32toI64 (V4i32Extract V4I32Regs:$s,2),
1425 (V4i32Extract V4I32Regs:$s,3))))>;
1427 def : Pat<(v4f32 (bitconvert V4I32Regs:$s)),
1429 (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,0)),
1430 (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,1)),
1431 (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,2)),
1432 (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,3)))>;
// v4i16 -> v2f32, done in three steps: combine the four extracted i16
// elements with V4I16toI64, convert that i64 to f64 with BITCONVERT_64_I2F,
// then split the f64 into 2 x f32 with VecF64toV2F32.
1434 def : Pat<(v2f32 (bitconvert V4I16Regs:$s)),
1435 (VecF64toV2F32 (BITCONVERT_64_I2F
1436 (V4I16toI64 (V4i16Extract V4I16Regs:$s,0),
1437 (V4i16Extract V4I16Regs:$s,1),
1438 (V4i16Extract V4I16Regs:$s,2),
1439 (V4i16Extract V4I16Regs:$s,3))))>;
1441 // Fp vector to int vector convert
1443 def : Pat<(v2i64 (bitconvert V4F32Regs:$s)),
1445 (BITCONVERT_64_F2I (V2F32toF64 (V4f32Extract V4F32Regs:$s,0),
1446 (V4f32Extract V4F32Regs:$s,1))),
1447 (BITCONVERT_64_F2I (V2F32toF64 (V4f32Extract V4F32Regs:$s,2),
1448 (V4f32Extract V4F32Regs:$s,3))))>;
1450 def : Pat<(v2i64 (bitconvert V2F64Regs:$s)),
1452 (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$s,0)),
1453 (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$s,1)))>;
1455 def : Pat<(v2i32 (bitconvert V2F32Regs:$s)),
1457 (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$s,0)),
1458 (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$s,1)))>;
1460 def : Pat<(v4i32 (bitconvert V2F64Regs:$s)),
1462 (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32
1463 (V2f64Extract V2F64Regs:$s, 0)), 0)),
1464 (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32
1465 (V2f64Extract V2F64Regs:$s, 0)), 1)),
1466 (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32
1467 (V2f64Extract V2F64Regs:$s, 1)), 0)),
1468 (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32
1469 (V2f64Extract V2F64Regs:$s, 1)), 1)))>;
1471 def : Pat<(v4i32 (bitconvert V4F32Regs:$s)),
1473 (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,0)),
1474 (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,1)),
1475 (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,2)),
1476 (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,3)))>;
// v2f32 -> v4i16, done in three steps: combine the two extracted f32
// elements with V2F32toF64, convert that f64 to i64 with BITCONVERT_64_F2I,
// then split the i64 into 4 x i16 with VecI64toV4I16.
1478 def : Pat<(v4i16 (bitconvert V2F32Regs:$s)),
1479 (VecI64toV4I16 (BITCONVERT_64_F2I
1480 (V2F32toF64 (V2f32Extract V2F32Regs:$s,0),
1481 (V2f32Extract V2F32Regs:$s,1))))>;