1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines an instruction selector for the NVPTX target.
12 //===----------------------------------------------------------------------===//
14 #include "NVPTXISelDAGToDAG.h"
15 #include "llvm/IR/GlobalValue.h"
16 #include "llvm/IR/Instructions.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/ErrorHandling.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include "llvm/Target/TargetIntrinsicInfo.h"
25 #define DEBUG_TYPE "nvptx-isel"
27 unsigned FMAContractLevel = 0;
29 static cl::opt<unsigned, true>
30 FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
31 cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
32 " 1: do it 2: do it aggressively"),
33 cl::location(FMAContractLevel),
36 static cl::opt<int> UsePrecDivF32(
37 "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
38 cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
39 " IEEE Compliant F32 div.rnd if avaiable."),
43 UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
44 cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
48 FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
49 cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
53 /// createNVPTXISelDag - This pass converts a legalized DAG into a
54 /// NVPTX-specific DAG, ready for instruction scheduling.
55 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
56 llvm::CodeGenOpt::Level OptLevel) {
57 return new NVPTXDAGToDAGISel(TM, OptLevel);
60 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
61 CodeGenOpt::Level OptLevel)
62 : SelectionDAGISel(tm, OptLevel),
63 Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
65 doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1);
66 doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1);
68 (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel == 2);
70 (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2);
72 allowFMA = (FMAContractLevel >= 1);
74 doMulWide = (OptLevel > 0);
77 int NVPTXDAGToDAGISel::getDivF32Level() const {
78 if (UsePrecDivF32.getNumOccurrences() > 0) {
79 // If nvptx-prec-div32=N is used on the command-line, always honor it
82 // Otherwise, use div.approx if fast math is enabled
83 if (TM.Options.UnsafeFPMath)
90 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
91 if (UsePrecSqrtF32.getNumOccurrences() > 0) {
92 // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
93 return UsePrecSqrtF32;
95 // Otherwise, use sqrt.approx if fast math is enabled
96 if (TM.Options.UnsafeFPMath)
103 bool NVPTXDAGToDAGISel::useF32FTZ() const {
104 if (FtzEnabled.getNumOccurrences() > 0) {
105 // If nvptx-f32ftz is used on the command-line, always honor it
108 const Function *F = MF->getFunction();
109 // Otherwise, check for an nvptx-f32ftz attribute on the function
110 if (F->hasFnAttribute("nvptx-f32ftz"))
111 return (F->getAttributes().getAttribute(AttributeSet::FunctionIndex,
113 .getValueAsString() == "true");
119 /// Select - Select instructions not customized! Used for
120 /// expanded, promoted and normal instructions.
// NOTE(review): this chunk is a line-numbered lossy paste — interior lines
// are missing (the `case ISD::LOAD:` / `case ISD::STORE:` labels, every
// `break;`, the `default:` arm, the `if (ResNode) return ResNode;` guard and
// the closing braces are absent), so the text below does not compile as-is.
// Comments describe only the dispatch that is visible; reconcile against the
// upstream LLVM NVPTX backend before editing.
121 SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
123 if (N->isMachineOpcode()) {
125 return nullptr; // Already selected.
128 SDNode *ResNode = nullptr;
// Dispatch on the opcode to a per-family custom selection helper; each
// helper returns the selected machine node, or null to fall through to the
// tablegen-generated matcher at the bottom.
129 switch (N->getOpcode()) {
// Scalar load/store (case labels missing in this paste).
131 ResNode = SelectLoad(N);
134 ResNode = SelectStore(N);
// Vector loads/stores and the read-only (LDG) / uniform (LDU) variants.
136 case NVPTXISD::LoadV2:
137 case NVPTXISD::LoadV4:
138 ResNode = SelectLoadVector(N);
140 case NVPTXISD::LDGV2:
141 case NVPTXISD::LDGV4:
142 case NVPTXISD::LDUV2:
143 case NVPTXISD::LDUV4:
144 ResNode = SelectLDGLDUVector(N);
146 case NVPTXISD::StoreV2:
147 case NVPTXISD::StoreV4:
148 ResNode = SelectStoreVector(N);
// Parameter / return-value plumbing for the NVPTX calling convention.
150 case NVPTXISD::LoadParam:
151 case NVPTXISD::LoadParamV2:
152 case NVPTXISD::LoadParamV4:
153 ResNode = SelectLoadParam(N);
155 case NVPTXISD::StoreRetval:
156 case NVPTXISD::StoreRetvalV2:
157 case NVPTXISD::StoreRetvalV4:
158 ResNode = SelectStoreRetval(N);
160 case NVPTXISD::StoreParam:
161 case NVPTXISD::StoreParamV2:
162 case NVPTXISD::StoreParamV4:
163 case NVPTXISD::StoreParamS32:
164 case NVPTXISD::StoreParamU32:
165 ResNode = SelectStoreParam(N);
167 case ISD::INTRINSIC_WO_CHAIN:
168 ResNode = SelectIntrinsicNoChain(N);
// Texture-fetch nodes (1D/2D/3D, array and mip/grad variants) all funnel
// into one helper.
170 case NVPTXISD::Tex1DFloatI32:
171 case NVPTXISD::Tex1DFloatFloat:
172 case NVPTXISD::Tex1DFloatFloatLevel:
173 case NVPTXISD::Tex1DFloatFloatGrad:
174 case NVPTXISD::Tex1DI32I32:
175 case NVPTXISD::Tex1DI32Float:
176 case NVPTXISD::Tex1DI32FloatLevel:
177 case NVPTXISD::Tex1DI32FloatGrad:
178 case NVPTXISD::Tex1DArrayFloatI32:
179 case NVPTXISD::Tex1DArrayFloatFloat:
180 case NVPTXISD::Tex1DArrayFloatFloatLevel:
181 case NVPTXISD::Tex1DArrayFloatFloatGrad:
182 case NVPTXISD::Tex1DArrayI32I32:
183 case NVPTXISD::Tex1DArrayI32Float:
184 case NVPTXISD::Tex1DArrayI32FloatLevel:
185 case NVPTXISD::Tex1DArrayI32FloatGrad:
186 case NVPTXISD::Tex2DFloatI32:
187 case NVPTXISD::Tex2DFloatFloat:
188 case NVPTXISD::Tex2DFloatFloatLevel:
189 case NVPTXISD::Tex2DFloatFloatGrad:
190 case NVPTXISD::Tex2DI32I32:
191 case NVPTXISD::Tex2DI32Float:
192 case NVPTXISD::Tex2DI32FloatLevel:
193 case NVPTXISD::Tex2DI32FloatGrad:
194 case NVPTXISD::Tex2DArrayFloatI32:
195 case NVPTXISD::Tex2DArrayFloatFloat:
196 case NVPTXISD::Tex2DArrayFloatFloatLevel:
197 case NVPTXISD::Tex2DArrayFloatFloatGrad:
198 case NVPTXISD::Tex2DArrayI32I32:
199 case NVPTXISD::Tex2DArrayI32Float:
200 case NVPTXISD::Tex2DArrayI32FloatLevel:
201 case NVPTXISD::Tex2DArrayI32FloatGrad:
202 case NVPTXISD::Tex3DFloatI32:
203 case NVPTXISD::Tex3DFloatFloat:
204 case NVPTXISD::Tex3DFloatFloatLevel:
205 case NVPTXISD::Tex3DFloatFloatGrad:
206 case NVPTXISD::Tex3DI32I32:
207 case NVPTXISD::Tex3DI32Float:
208 case NVPTXISD::Tex3DI32FloatLevel:
209 case NVPTXISD::Tex3DI32FloatGrad:
210 ResNode = SelectTextureIntrinsic(N);
// Surface-load nodes (trap-on-out-of-bounds variants) likewise share one
// helper.
212 case NVPTXISD::Suld1DI8Trap:
213 case NVPTXISD::Suld1DI16Trap:
214 case NVPTXISD::Suld1DI32Trap:
215 case NVPTXISD::Suld1DV2I8Trap:
216 case NVPTXISD::Suld1DV2I16Trap:
217 case NVPTXISD::Suld1DV2I32Trap:
218 case NVPTXISD::Suld1DV4I8Trap:
219 case NVPTXISD::Suld1DV4I16Trap:
220 case NVPTXISD::Suld1DV4I32Trap:
221 case NVPTXISD::Suld1DArrayI8Trap:
222 case NVPTXISD::Suld1DArrayI16Trap:
223 case NVPTXISD::Suld1DArrayI32Trap:
224 case NVPTXISD::Suld1DArrayV2I8Trap:
225 case NVPTXISD::Suld1DArrayV2I16Trap:
226 case NVPTXISD::Suld1DArrayV2I32Trap:
227 case NVPTXISD::Suld1DArrayV4I8Trap:
228 case NVPTXISD::Suld1DArrayV4I16Trap:
229 case NVPTXISD::Suld1DArrayV4I32Trap:
230 case NVPTXISD::Suld2DI8Trap:
231 case NVPTXISD::Suld2DI16Trap:
232 case NVPTXISD::Suld2DI32Trap:
233 case NVPTXISD::Suld2DV2I8Trap:
234 case NVPTXISD::Suld2DV2I16Trap:
235 case NVPTXISD::Suld2DV2I32Trap:
236 case NVPTXISD::Suld2DV4I8Trap:
237 case NVPTXISD::Suld2DV4I16Trap:
238 case NVPTXISD::Suld2DV4I32Trap:
239 case NVPTXISD::Suld2DArrayI8Trap:
240 case NVPTXISD::Suld2DArrayI16Trap:
241 case NVPTXISD::Suld2DArrayI32Trap:
242 case NVPTXISD::Suld2DArrayV2I8Trap:
243 case NVPTXISD::Suld2DArrayV2I16Trap:
244 case NVPTXISD::Suld2DArrayV2I32Trap:
245 case NVPTXISD::Suld2DArrayV4I8Trap:
246 case NVPTXISD::Suld2DArrayV4I16Trap:
247 case NVPTXISD::Suld2DArrayV4I32Trap:
248 case NVPTXISD::Suld3DI8Trap:
249 case NVPTXISD::Suld3DI16Trap:
250 case NVPTXISD::Suld3DI32Trap:
251 case NVPTXISD::Suld3DV2I8Trap:
252 case NVPTXISD::Suld3DV2I16Trap:
253 case NVPTXISD::Suld3DV2I32Trap:
254 case NVPTXISD::Suld3DV4I8Trap:
255 case NVPTXISD::Suld3DV4I16Trap:
256 case NVPTXISD::Suld3DV4I32Trap:
257 ResNode = SelectSurfaceIntrinsic(N);
// Bit-field extract; the guarding case labels (presumably ISD::AND /
// shift opcodes — TODO confirm upstream) are among the missing lines.
263 ResNode = SelectBFE(N);
265 case ISD::ADDRSPACECAST:
266 ResNode = SelectAddrSpaceCast(N);
// No custom selection matched: defer to the tablegen-generated matcher.
273 return SelectCode(N);
276 static unsigned int getCodeAddrSpace(MemSDNode *N,
277 const NVPTXSubtarget &Subtarget) {
278 const Value *Src = N->getMemOperand()->getValue();
281 return NVPTX::PTXLdStInstCode::GENERIC;
283 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
284 switch (PT->getAddressSpace()) {
285 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
286 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
287 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
288 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
289 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
290 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
294 return NVPTX::PTXLdStInstCode::GENERIC;
297 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
298 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
302 case Intrinsic::nvvm_texsurf_handle_internal:
303 return SelectTexSurfHandle(N);
307 SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
308 // Op 0 is the intrinsic ID
309 SDValue Wrapper = N->getOperand(1);
310 SDValue GlobalVal = Wrapper.getOperand(0);
311 return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64,
315 SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
316 SDValue Src = N->getOperand(0);
317 AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
318 unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
319 unsigned DstAddrSpace = CastN->getDestAddressSpace();
321 assert(SrcAddrSpace != DstAddrSpace &&
322 "addrspacecast must be between different address spaces");
324 if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
325 // Specific to generic
327 switch (SrcAddrSpace) {
328 default: report_fatal_error("Bad address space in addrspacecast");
329 case ADDRESS_SPACE_GLOBAL:
330 Opc = Subtarget.is64Bit() ? NVPTX::cvta_global_yes_64
331 : NVPTX::cvta_global_yes;
333 case ADDRESS_SPACE_SHARED:
334 Opc = Subtarget.is64Bit() ? NVPTX::cvta_shared_yes_64
335 : NVPTX::cvta_shared_yes;
337 case ADDRESS_SPACE_CONST:
338 Opc = Subtarget.is64Bit() ? NVPTX::cvta_const_yes_64
339 : NVPTX::cvta_const_yes;
341 case ADDRESS_SPACE_LOCAL:
342 Opc = Subtarget.is64Bit() ? NVPTX::cvta_local_yes_64
343 : NVPTX::cvta_local_yes;
346 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
348 // Generic to specific
349 if (SrcAddrSpace != 0)
350 report_fatal_error("Cannot cast between two non-generic address spaces");
352 switch (DstAddrSpace) {
353 default: report_fatal_error("Bad address space in addrspacecast");
354 case ADDRESS_SPACE_GLOBAL:
355 Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_global_yes_64
356 : NVPTX::cvta_to_global_yes;
358 case ADDRESS_SPACE_SHARED:
359 Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_shared_yes_64
360 : NVPTX::cvta_to_shared_yes;
362 case ADDRESS_SPACE_CONST:
363 Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_const_yes_64
364 : NVPTX::cvta_to_const_yes;
366 case ADDRESS_SPACE_LOCAL:
367 Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_local_yes_64
368 : NVPTX::cvta_to_local_yes;
371 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
// SelectLoad - Custom selection for a scalar ISD::LOAD: computes the PTX
// ld operand bundle (volatile flag, address-space code, vector arity,
// source type and width) and picks an LD_* opcode by value type and
// addressing mode (avar / asi / ari / areg, with _64 register variants).
// NOTE(review): this chunk is a line-numbered lossy paste — the
// `switch (TargetVT)` headers, `case MVT::*:` labels, `break;`s, `default:
// return nullptr;` arms, early-return bodies and closing braces are
// missing, so the text below does not compile as-is.
375 SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
377 LoadSDNode *LD = cast<LoadSDNode>(N);
378 EVT LoadedVT = LD->getMemoryVT();
379 SDNode *NVPTXLD = nullptr;
381 // do not support pre/post inc/dec
385 if (!LoadedVT.isSimple())
388 // Address Space Setting
389 unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
392 // - .volatile is only availalble for .global and .shared
393 bool isVolatile = LD->isVolatile();
394 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
395 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
396 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
400 MVT SimpleVT = LoadedVT.getSimpleVT();
401 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
402 if (SimpleVT.isVector()) {
403 unsigned num = SimpleVT.getVectorNumElements();
405 vecType = NVPTX::PTXLdStInstCode::V2;
407 vecType = NVPTX::PTXLdStInstCode::V4;
412 // Type Setting: fromType + fromTypeWidth
414 // Sign : ISD::SEXTLOAD
415 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
417 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
418 MVT ScalarVT = SimpleVT.getScalarType();
419 // Read at least 8 bits (predicates are stored as 8-bit values)
420 unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
421 unsigned int fromType;
422 if ((LD->getExtensionType() == ISD::SEXTLOAD))
423 fromType = NVPTX::PTXLdStInstCode::Signed;
424 else if (ScalarVT.isFloatingPoint())
425 fromType = NVPTX::PTXLdStInstCode::Float;
427 fromType = NVPTX::PTXLdStInstCode::Unsigned;
429 // Create the machine instruction DAG
430 SDValue Chain = N->getOperand(0);
431 SDValue N1 = N->getOperand(1);
433 SDValue Offset, Base;
435 MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
// Addressing-mode ladder: direct address (avar) ...
437 if (SelectDirectAddr(N1, Addr)) {
440 Opcode = NVPTX::LD_i8_avar;
443 Opcode = NVPTX::LD_i16_avar;
446 Opcode = NVPTX::LD_i32_avar;
449 Opcode = NVPTX::LD_i64_avar;
452 Opcode = NVPTX::LD_f32_avar;
455 Opcode = NVPTX::LD_f64_avar;
460 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
461 getI32Imm(vecType), getI32Imm(fromType),
462 getI32Imm(fromTypeWidth), Addr, Chain };
463 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// ... symbol + immediate (asi) ...
464 } else if (Subtarget.is64Bit()
465 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
466 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
469 Opcode = NVPTX::LD_i8_asi;
472 Opcode = NVPTX::LD_i16_asi;
475 Opcode = NVPTX::LD_i32_asi;
478 Opcode = NVPTX::LD_i64_asi;
481 Opcode = NVPTX::LD_f32_asi;
484 Opcode = NVPTX::LD_f64_asi;
489 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
490 getI32Imm(vecType), getI32Imm(fromType),
491 getI32Imm(fromTypeWidth), Base, Offset, Chain };
492 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// ... register + immediate (ari / ari_64) ...
493 } else if (Subtarget.is64Bit()
494 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
495 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
496 if (Subtarget.is64Bit()) {
499 Opcode = NVPTX::LD_i8_ari_64;
502 Opcode = NVPTX::LD_i16_ari_64;
505 Opcode = NVPTX::LD_i32_ari_64;
508 Opcode = NVPTX::LD_i64_ari_64;
511 Opcode = NVPTX::LD_f32_ari_64;
514 Opcode = NVPTX::LD_f64_ari_64;
522 Opcode = NVPTX::LD_i8_ari;
525 Opcode = NVPTX::LD_i16_ari;
528 Opcode = NVPTX::LD_i32_ari;
531 Opcode = NVPTX::LD_i64_ari;
534 Opcode = NVPTX::LD_f32_ari;
537 Opcode = NVPTX::LD_f64_ari;
543 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
544 getI32Imm(vecType), getI32Imm(fromType),
545 getI32Imm(fromTypeWidth), Base, Offset, Chain };
546 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// ... plain register (areg / areg_64) fallback.
548 if (Subtarget.is64Bit()) {
551 Opcode = NVPTX::LD_i8_areg_64;
554 Opcode = NVPTX::LD_i16_areg_64;
557 Opcode = NVPTX::LD_i32_areg_64;
560 Opcode = NVPTX::LD_i64_areg_64;
563 Opcode = NVPTX::LD_f32_areg_64;
566 Opcode = NVPTX::LD_f64_areg_64;
574 Opcode = NVPTX::LD_i8_areg;
577 Opcode = NVPTX::LD_i16_areg;
580 Opcode = NVPTX::LD_i32_areg;
583 Opcode = NVPTX::LD_i64_areg;
586 Opcode = NVPTX::LD_f32_areg;
589 Opcode = NVPTX::LD_f64_areg;
595 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
596 getI32Imm(vecType), getI32Imm(fromType),
597 getI32Imm(fromTypeWidth), N1, Chain };
598 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Attach the original memory operand so aliasing info survives selection.
602 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
603 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
604 cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
// SelectLoadVector - Custom selection for NVPTXISD::LoadV2/LoadV4: same
// operand bundle as SelectLoad, but selecting LDV_* v2/v4 opcodes keyed on
// the element type and addressing mode (avar / asi / ari / areg, with _64
// variants on 64-bit subtargets).
// NOTE(review): line-numbered lossy paste — switch headers, `case MVT::*:`
// labels, `break;`s, `default:` arms, `} else {` lines and closing braces
// are missing; the text does not compile as-is.
610 SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
612 SDValue Chain = N->getOperand(0);
613 SDValue Op1 = N->getOperand(1);
614 SDValue Addr, Offset, Base;
618 MemSDNode *MemSD = cast<MemSDNode>(N);
619 EVT LoadedVT = MemSD->getMemoryVT();
621 if (!LoadedVT.isSimple())
624 // Address Space Setting
625 unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
628 // - .volatile is only availalble for .global and .shared
629 bool IsVolatile = MemSD->isVolatile();
630 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
631 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
632 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
636 MVT SimpleVT = LoadedVT.getSimpleVT();
638 // Type Setting: fromType + fromTypeWidth
640 // Sign : ISD::SEXTLOAD
641 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
643 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
644 MVT ScalarVT = SimpleVT.getScalarType();
645 // Read at least 8 bits (predicates are stored as 8-bit values)
646 unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
647 unsigned int FromType;
648 // The last operand holds the original LoadSDNode::getExtensionType() value
649 unsigned ExtensionType = cast<ConstantSDNode>(
650 N->getOperand(N->getNumOperands() - 1))->getZExtValue();
651 if (ExtensionType == ISD::SEXTLOAD)
652 FromType = NVPTX::PTXLdStInstCode::Signed;
653 else if (ScalarVT.isFloatingPoint())
654 FromType = NVPTX::PTXLdStInstCode::Float;
656 FromType = NVPTX::PTXLdStInstCode::Unsigned;
// Vector arity code for the ld.v2 / ld.v4 form.
660 switch (N->getOpcode()) {
661 case NVPTXISD::LoadV2:
662 VecType = NVPTX::PTXLdStInstCode::V2;
664 case NVPTXISD::LoadV4:
665 VecType = NVPTX::PTXLdStInstCode::V4;
671 EVT EltVT = N->getValueType(0);
// Addressing-mode ladder, as in SelectLoad: direct address (avar) ...
673 if (SelectDirectAddr(Op1, Addr)) {
674 switch (N->getOpcode()) {
677 case NVPTXISD::LoadV2:
678 switch (EltVT.getSimpleVT().SimpleTy) {
682 Opcode = NVPTX::LDV_i8_v2_avar;
685 Opcode = NVPTX::LDV_i16_v2_avar;
688 Opcode = NVPTX::LDV_i32_v2_avar;
691 Opcode = NVPTX::LDV_i64_v2_avar;
694 Opcode = NVPTX::LDV_f32_v2_avar;
697 Opcode = NVPTX::LDV_f64_v2_avar;
701 case NVPTXISD::LoadV4:
702 switch (EltVT.getSimpleVT().SimpleTy) {
706 Opcode = NVPTX::LDV_i8_v4_avar;
709 Opcode = NVPTX::LDV_i16_v4_avar;
712 Opcode = NVPTX::LDV_i32_v4_avar;
715 Opcode = NVPTX::LDV_f32_v4_avar;
721 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
722 getI32Imm(VecType), getI32Imm(FromType),
723 getI32Imm(FromTypeWidth), Addr, Chain };
724 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// ... symbol + immediate (asi) ...
725 } else if (Subtarget.is64Bit()
726 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
727 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
728 switch (N->getOpcode()) {
731 case NVPTXISD::LoadV2:
732 switch (EltVT.getSimpleVT().SimpleTy) {
736 Opcode = NVPTX::LDV_i8_v2_asi;
739 Opcode = NVPTX::LDV_i16_v2_asi;
742 Opcode = NVPTX::LDV_i32_v2_asi;
745 Opcode = NVPTX::LDV_i64_v2_asi;
748 Opcode = NVPTX::LDV_f32_v2_asi;
751 Opcode = NVPTX::LDV_f64_v2_asi;
755 case NVPTXISD::LoadV4:
756 switch (EltVT.getSimpleVT().SimpleTy) {
760 Opcode = NVPTX::LDV_i8_v4_asi;
763 Opcode = NVPTX::LDV_i16_v4_asi;
766 Opcode = NVPTX::LDV_i32_v4_asi;
769 Opcode = NVPTX::LDV_f32_v4_asi;
775 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
776 getI32Imm(VecType), getI32Imm(FromType),
777 getI32Imm(FromTypeWidth), Base, Offset, Chain };
778 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// ... register + immediate (ari / ari_64) ...
779 } else if (Subtarget.is64Bit()
780 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
781 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
782 if (Subtarget.is64Bit()) {
783 switch (N->getOpcode()) {
786 case NVPTXISD::LoadV2:
787 switch (EltVT.getSimpleVT().SimpleTy) {
791 Opcode = NVPTX::LDV_i8_v2_ari_64;
794 Opcode = NVPTX::LDV_i16_v2_ari_64;
797 Opcode = NVPTX::LDV_i32_v2_ari_64;
800 Opcode = NVPTX::LDV_i64_v2_ari_64;
803 Opcode = NVPTX::LDV_f32_v2_ari_64;
806 Opcode = NVPTX::LDV_f64_v2_ari_64;
810 case NVPTXISD::LoadV4:
811 switch (EltVT.getSimpleVT().SimpleTy) {
815 Opcode = NVPTX::LDV_i8_v4_ari_64;
818 Opcode = NVPTX::LDV_i16_v4_ari_64;
821 Opcode = NVPTX::LDV_i32_v4_ari_64;
824 Opcode = NVPTX::LDV_f32_v4_ari_64;
830 switch (N->getOpcode()) {
833 case NVPTXISD::LoadV2:
834 switch (EltVT.getSimpleVT().SimpleTy) {
838 Opcode = NVPTX::LDV_i8_v2_ari;
841 Opcode = NVPTX::LDV_i16_v2_ari;
844 Opcode = NVPTX::LDV_i32_v2_ari;
847 Opcode = NVPTX::LDV_i64_v2_ari;
850 Opcode = NVPTX::LDV_f32_v2_ari;
853 Opcode = NVPTX::LDV_f64_v2_ari;
857 case NVPTXISD::LoadV4:
858 switch (EltVT.getSimpleVT().SimpleTy) {
862 Opcode = NVPTX::LDV_i8_v4_ari;
865 Opcode = NVPTX::LDV_i16_v4_ari;
868 Opcode = NVPTX::LDV_i32_v4_ari;
871 Opcode = NVPTX::LDV_f32_v4_ari;
878 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
879 getI32Imm(VecType), getI32Imm(FromType),
880 getI32Imm(FromTypeWidth), Base, Offset, Chain };
882 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// ... plain register (areg / areg_64) fallback.
884 if (Subtarget.is64Bit()) {
885 switch (N->getOpcode()) {
888 case NVPTXISD::LoadV2:
889 switch (EltVT.getSimpleVT().SimpleTy) {
893 Opcode = NVPTX::LDV_i8_v2_areg_64;
896 Opcode = NVPTX::LDV_i16_v2_areg_64;
899 Opcode = NVPTX::LDV_i32_v2_areg_64;
902 Opcode = NVPTX::LDV_i64_v2_areg_64;
905 Opcode = NVPTX::LDV_f32_v2_areg_64;
908 Opcode = NVPTX::LDV_f64_v2_areg_64;
912 case NVPTXISD::LoadV4:
913 switch (EltVT.getSimpleVT().SimpleTy) {
917 Opcode = NVPTX::LDV_i8_v4_areg_64;
920 Opcode = NVPTX::LDV_i16_v4_areg_64;
923 Opcode = NVPTX::LDV_i32_v4_areg_64;
926 Opcode = NVPTX::LDV_f32_v4_areg_64;
932 switch (N->getOpcode()) {
935 case NVPTXISD::LoadV2:
936 switch (EltVT.getSimpleVT().SimpleTy) {
940 Opcode = NVPTX::LDV_i8_v2_areg;
943 Opcode = NVPTX::LDV_i16_v2_areg;
946 Opcode = NVPTX::LDV_i32_v2_areg;
949 Opcode = NVPTX::LDV_i64_v2_areg;
952 Opcode = NVPTX::LDV_f32_v2_areg;
955 Opcode = NVPTX::LDV_f64_v2_areg;
959 case NVPTXISD::LoadV4:
960 switch (EltVT.getSimpleVT().SimpleTy) {
964 Opcode = NVPTX::LDV_i8_v4_areg;
967 Opcode = NVPTX::LDV_i16_v4_areg;
970 Opcode = NVPTX::LDV_i32_v4_areg;
973 Opcode = NVPTX::LDV_f32_v4_areg;
980 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
981 getI32Imm(VecType), getI32Imm(FromType),
982 getI32Imm(FromTypeWidth), Op1, Chain };
983 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Attach the original memory operand so aliasing info survives selection.
986 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
987 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
988 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
// SelectLDGLDUVector - Custom selection for the vector forms of ld.global.nc
// (LDG) and ldu (LDU): picks an INT_PTX_LDG/LDU_G_* element-wise opcode by
// v2/v4 arity, element type, and addressing mode (avar / ari / areg, with
// 32/64-bit register variants).  Note this ladder has no asi arm, unlike
// SelectLoadVector.
// NOTE(review): line-numbered lossy paste — `case MVT::*:` labels,
// `break;`s, `default:` arms, `} else {` lines and closing braces are
// missing; the text does not compile as-is.
993 SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
995 SDValue Chain = N->getOperand(0);
996 SDValue Op1 = N->getOperand(1);
1000 MemSDNode *Mem = cast<MemSDNode>(N);
1001 SDValue Base, Offset, Addr;
1003 EVT EltVT = Mem->getMemoryVT().getVectorElementType();
// Direct address (avar) ...
1005 if (SelectDirectAddr(Op1, Addr)) {
1006 switch (N->getOpcode()) {
1009 case NVPTXISD::LDGV2:
1010 switch (EltVT.getSimpleVT().SimpleTy) {
1014 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
1017 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
1020 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
1023 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
1026 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
1029 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
1033 case NVPTXISD::LDUV2:
1034 switch (EltVT.getSimpleVT().SimpleTy) {
1038 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
1041 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
1044 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
1047 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
1050 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
1053 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
1057 case NVPTXISD::LDGV4:
1058 switch (EltVT.getSimpleVT().SimpleTy) {
1062 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
1065 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
1068 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
1071 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
1075 case NVPTXISD::LDUV4:
1076 switch (EltVT.getSimpleVT().SimpleTy) {
1080 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
1083 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
1086 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
1089 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
1095 SDValue Ops[] = { Addr, Chain };
1096 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// ... register + immediate (ari32 / ari64) ...
1097 } else if (Subtarget.is64Bit()
1098 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1099 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1100 if (Subtarget.is64Bit()) {
1101 switch (N->getOpcode()) {
1104 case NVPTXISD::LDGV2:
1105 switch (EltVT.getSimpleVT().SimpleTy) {
1109 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
1112 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
1115 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
1118 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
1121 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
1124 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
1128 case NVPTXISD::LDUV2:
1129 switch (EltVT.getSimpleVT().SimpleTy) {
1133 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
1136 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
1139 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
1142 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
1145 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
1148 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
1152 case NVPTXISD::LDGV4:
1153 switch (EltVT.getSimpleVT().SimpleTy) {
1157 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
1160 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
1163 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
1166 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
1170 case NVPTXISD::LDUV4:
1171 switch (EltVT.getSimpleVT().SimpleTy) {
1175 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
1178 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
1181 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
1184 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
1190 switch (N->getOpcode()) {
1193 case NVPTXISD::LDGV2:
1194 switch (EltVT.getSimpleVT().SimpleTy) {
1198 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
1201 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
1204 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
1207 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
1210 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
1213 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
1217 case NVPTXISD::LDUV2:
1218 switch (EltVT.getSimpleVT().SimpleTy) {
1222 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
1225 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
1228 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
1231 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
1234 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
1237 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
1241 case NVPTXISD::LDGV4:
1242 switch (EltVT.getSimpleVT().SimpleTy) {
1246 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
1249 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
1252 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
1255 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
1259 case NVPTXISD::LDUV4:
1260 switch (EltVT.getSimpleVT().SimpleTy) {
1264 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
1267 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
1270 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
1273 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
1280 SDValue Ops[] = { Base, Offset, Chain };
1282 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// ... plain register (areg32 / areg64) fallback.
1284 if (Subtarget.is64Bit()) {
1285 switch (N->getOpcode()) {
1288 case NVPTXISD::LDGV2:
1289 switch (EltVT.getSimpleVT().SimpleTy) {
1293 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
1296 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
1299 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
1302 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
1305 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
1308 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
1312 case NVPTXISD::LDUV2:
1313 switch (EltVT.getSimpleVT().SimpleTy) {
1317 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
1320 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
1323 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
1326 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
1329 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
1332 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
1336 case NVPTXISD::LDGV4:
1337 switch (EltVT.getSimpleVT().SimpleTy) {
1341 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
1344 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
1347 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
1350 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
1354 case NVPTXISD::LDUV4:
1355 switch (EltVT.getSimpleVT().SimpleTy) {
1359 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
1362 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
1365 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
1368 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
1374 switch (N->getOpcode()) {
1377 case NVPTXISD::LDGV2:
1378 switch (EltVT.getSimpleVT().SimpleTy) {
1382 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
1385 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
1388 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
1391 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
1394 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
1397 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
1401 case NVPTXISD::LDUV2:
1402 switch (EltVT.getSimpleVT().SimpleTy) {
1406 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
1409 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
1412 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
1415 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
1418 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
1421 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
1425 case NVPTXISD::LDGV4:
1426 switch (EltVT.getSimpleVT().SimpleTy) {
1430 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
1433 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
1436 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
1439 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
1443 case NVPTXISD::LDUV4:
1444 switch (EltVT.getSimpleVT().SimpleTy) {
1448 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
1451 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
1454 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
1457 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
1464 SDValue Ops[] = { Op1, Chain };
1465 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Attach the original memory operand so aliasing info survives selection.
1468 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1469 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1470 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
// Select a machine store for a scalar StoreSDNode. Encodes the PTX ld/st
// flags (volatile, code address space, vector arity, value type, width) as
// immediate operands, and picks the opcode by the stored value's simple type
// (i8..i64, f32, f64) combined with the addressing mode that matches the
// address operand: direct address (avar), symbol+imm (asi), reg+imm (ari,
// with _64 variants for 64-bit pointers), or plain register (areg).
// Returns the new machine node, or null when the store is not handled here
// (indexed stores and non-simple memory types fall back to default selection).
1475 SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
1477 StoreSDNode *ST = cast<StoreSDNode>(N);
1478 EVT StoreVT = ST->getMemoryVT();
1479 SDNode *NVPTXST = nullptr;
1481 // do not support pre/post inc/dec
1482 if (ST->isIndexed())
1485 if (!StoreVT.isSimple())
1488 // Address Space Setting
1489 unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
1492 // - .volatile is only available for .global and .shared
1493 bool isVolatile = ST->isVolatile();
1494 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1495 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1496 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
// Vector Setting: scalar vs. two/four-element store (V2/V4 flag).
1500 MVT SimpleVT = StoreVT.getSimpleVT();
1501 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
1502 if (SimpleVT.isVector()) {
1503 unsigned num = SimpleVT.getVectorNumElements();
1505 vecType = NVPTX::PTXLdStInstCode::V2;
1507 vecType = NVPTX::PTXLdStInstCode::V4;
1512 // Type Setting: toType + toTypeWidth
1513 // - for integer type, always use 'u'
1515 MVT ScalarVT = SimpleVT.getScalarType();
1516 unsigned toTypeWidth = ScalarVT.getSizeInBits();
1517 unsigned int toType;
1518 if (ScalarVT.isFloatingPoint())
1519 toType = NVPTX::PTXLdStInstCode::Float;
1521 toType = NVPTX::PTXLdStInstCode::Unsigned;
1523 // Create the machine instruction DAG
1524 SDValue Chain = N->getOperand(0);
1525 SDValue N1 = N->getOperand(1);
1526 SDValue N2 = N->getOperand(2);
1528 SDValue Offset, Base;
1530 MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
// Addressing-mode dispatch: try direct address first, then symbol+imm,
// then reg+imm, finally fall back to a plain register address.
1532 if (SelectDirectAddr(N2, Addr)) {
1535 Opcode = NVPTX::ST_i8_avar;
1538 Opcode = NVPTX::ST_i16_avar;
1541 Opcode = NVPTX::ST_i32_avar;
1544 Opcode = NVPTX::ST_i64_avar;
1547 Opcode = NVPTX::ST_f32_avar;
1550 Opcode = NVPTX::ST_f64_avar;
1555 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1556 getI32Imm(vecType), getI32Imm(toType),
1557 getI32Imm(toTypeWidth), Addr, Chain };
1558 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1559 } else if (Subtarget.is64Bit()
1560 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1561 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1564 Opcode = NVPTX::ST_i8_asi;
1567 Opcode = NVPTX::ST_i16_asi;
1570 Opcode = NVPTX::ST_i32_asi;
1573 Opcode = NVPTX::ST_i64_asi;
1576 Opcode = NVPTX::ST_f32_asi;
1579 Opcode = NVPTX::ST_f64_asi;
1584 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1585 getI32Imm(vecType), getI32Imm(toType),
1586 getI32Imm(toTypeWidth), Base, Offset, Chain };
1587 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1588 } else if (Subtarget.is64Bit()
1589 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1590 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
// reg+imm: pointer width decides between the _64 and 32-bit opcode sets.
1591 if (Subtarget.is64Bit()) {
1594 Opcode = NVPTX::ST_i8_ari_64;
1597 Opcode = NVPTX::ST_i16_ari_64;
1600 Opcode = NVPTX::ST_i32_ari_64;
1603 Opcode = NVPTX::ST_i64_ari_64;
1606 Opcode = NVPTX::ST_f32_ari_64;
1609 Opcode = NVPTX::ST_f64_ari_64;
1617 Opcode = NVPTX::ST_i8_ari;
1620 Opcode = NVPTX::ST_i16_ari;
1623 Opcode = NVPTX::ST_i32_ari;
1626 Opcode = NVPTX::ST_i64_ari;
1629 Opcode = NVPTX::ST_f32_ari;
1632 Opcode = NVPTX::ST_f64_ari;
1638 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1639 getI32Imm(vecType), getI32Imm(toType),
1640 getI32Imm(toTypeWidth), Base, Offset, Chain };
1641 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Fallback: address lives entirely in a register (areg forms).
1643 if (Subtarget.is64Bit()) {
1646 Opcode = NVPTX::ST_i8_areg_64;
1649 Opcode = NVPTX::ST_i16_areg_64;
1652 Opcode = NVPTX::ST_i32_areg_64;
1655 Opcode = NVPTX::ST_i64_areg_64;
1658 Opcode = NVPTX::ST_f32_areg_64;
1661 Opcode = NVPTX::ST_f64_areg_64;
1669 Opcode = NVPTX::ST_i8_areg;
1672 Opcode = NVPTX::ST_i16_areg;
1675 Opcode = NVPTX::ST_i32_areg;
1678 Opcode = NVPTX::ST_i64_areg;
1681 Opcode = NVPTX::ST_f32_areg;
1684 Opcode = NVPTX::ST_f64_areg;
1690 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1691 getI32Imm(vecType), getI32Imm(toType),
1692 getI32Imm(toTypeWidth), N2, Chain };
1693 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Transfer the original memory operand to the machine node so later passes
// (e.g. alias analysis, scheduling) still see the memory reference.
1697 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1698 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1699 cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
// Select a machine store for the target-specific StoreV2/StoreV4 nodes.
// Builds the operand list incrementally in StOps (values, then the PTX
// ld/st immediate flags, then the selected address operands, then the
// chain), and picks the STV_* opcode by element type crossed with the
// addressing mode (avar / asi / ari(_64) / areg(_64)) and vector arity.
// Note: V4 forms only exist up to f32 in the opcode tables below — 64-bit
// element V4 stores are not selected here.
1705 SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
1706 SDValue Chain = N->getOperand(0);
1707 SDValue Op1 = N->getOperand(1);
1708 SDValue Addr, Offset, Base;
1712 EVT EltVT = Op1.getValueType();
1713 MemSDNode *MemSD = cast<MemSDNode>(N);
1714 EVT StoreVT = MemSD->getMemoryVT();
1716 // Address Space Setting
1717 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
// Stores into the constant address space are invalid by construction.
1719 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
1720 report_fatal_error("Cannot store to pointer that points to constant "
1725 // - .volatile is only available for .global and .shared
1726 bool IsVolatile = MemSD->isVolatile();
1727 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1728 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1729 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1732 // Type Setting: toType + toTypeWidth
1733 // - for integer type, always use 'u'
1734 assert(StoreVT.isSimple() && "Store value is not simple");
1735 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
1736 unsigned ToTypeWidth = ScalarVT.getSizeInBits();
1738 if (ScalarVT.isFloatingPoint())
1739 ToType = NVPTX::PTXLdStInstCode::Float;
1741 ToType = NVPTX::PTXLdStInstCode::Unsigned;
1743 SmallVector<SDValue, 12> StOps;
// Gather the value operands (2 or 4) and the address operand N2, whose
// position depends on the node's arity.
1747 switch (N->getOpcode()) {
1748 case NVPTXISD::StoreV2:
1749 VecType = NVPTX::PTXLdStInstCode::V2;
1750 StOps.push_back(N->getOperand(1));
1751 StOps.push_back(N->getOperand(2));
1752 N2 = N->getOperand(3);
1754 case NVPTXISD::StoreV4:
1755 VecType = NVPTX::PTXLdStInstCode::V4;
1756 StOps.push_back(N->getOperand(1));
1757 StOps.push_back(N->getOperand(2));
1758 StOps.push_back(N->getOperand(3));
1759 StOps.push_back(N->getOperand(4));
1760 N2 = N->getOperand(5);
// PTX ld/st flags follow the stored values in the operand list.
1766 StOps.push_back(getI32Imm(IsVolatile));
1767 StOps.push_back(getI32Imm(CodeAddrSpace));
1768 StOps.push_back(getI32Imm(VecType));
1769 StOps.push_back(getI32Imm(ToType));
1770 StOps.push_back(getI32Imm(ToTypeWidth));
// Addressing-mode dispatch, mirroring SelectStore: avar, asi, ari(_64),
// then the plain-register fallback.
1772 if (SelectDirectAddr(N2, Addr)) {
1773 switch (N->getOpcode()) {
1776 case NVPTXISD::StoreV2:
1777 switch (EltVT.getSimpleVT().SimpleTy) {
1781 Opcode = NVPTX::STV_i8_v2_avar;
1784 Opcode = NVPTX::STV_i16_v2_avar;
1787 Opcode = NVPTX::STV_i32_v2_avar;
1790 Opcode = NVPTX::STV_i64_v2_avar;
1793 Opcode = NVPTX::STV_f32_v2_avar;
1796 Opcode = NVPTX::STV_f64_v2_avar;
1800 case NVPTXISD::StoreV4:
1801 switch (EltVT.getSimpleVT().SimpleTy) {
1805 Opcode = NVPTX::STV_i8_v4_avar;
1808 Opcode = NVPTX::STV_i16_v4_avar;
1811 Opcode = NVPTX::STV_i32_v4_avar;
1814 Opcode = NVPTX::STV_f32_v4_avar;
1819 StOps.push_back(Addr);
1820 } else if (Subtarget.is64Bit()
1821 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1822 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1823 switch (N->getOpcode()) {
1826 case NVPTXISD::StoreV2:
1827 switch (EltVT.getSimpleVT().SimpleTy) {
1831 Opcode = NVPTX::STV_i8_v2_asi;
1834 Opcode = NVPTX::STV_i16_v2_asi;
1837 Opcode = NVPTX::STV_i32_v2_asi;
1840 Opcode = NVPTX::STV_i64_v2_asi;
1843 Opcode = NVPTX::STV_f32_v2_asi;
1846 Opcode = NVPTX::STV_f64_v2_asi;
1850 case NVPTXISD::StoreV4:
1851 switch (EltVT.getSimpleVT().SimpleTy) {
1855 Opcode = NVPTX::STV_i8_v4_asi;
1858 Opcode = NVPTX::STV_i16_v4_asi;
1861 Opcode = NVPTX::STV_i32_v4_asi;
1864 Opcode = NVPTX::STV_f32_v4_asi;
1869 StOps.push_back(Base);
1870 StOps.push_back(Offset);
1871 } else if (Subtarget.is64Bit()
1872 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1873 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1874 if (Subtarget.is64Bit()) {
1875 switch (N->getOpcode()) {
1878 case NVPTXISD::StoreV2:
1879 switch (EltVT.getSimpleVT().SimpleTy) {
1883 Opcode = NVPTX::STV_i8_v2_ari_64;
1886 Opcode = NVPTX::STV_i16_v2_ari_64;
1889 Opcode = NVPTX::STV_i32_v2_ari_64;
1892 Opcode = NVPTX::STV_i64_v2_ari_64;
1895 Opcode = NVPTX::STV_f32_v2_ari_64;
1898 Opcode = NVPTX::STV_f64_v2_ari_64;
1902 case NVPTXISD::StoreV4:
1903 switch (EltVT.getSimpleVT().SimpleTy) {
1907 Opcode = NVPTX::STV_i8_v4_ari_64;
1910 Opcode = NVPTX::STV_i16_v4_ari_64;
1913 Opcode = NVPTX::STV_i32_v4_ari_64;
1916 Opcode = NVPTX::STV_f32_v4_ari_64;
1922 switch (N->getOpcode()) {
1925 case NVPTXISD::StoreV2:
1926 switch (EltVT.getSimpleVT().SimpleTy) {
1930 Opcode = NVPTX::STV_i8_v2_ari;
1933 Opcode = NVPTX::STV_i16_v2_ari;
1936 Opcode = NVPTX::STV_i32_v2_ari;
1939 Opcode = NVPTX::STV_i64_v2_ari;
1942 Opcode = NVPTX::STV_f32_v2_ari;
1945 Opcode = NVPTX::STV_f64_v2_ari;
1949 case NVPTXISD::StoreV4:
1950 switch (EltVT.getSimpleVT().SimpleTy) {
1954 Opcode = NVPTX::STV_i8_v4_ari;
1957 Opcode = NVPTX::STV_i16_v4_ari;
1960 Opcode = NVPTX::STV_i32_v4_ari;
1963 Opcode = NVPTX::STV_f32_v4_ari;
1969 StOps.push_back(Base);
1970 StOps.push_back(Offset);
1972 if (Subtarget.is64Bit()) {
1973 switch (N->getOpcode()) {
1976 case NVPTXISD::StoreV2:
1977 switch (EltVT.getSimpleVT().SimpleTy) {
1981 Opcode = NVPTX::STV_i8_v2_areg_64;
1984 Opcode = NVPTX::STV_i16_v2_areg_64;
1987 Opcode = NVPTX::STV_i32_v2_areg_64;
1990 Opcode = NVPTX::STV_i64_v2_areg_64;
1993 Opcode = NVPTX::STV_f32_v2_areg_64;
1996 Opcode = NVPTX::STV_f64_v2_areg_64;
2000 case NVPTXISD::StoreV4:
2001 switch (EltVT.getSimpleVT().SimpleTy) {
2005 Opcode = NVPTX::STV_i8_v4_areg_64;
2008 Opcode = NVPTX::STV_i16_v4_areg_64;
2011 Opcode = NVPTX::STV_i32_v4_areg_64;
2014 Opcode = NVPTX::STV_f32_v4_areg_64;
2020 switch (N->getOpcode()) {
2023 case NVPTXISD::StoreV2:
2024 switch (EltVT.getSimpleVT().SimpleTy) {
2028 Opcode = NVPTX::STV_i8_v2_areg;
2031 Opcode = NVPTX::STV_i16_v2_areg;
2034 Opcode = NVPTX::STV_i32_v2_areg;
2037 Opcode = NVPTX::STV_i64_v2_areg;
2040 Opcode = NVPTX::STV_f32_v2_areg;
2043 Opcode = NVPTX::STV_f64_v2_areg;
2047 case NVPTXISD::StoreV4:
2048 switch (EltVT.getSimpleVT().SimpleTy) {
2052 Opcode = NVPTX::STV_i8_v4_areg;
2055 Opcode = NVPTX::STV_i16_v4_areg;
2058 Opcode = NVPTX::STV_i32_v4_areg;
2061 Opcode = NVPTX::STV_f32_v4_areg;
2067 StOps.push_back(N2);
2070 StOps.push_back(Chain);
2072 ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
// Carry the memory operand over to the selected machine node.
2074 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2075 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2076 cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
// Select a LoadParamMem* machine node for LoadParam/LoadParamV2/LoadParamV4.
// The node reads an incoming function parameter at a constant byte offset;
// the selected instruction takes (offset, chain, glue) and produces 1/2/4
// element values plus chain and glue. i1 loads map to the 8-bit opcode
// (the widening cast was already emitted by lowering).
2081 SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
2082 SDValue Chain = Node->getOperand(0);
2083 SDValue Offset = Node->getOperand(2);
2084 SDValue Flag = Node->getOperand(3);
2086 MemSDNode *Mem = cast<MemSDNode>(Node);
// Element count (1, 2 or 4) follows from which LoadParam node this is.
2089 switch (Node->getOpcode()) {
2092 case NVPTXISD::LoadParam:
2095 case NVPTXISD::LoadParamV2:
2098 case NVPTXISD::LoadParamV4:
2103 EVT EltVT = Node->getValueType(0);
2104 EVT MemVT = Mem->getMemoryVT();
// Scalar opcodes, one per memory value type.
2112 switch (MemVT.getSimpleVT().SimpleTy) {
2116 Opc = NVPTX::LoadParamMemI8;
2119 Opc = NVPTX::LoadParamMemI8;
2122 Opc = NVPTX::LoadParamMemI16;
2125 Opc = NVPTX::LoadParamMemI32;
2128 Opc = NVPTX::LoadParamMemI64;
2131 Opc = NVPTX::LoadParamMemF32;
2134 Opc = NVPTX::LoadParamMemF64;
// Two-element opcodes.
2139 switch (MemVT.getSimpleVT().SimpleTy) {
2143 Opc = NVPTX::LoadParamMemV2I8;
2146 Opc = NVPTX::LoadParamMemV2I8;
2149 Opc = NVPTX::LoadParamMemV2I16;
2152 Opc = NVPTX::LoadParamMemV2I32;
2155 Opc = NVPTX::LoadParamMemV2I64;
2158 Opc = NVPTX::LoadParamMemV2F32;
2161 Opc = NVPTX::LoadParamMemV2F64;
// Four-element opcodes (no 64-bit element variants in this table).
2166 switch (MemVT.getSimpleVT().SimpleTy) {
2170 Opc = NVPTX::LoadParamMemV4I8;
2173 Opc = NVPTX::LoadParamMemV4I8;
2176 Opc = NVPTX::LoadParamMemV4I16;
2179 Opc = NVPTX::LoadParamMemV4I32;
2182 Opc = NVPTX::LoadParamMemV4F32;
// Result VT list: one EltVT per element, then chain and glue.
2190 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2191 } else if (VecSize == 2) {
2192 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2194 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2195 VTs = CurDAG->getVTList(EVTs);
2198 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2200 SmallVector<SDValue, 2> Ops;
2201 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2202 Ops.push_back(Chain);
2203 Ops.push_back(Flag);
2206 CurDAG->getMachineNode(Opc, DL, VTs, Ops);
// Select a StoreRetval* machine node for StoreRetval/StoreRetvalV2/V4.
// Stores 1/2/4 return values into the return-value space at a constant
// byte offset. Operand layout: values..., offset immediate, chain.
2210 SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
2212 SDValue Chain = N->getOperand(0);
2213 SDValue Offset = N->getOperand(1);
2214 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2215 MemSDNode *Mem = cast<MemSDNode>(N);
2217 // How many elements do we have?
2218 unsigned NumElts = 1;
2219 switch (N->getOpcode()) {
2222 case NVPTXISD::StoreRetval:
2225 case NVPTXISD::StoreRetvalV2:
2228 case NVPTXISD::StoreRetvalV4:
2233 // Build vector of operands
2234 SmallVector<SDValue, 6> Ops;
2235 for (unsigned i = 0; i < NumElts; ++i)
2236 Ops.push_back(N->getOperand(i + 2));
2237 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2238 Ops.push_back(Chain);
2240 // Determine target opcode
2241 // If we have an i1, use an 8-bit store. The lowering code in
2242 // NVPTXISelLowering will have already emitted an upcast.
2243 unsigned Opcode = 0;
// Scalar retval stores.
2248 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2252 Opcode = NVPTX::StoreRetvalI8;
2255 Opcode = NVPTX::StoreRetvalI8;
2258 Opcode = NVPTX::StoreRetvalI16;
2261 Opcode = NVPTX::StoreRetvalI32;
2264 Opcode = NVPTX::StoreRetvalI64;
2267 Opcode = NVPTX::StoreRetvalF32;
2270 Opcode = NVPTX::StoreRetvalF64;
// Two-element retval stores.
2275 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2279 Opcode = NVPTX::StoreRetvalV2I8;
2282 Opcode = NVPTX::StoreRetvalV2I8;
2285 Opcode = NVPTX::StoreRetvalV2I16;
2288 Opcode = NVPTX::StoreRetvalV2I32;
2291 Opcode = NVPTX::StoreRetvalV2I64;
2294 Opcode = NVPTX::StoreRetvalV2F32;
2297 Opcode = NVPTX::StoreRetvalV2F64;
// Four-element retval stores (no 64-bit element variants).
2302 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2306 Opcode = NVPTX::StoreRetvalV4I8;
2309 Opcode = NVPTX::StoreRetvalV4I8;
2312 Opcode = NVPTX::StoreRetvalV4I16;
2315 Opcode = NVPTX::StoreRetvalV4I32;
2318 Opcode = NVPTX::StoreRetvalV4F32;
2325 CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
// Preserve the memory operand on the selected node.
2326 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2327 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2328 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
// Select a StoreParam* machine node for StoreParam/StoreParamV2/V4 and the
// extending variants StoreParamU32/StoreParamS32. Stores 1/2/4 outgoing
// call-argument values into parameter space. Operand layout: values...,
// param index, byte offset, chain, glue. For the U32/S32 variants a
// CVT_u32_u16 / CVT_s32_s16 node is inserted first and its result replaces
// the value operand before the StoreParamI32 is emitted.
2333 SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
2335 SDValue Chain = N->getOperand(0);
2336 SDValue Param = N->getOperand(1);
2337 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2338 SDValue Offset = N->getOperand(2);
2339 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2340 MemSDNode *Mem = cast<MemSDNode>(N);
2341 SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2343 // How many elements do we have?
2344 unsigned NumElts = 1;
2345 switch (N->getOpcode()) {
2348 case NVPTXISD::StoreParamU32:
2349 case NVPTXISD::StoreParamS32:
2350 case NVPTXISD::StoreParam:
2353 case NVPTXISD::StoreParamV2:
2356 case NVPTXISD::StoreParamV4:
2361 // Build vector of operands
2362 SmallVector<SDValue, 8> Ops;
2363 for (unsigned i = 0; i < NumElts; ++i)
2364 Ops.push_back(N->getOperand(i + 3));
2365 Ops.push_back(CurDAG->getTargetConstant(ParamVal, MVT::i32));
2366 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2367 Ops.push_back(Chain);
2368 Ops.push_back(Flag);
2370 // Determine target opcode
2371 // If we have an i1, use an 8-bit store. The lowering code in
2372 // NVPTXISelLowering will have already emitted an upcast.
2373 unsigned Opcode = 0;
2374 switch (N->getOpcode()) {
// Scalar param stores.
2380 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2384 Opcode = NVPTX::StoreParamI8;
2387 Opcode = NVPTX::StoreParamI8;
2390 Opcode = NVPTX::StoreParamI16;
2393 Opcode = NVPTX::StoreParamI32;
2396 Opcode = NVPTX::StoreParamI64;
2399 Opcode = NVPTX::StoreParamF32;
2402 Opcode = NVPTX::StoreParamF64;
// Two-element param stores.
2407 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2411 Opcode = NVPTX::StoreParamV2I8;
2414 Opcode = NVPTX::StoreParamV2I8;
2417 Opcode = NVPTX::StoreParamV2I16;
2420 Opcode = NVPTX::StoreParamV2I32;
2423 Opcode = NVPTX::StoreParamV2I64;
2426 Opcode = NVPTX::StoreParamV2F32;
2429 Opcode = NVPTX::StoreParamV2F64;
// Four-element param stores (no 64-bit element variants).
2434 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2438 Opcode = NVPTX::StoreParamV4I8;
2441 Opcode = NVPTX::StoreParamV4I8;
2444 Opcode = NVPTX::StoreParamV4I16;
2447 Opcode = NVPTX::StoreParamV4I32;
2450 Opcode = NVPTX::StoreParamV4F32;
2456 // Special case: if we have a sign-extend/zero-extend node, insert the
2457 // conversion instruction first, and use that as the value operand to
2458 // the selected StoreParam node.
2459 case NVPTXISD::StoreParamU32: {
2460 Opcode = NVPTX::StoreParamI32;
2461 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2463 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2464 MVT::i32, Ops[0], CvtNone);
2465 Ops[0] = SDValue(Cvt, 0);
2468 case NVPTXISD::StoreParamS32: {
2469 Opcode = NVPTX::StoreParamI32;
2470 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2472 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2473 MVT::i32, Ops[0], CvtNone);
2474 Ops[0] = SDValue(Cvt, 0);
// The selected store produces chain + glue for the surrounding call seq.
2479 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
2481 CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
2482 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2483 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2484 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
// Select a TEX_* machine node for the NVPTXISD texture-fetch nodes.
// The node name encodes dimensionality (1D/2D/3D, optionally Array),
// result type, coordinate type, and sampling mode (plain / Level / Grad),
// and maps 1:1 onto the corresponding TEX_* opcode. Operand layout for the
// machine node: texture ref, sampler ref, coordinate operands copied from
// the intrinsic, then the chain. Returns null for unrecognized opcodes so
// default selection can run.
2489 SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
2490 SDValue Chain = N->getOperand(0);
2491 SDValue TexRef = N->getOperand(1);
2492 SDValue SampRef = N->getOperand(2);
2493 SDNode *Ret = nullptr;
2495 SmallVector<SDValue, 8> Ops;
2497 switch (N->getOpcode()) {
2498 default: return nullptr;
2499 case NVPTXISD::Tex1DFloatI32:
2500 Opc = NVPTX::TEX_1D_F32_I32;
2502 case NVPTXISD::Tex1DFloatFloat:
2503 Opc = NVPTX::TEX_1D_F32_F32;
2505 case NVPTXISD::Tex1DFloatFloatLevel:
2506 Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
2508 case NVPTXISD::Tex1DFloatFloatGrad:
2509 Opc = NVPTX::TEX_1D_F32_F32_GRAD;
2511 case NVPTXISD::Tex1DI32I32:
2512 Opc = NVPTX::TEX_1D_I32_I32;
2514 case NVPTXISD::Tex1DI32Float:
2515 Opc = NVPTX::TEX_1D_I32_F32;
2517 case NVPTXISD::Tex1DI32FloatLevel:
2518 Opc = NVPTX::TEX_1D_I32_F32_LEVEL;
2520 case NVPTXISD::Tex1DI32FloatGrad:
2521 Opc = NVPTX::TEX_1D_I32_F32_GRAD;
2523 case NVPTXISD::Tex1DArrayFloatI32:
2524 Opc = NVPTX::TEX_1D_ARRAY_F32_I32;
2526 case NVPTXISD::Tex1DArrayFloatFloat:
2527 Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
2529 case NVPTXISD::Tex1DArrayFloatFloatLevel:
2530 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
2532 case NVPTXISD::Tex1DArrayFloatFloatGrad:
2533 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
2535 case NVPTXISD::Tex1DArrayI32I32:
2536 Opc = NVPTX::TEX_1D_ARRAY_I32_I32;
2538 case NVPTXISD::Tex1DArrayI32Float:
2539 Opc = NVPTX::TEX_1D_ARRAY_I32_F32;
2541 case NVPTXISD::Tex1DArrayI32FloatLevel:
2542 Opc = NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL;
2544 case NVPTXISD::Tex1DArrayI32FloatGrad:
2545 Opc = NVPTX::TEX_1D_ARRAY_I32_F32_GRAD;
2547 case NVPTXISD::Tex2DFloatI32:
2548 Opc = NVPTX::TEX_2D_F32_I32;
2550 case NVPTXISD::Tex2DFloatFloat:
2551 Opc = NVPTX::TEX_2D_F32_F32;
2553 case NVPTXISD::Tex2DFloatFloatLevel:
2554 Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
2556 case NVPTXISD::Tex2DFloatFloatGrad:
2557 Opc = NVPTX::TEX_2D_F32_F32_GRAD;
2559 case NVPTXISD::Tex2DI32I32:
2560 Opc = NVPTX::TEX_2D_I32_I32;
2562 case NVPTXISD::Tex2DI32Float:
2563 Opc = NVPTX::TEX_2D_I32_F32;
2565 case NVPTXISD::Tex2DI32FloatLevel:
2566 Opc = NVPTX::TEX_2D_I32_F32_LEVEL;
2568 case NVPTXISD::Tex2DI32FloatGrad:
2569 Opc = NVPTX::TEX_2D_I32_F32_GRAD;
2571 case NVPTXISD::Tex2DArrayFloatI32:
2572 Opc = NVPTX::TEX_2D_ARRAY_F32_I32;
2574 case NVPTXISD::Tex2DArrayFloatFloat:
2575 Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
2577 case NVPTXISD::Tex2DArrayFloatFloatLevel:
2578 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
2580 case NVPTXISD::Tex2DArrayFloatFloatGrad:
2581 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
2583 case NVPTXISD::Tex2DArrayI32I32:
2584 Opc = NVPTX::TEX_2D_ARRAY_I32_I32;
2586 case NVPTXISD::Tex2DArrayI32Float:
2587 Opc = NVPTX::TEX_2D_ARRAY_I32_F32;
2589 case NVPTXISD::Tex2DArrayI32FloatLevel:
2590 Opc = NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL;
2592 case NVPTXISD::Tex2DArrayI32FloatGrad:
2593 Opc = NVPTX::TEX_2D_ARRAY_I32_F32_GRAD;
2595 case NVPTXISD::Tex3DFloatI32:
2596 Opc = NVPTX::TEX_3D_F32_I32;
2598 case NVPTXISD::Tex3DFloatFloat:
2599 Opc = NVPTX::TEX_3D_F32_F32;
2601 case NVPTXISD::Tex3DFloatFloatLevel:
2602 Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
2604 case NVPTXISD::Tex3DFloatFloatGrad:
2605 Opc = NVPTX::TEX_3D_F32_F32_GRAD;
2607 case NVPTXISD::Tex3DI32I32:
2608 Opc = NVPTX::TEX_3D_I32_I32;
2610 case NVPTXISD::Tex3DI32Float:
2611 Opc = NVPTX::TEX_3D_I32_F32;
2613 case NVPTXISD::Tex3DI32FloatLevel:
2614 Opc = NVPTX::TEX_3D_I32_F32_LEVEL;
2616 case NVPTXISD::Tex3DI32FloatGrad:
2617 Opc = NVPTX::TEX_3D_I32_F32_GRAD;
// Assemble operands: texture + sampler refs come first.
2621 Ops.push_back(TexRef);
2622 Ops.push_back(SampRef);
2624 // Copy over indices
2625 for (unsigned i = 3; i < N->getNumOperands(); ++i) {
2626 Ops.push_back(N->getOperand(i));
2629 Ops.push_back(Chain);
2630 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
// Select a SULD_* machine node for the NVPTXISD surface-load (Suld*) nodes.
// The node name encodes dimensionality (1D/2D/3D, optionally Array),
// element width (I8/I16/I32) and vector arity (scalar/V2/V4); all variants
// here use TRAP out-of-bounds behavior. Each case builds the operand list
// inline: surface handle, then the coordinate operands (1 for 1D; 2 for
// 1DArray/2D; 3 for 2DArray/3D), then the chain. Returns null for
// unrecognized opcodes so default selection can run.
2634 SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
2635 SDValue Chain = N->getOperand(0);
2636 SDValue TexHandle = N->getOperand(1);
2637 SDNode *Ret = nullptr;
2639 SmallVector<SDValue, 8> Ops;
2640 switch (N->getOpcode()) {
2641 default: return nullptr;
// 1D surfaces: handle + x coordinate.
2642 case NVPTXISD::Suld1DI8Trap:
2643 Opc = NVPTX::SULD_1D_I8_TRAP;
2644 Ops.push_back(TexHandle);
2645 Ops.push_back(N->getOperand(2));
2646 Ops.push_back(Chain);
2648 case NVPTXISD::Suld1DI16Trap:
2649 Opc = NVPTX::SULD_1D_I16_TRAP;
2650 Ops.push_back(TexHandle);
2651 Ops.push_back(N->getOperand(2));
2652 Ops.push_back(Chain);
2654 case NVPTXISD::Suld1DI32Trap:
2655 Opc = NVPTX::SULD_1D_I32_TRAP;
2656 Ops.push_back(TexHandle);
2657 Ops.push_back(N->getOperand(2));
2658 Ops.push_back(Chain);
2660 case NVPTXISD::Suld1DV2I8Trap:
2661 Opc = NVPTX::SULD_1D_V2I8_TRAP;
2662 Ops.push_back(TexHandle);
2663 Ops.push_back(N->getOperand(2));
2664 Ops.push_back(Chain);
2666 case NVPTXISD::Suld1DV2I16Trap:
2667 Opc = NVPTX::SULD_1D_V2I16_TRAP;
2668 Ops.push_back(TexHandle);
2669 Ops.push_back(N->getOperand(2));
2670 Ops.push_back(Chain);
2672 case NVPTXISD::Suld1DV2I32Trap:
2673 Opc = NVPTX::SULD_1D_V2I32_TRAP;
2674 Ops.push_back(TexHandle);
2675 Ops.push_back(N->getOperand(2));
2676 Ops.push_back(Chain);
2678 case NVPTXISD::Suld1DV4I8Trap:
2679 Opc = NVPTX::SULD_1D_V4I8_TRAP;
2680 Ops.push_back(TexHandle);
2681 Ops.push_back(N->getOperand(2));
2682 Ops.push_back(Chain);
2684 case NVPTXISD::Suld1DV4I16Trap:
2685 Opc = NVPTX::SULD_1D_V4I16_TRAP;
2686 Ops.push_back(TexHandle);
2687 Ops.push_back(N->getOperand(2));
2688 Ops.push_back(Chain);
2690 case NVPTXISD::Suld1DV4I32Trap:
2691 Opc = NVPTX::SULD_1D_V4I32_TRAP;
2692 Ops.push_back(TexHandle);
2693 Ops.push_back(N->getOperand(2));
2694 Ops.push_back(Chain);
// 1D array surfaces: handle + layer index + x coordinate.
2696 case NVPTXISD::Suld1DArrayI8Trap:
2697 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
2698 Ops.push_back(TexHandle);
2699 Ops.push_back(N->getOperand(2));
2700 Ops.push_back(N->getOperand(3));
2701 Ops.push_back(Chain);
2703 case NVPTXISD::Suld1DArrayI16Trap:
2704 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
2705 Ops.push_back(TexHandle);
2706 Ops.push_back(N->getOperand(2));
2707 Ops.push_back(N->getOperand(3));
2708 Ops.push_back(Chain);
2710 case NVPTXISD::Suld1DArrayI32Trap:
2711 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
2712 Ops.push_back(TexHandle);
2713 Ops.push_back(N->getOperand(2));
2714 Ops.push_back(N->getOperand(3));
2715 Ops.push_back(Chain);
2717 case NVPTXISD::Suld1DArrayV2I8Trap:
2718 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
2719 Ops.push_back(TexHandle);
2720 Ops.push_back(N->getOperand(2));
2721 Ops.push_back(N->getOperand(3));
2722 Ops.push_back(Chain);
2724 case NVPTXISD::Suld1DArrayV2I16Trap:
2725 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
2726 Ops.push_back(TexHandle);
2727 Ops.push_back(N->getOperand(2));
2728 Ops.push_back(N->getOperand(3));
2729 Ops.push_back(Chain);
2731 case NVPTXISD::Suld1DArrayV2I32Trap:
2732 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
2733 Ops.push_back(TexHandle);
2734 Ops.push_back(N->getOperand(2));
2735 Ops.push_back(N->getOperand(3));
2736 Ops.push_back(Chain);
2738 case NVPTXISD::Suld1DArrayV4I8Trap:
2739 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
2740 Ops.push_back(TexHandle);
2741 Ops.push_back(N->getOperand(2));
2742 Ops.push_back(N->getOperand(3));
2743 Ops.push_back(Chain);
2745 case NVPTXISD::Suld1DArrayV4I16Trap:
2746 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
2747 Ops.push_back(TexHandle);
2748 Ops.push_back(N->getOperand(2));
2749 Ops.push_back(N->getOperand(3));
2750 Ops.push_back(Chain);
2752 case NVPTXISD::Suld1DArrayV4I32Trap:
2753 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
2754 Ops.push_back(TexHandle);
2755 Ops.push_back(N->getOperand(2));
2756 Ops.push_back(N->getOperand(3));
2757 Ops.push_back(Chain);
// 2D surfaces: handle + x + y coordinates.
2759 case NVPTXISD::Suld2DI8Trap:
2760 Opc = NVPTX::SULD_2D_I8_TRAP;
2761 Ops.push_back(TexHandle);
2762 Ops.push_back(N->getOperand(2));
2763 Ops.push_back(N->getOperand(3));
2764 Ops.push_back(Chain);
2766 case NVPTXISD::Suld2DI16Trap:
2767 Opc = NVPTX::SULD_2D_I16_TRAP;
2768 Ops.push_back(TexHandle);
2769 Ops.push_back(N->getOperand(2));
2770 Ops.push_back(N->getOperand(3));
2771 Ops.push_back(Chain);
2773 case NVPTXISD::Suld2DI32Trap:
2774 Opc = NVPTX::SULD_2D_I32_TRAP;
2775 Ops.push_back(TexHandle);
2776 Ops.push_back(N->getOperand(2));
2777 Ops.push_back(N->getOperand(3));
2778 Ops.push_back(Chain);
2780 case NVPTXISD::Suld2DV2I8Trap:
2781 Opc = NVPTX::SULD_2D_V2I8_TRAP;
2782 Ops.push_back(TexHandle);
2783 Ops.push_back(N->getOperand(2));
2784 Ops.push_back(N->getOperand(3));
2785 Ops.push_back(Chain);
2787 case NVPTXISD::Suld2DV2I16Trap:
2788 Opc = NVPTX::SULD_2D_V2I16_TRAP;
2789 Ops.push_back(TexHandle);
2790 Ops.push_back(N->getOperand(2));
2791 Ops.push_back(N->getOperand(3));
2792 Ops.push_back(Chain);
2794 case NVPTXISD::Suld2DV2I32Trap:
2795 Opc = NVPTX::SULD_2D_V2I32_TRAP;
2796 Ops.push_back(TexHandle);
2797 Ops.push_back(N->getOperand(2));
2798 Ops.push_back(N->getOperand(3));
2799 Ops.push_back(Chain);
2801 case NVPTXISD::Suld2DV4I8Trap:
2802 Opc = NVPTX::SULD_2D_V4I8_TRAP;
2803 Ops.push_back(TexHandle);
2804 Ops.push_back(N->getOperand(2));
2805 Ops.push_back(N->getOperand(3));
2806 Ops.push_back(Chain);
2808 case NVPTXISD::Suld2DV4I16Trap:
2809 Opc = NVPTX::SULD_2D_V4I16_TRAP;
2810 Ops.push_back(TexHandle);
2811 Ops.push_back(N->getOperand(2));
2812 Ops.push_back(N->getOperand(3));
2813 Ops.push_back(Chain);
2815 case NVPTXISD::Suld2DV4I32Trap:
2816 Opc = NVPTX::SULD_2D_V4I32_TRAP;
2817 Ops.push_back(TexHandle);
2818 Ops.push_back(N->getOperand(2));
2819 Ops.push_back(N->getOperand(3));
2820 Ops.push_back(Chain);
// 2D array surfaces: handle + layer + x + y.
2822 case NVPTXISD::Suld2DArrayI8Trap:
2823 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
2824 Ops.push_back(TexHandle);
2825 Ops.push_back(N->getOperand(2));
2826 Ops.push_back(N->getOperand(3));
2827 Ops.push_back(N->getOperand(4));
2828 Ops.push_back(Chain);
2830 case NVPTXISD::Suld2DArrayI16Trap:
2831 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
2832 Ops.push_back(TexHandle);
2833 Ops.push_back(N->getOperand(2));
2834 Ops.push_back(N->getOperand(3));
2835 Ops.push_back(N->getOperand(4));
2836 Ops.push_back(Chain);
2838 case NVPTXISD::Suld2DArrayI32Trap:
2839 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
2840 Ops.push_back(TexHandle);
2841 Ops.push_back(N->getOperand(2));
2842 Ops.push_back(N->getOperand(3));
2843 Ops.push_back(N->getOperand(4));
2844 Ops.push_back(Chain);
2846 case NVPTXISD::Suld2DArrayV2I8Trap:
2847 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
2848 Ops.push_back(TexHandle);
2849 Ops.push_back(N->getOperand(2));
2850 Ops.push_back(N->getOperand(3));
2851 Ops.push_back(N->getOperand(4));
2852 Ops.push_back(Chain);
2854 case NVPTXISD::Suld2DArrayV2I16Trap:
2855 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
2856 Ops.push_back(TexHandle);
2857 Ops.push_back(N->getOperand(2));
2858 Ops.push_back(N->getOperand(3));
2859 Ops.push_back(N->getOperand(4));
2860 Ops.push_back(Chain);
2862 case NVPTXISD::Suld2DArrayV2I32Trap:
2863 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
2864 Ops.push_back(TexHandle);
2865 Ops.push_back(N->getOperand(2));
2866 Ops.push_back(N->getOperand(3));
2867 Ops.push_back(N->getOperand(4));
2868 Ops.push_back(Chain);
2870 case NVPTXISD::Suld2DArrayV4I8Trap:
2871 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
2872 Ops.push_back(TexHandle);
2873 Ops.push_back(N->getOperand(2));
2874 Ops.push_back(N->getOperand(3));
2875 Ops.push_back(N->getOperand(4));
2876 Ops.push_back(Chain);
2878 case NVPTXISD::Suld2DArrayV4I16Trap:
2879 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
2880 Ops.push_back(TexHandle);
2881 Ops.push_back(N->getOperand(2));
2882 Ops.push_back(N->getOperand(3));
2883 Ops.push_back(N->getOperand(4));
2884 Ops.push_back(Chain);
2886 case NVPTXISD::Suld2DArrayV4I32Trap:
2887 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
2888 Ops.push_back(TexHandle);
2889 Ops.push_back(N->getOperand(2));
2890 Ops.push_back(N->getOperand(3));
2891 Ops.push_back(N->getOperand(4));
2892 Ops.push_back(Chain);
// 3D surfaces: handle + x + y + z coordinates.
2894 case NVPTXISD::Suld3DI8Trap:
2895 Opc = NVPTX::SULD_3D_I8_TRAP;
2896 Ops.push_back(TexHandle);
2897 Ops.push_back(N->getOperand(2));
2898 Ops.push_back(N->getOperand(3));
2899 Ops.push_back(N->getOperand(4));
2900 Ops.push_back(Chain);
2902 case NVPTXISD::Suld3DI16Trap:
2903 Opc = NVPTX::SULD_3D_I16_TRAP;
2904 Ops.push_back(TexHandle);
2905 Ops.push_back(N->getOperand(2));
2906 Ops.push_back(N->getOperand(3));
2907 Ops.push_back(N->getOperand(4));
2908 Ops.push_back(Chain);
2910 case NVPTXISD::Suld3DI32Trap:
2911 Opc = NVPTX::SULD_3D_I32_TRAP;
2912 Ops.push_back(TexHandle);
2913 Ops.push_back(N->getOperand(2));
2914 Ops.push_back(N->getOperand(3));
2915 Ops.push_back(N->getOperand(4));
2916 Ops.push_back(Chain);
2918 case NVPTXISD::Suld3DV2I8Trap:
2919 Opc = NVPTX::SULD_3D_V2I8_TRAP;
2920 Ops.push_back(TexHandle);
2921 Ops.push_back(N->getOperand(2));
2922 Ops.push_back(N->getOperand(3));
2923 Ops.push_back(N->getOperand(4));
2924 Ops.push_back(Chain);
2926 case NVPTXISD::Suld3DV2I16Trap:
2927 Opc = NVPTX::SULD_3D_V2I16_TRAP;
2928 Ops.push_back(TexHandle);
2929 Ops.push_back(N->getOperand(2));
2930 Ops.push_back(N->getOperand(3));
2931 Ops.push_back(N->getOperand(4));
2932 Ops.push_back(Chain);
2934 case NVPTXISD::Suld3DV2I32Trap:
2935 Opc = NVPTX::SULD_3D_V2I32_TRAP;
2936 Ops.push_back(TexHandle);
2937 Ops.push_back(N->getOperand(2));
2938 Ops.push_back(N->getOperand(3));
2939 Ops.push_back(N->getOperand(4));
2940 Ops.push_back(Chain);
2942 case NVPTXISD::Suld3DV4I8Trap:
2943 Opc = NVPTX::SULD_3D_V4I8_TRAP;
2944 Ops.push_back(TexHandle);
2945 Ops.push_back(N->getOperand(2));
2946 Ops.push_back(N->getOperand(3));
2947 Ops.push_back(N->getOperand(4));
2948 Ops.push_back(Chain);
2950 case NVPTXISD::Suld3DV4I16Trap:
2951 Opc = NVPTX::SULD_3D_V4I16_TRAP;
2952 Ops.push_back(TexHandle);
2953 Ops.push_back(N->getOperand(2));
2954 Ops.push_back(N->getOperand(3));
2955 Ops.push_back(N->getOperand(4));
2956 Ops.push_back(Chain);
2958 case NVPTXISD::Suld3DV4I32Trap:
2959 Opc = NVPTX::SULD_3D_V4I32_TRAP;
2960 Ops.push_back(TexHandle);
2961 Ops.push_back(N->getOperand(2));
2962 Ops.push_back(N->getOperand(3));
2963 Ops.push_back(N->getOperand(4));
2964 Ops.push_back(Chain);
2967 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
2971 /// SelectBFE - Look for instruction sequences that can be made more efficient
2972 /// by using the 'bfe' (bit-field extract) PTX instruction
2973 SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
2974 SDValue LHS = N->getOperand(0);
2975 SDValue RHS = N->getOperand(1);
2979 bool IsSigned = false;
2981 if (N->getOpcode() == ISD::AND) {
2982 // Canonicalize the operands
2983 // We want 'and %val, %mask'
2984 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
2985 std::swap(LHS, RHS);
2988 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
2990 // We need a constant mask on the RHS of the AND
2994 // Extract the mask bits
2995 uint64_t MaskVal = Mask->getZExtValue();
2996 if (!isMask_64(MaskVal)) {
2997 // We *could* handle shifted masks here, but doing so would require an
2998 // 'and' operation to fix up the low-order bits so we would trade
2999 // shr+and for bfe+and, which has the same throughput
3003 // How many bits are in our mask?
3004 uint64_t NumBits = CountTrailingOnes_64(MaskVal);
3005 Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
3007 if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
3008 // We have a 'srl/and' pair, extract the effective start bit and length
3009 Val = LHS.getNode()->getOperand(0);
3010 Start = LHS.getNode()->getOperand(1);
3011 ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
3013 uint64_t StartVal = StartConst->getZExtValue();
3014 // How many "good" bits do we have left? "good" is defined here as bits
3015 // that exist in the original value, not shifted in.
3016 uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
3017 if (NumBits > GoodBits) {
3018 // Do not handle the case where bits have been shifted in. In theory
3019 // we could handle this, but the cost is likely higher than just
3020 // emitting the srl/and pair.
3023 Start = CurDAG->getTargetConstant(StartVal, MVT::i32);
3025 // Do not handle the case where the shift amount (can be zero if no srl
3026 // was found) is not constant. We could handle this case, but it would
3027 // require run-time logic that would be more expensive than just
3028 // emitting the srl/and pair.
3032 // Do not handle the case where the LHS of the and is not a shift. While
3033 // it would be trivial to handle this case, it would just transform
3034 // 'and' -> 'bfe', but 'and' has higher-throughput.
3037 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
3038 if (LHS->getOpcode() == ISD::AND) {
3039 ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
3041 // Shift amount must be constant
3045 uint64_t ShiftAmt = ShiftCnst->getZExtValue();
3047 SDValue AndLHS = LHS->getOperand(0);
3048 SDValue AndRHS = LHS->getOperand(1);
3050 // Canonicalize the AND to have the mask on the RHS
3051 if (isa<ConstantSDNode>(AndLHS)) {
3052 std::swap(AndLHS, AndRHS);
3055 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
3057 // Mask must be constant
3061 uint64_t MaskVal = MaskCnst->getZExtValue();
3064 if (isMask_64(MaskVal)) {
3066 // The number of bits in the result bitfield will be the number of
3067 // trailing ones (the AND) minus the number of bits we shift off
3068 NumBits = CountTrailingOnes_64(MaskVal) - ShiftAmt;
3069 } else if (isShiftedMask_64(MaskVal)) {
3070 NumZeros = countTrailingZeros(MaskVal);
3071 unsigned NumOnes = CountTrailingOnes_64(MaskVal >> NumZeros);
3072 // The number of bits in the result bitfield will be the number of
3073 // trailing zeros plus the number of set bits in the mask minus the
3074 // number of bits we shift off
3075 NumBits = NumZeros + NumOnes - ShiftAmt;
3077 // This is not a mask we can handle
3081 if (ShiftAmt < NumZeros) {
3082 // Handling this case would require extra logic that would make this
3083 // transformation non-profitable
3088 Start = CurDAG->getTargetConstant(ShiftAmt, MVT::i32);
3089 Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
3090 } else if (LHS->getOpcode() == ISD::SHL) {
3091 // Here, we have a pattern like:
3093 // (sra (shl val, NN), MM)
3095 // (srl (shl val, NN), MM)
3097 // If MM >= NN, we can efficiently optimize this with bfe
3098 Val = LHS->getOperand(0);
3100 SDValue ShlRHS = LHS->getOperand(1);
3101 ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
3103 // Shift amount must be constant
3106 uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
3108 SDValue ShrRHS = RHS;
3109 ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
3111 // Shift amount must be constant
3114 uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
3116 // To avoid extra codegen and be profitable, we need Outer >= Inner
3117 if (OuterShiftAmt < InnerShiftAmt) {
3121 // If the outer shift is more than the type size, we have no bitfield to
3122 // extract (since we also check that the inner shift is <= the outer shift
3123 // then this also implies that the inner shift is < the type size)
3124 if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
3129 CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, MVT::i32);
3131 CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
3132 OuterShiftAmt, MVT::i32);
3134 if (N->getOpcode() == ISD::SRA) {
3135 // If we have a arithmetic right shift, we need to use the signed bfe
3150 // For the BFE operations we form here from "and" and "srl", always use the
3151 // unsigned variants.
3152 if (Val.getValueType() == MVT::i32) {
3154 Opc = NVPTX::BFE_S32rii;
3156 Opc = NVPTX::BFE_U32rii;
3158 } else if (Val.getValueType() == MVT::i64) {
3160 Opc = NVPTX::BFE_S64rii;
3162 Opc = NVPTX::BFE_U64rii;
3165 // We cannot handle this type
3174 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3179 // SelectDirectAddr - Match a direct address for DAG.
3180 // A direct address could be a globaladdress or externalsymbol.
3181 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
3182 // Return true if TGA or ES.
3183 if (N.getOpcode() == ISD::TargetGlobalAddress ||
3184 N.getOpcode() == ISD::TargetExternalSymbol) {
3188 if (N.getOpcode() == NVPTXISD::Wrapper) {
3189 Address = N.getOperand(0);
3192 if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3193 unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
3194 if (IID == Intrinsic::nvvm_ptr_gen_to_param)
3195 if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
3196 return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
3202 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
3203 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3204 if (Addr.getOpcode() == ISD::ADD) {
3205 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3206 SDValue base = Addr.getOperand(0);
3207 if (SelectDirectAddr(base, Base)) {
3208 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
3217 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
3218 SDValue &Base, SDValue &Offset) {
3219 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
3223 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
3224 SDValue &Base, SDValue &Offset) {
3225 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
3229 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
3230 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3231 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
3232 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3233 Offset = CurDAG->getTargetConstant(0, mvt);
3236 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
3237 Addr.getOpcode() == ISD::TargetGlobalAddress)
3238 return false; // direct calls.
3240 if (Addr.getOpcode() == ISD::ADD) {
3241 if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
3244 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3245 if (FrameIndexSDNode *FIN =
3246 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
3247 // Constant offset from frame ref.
3248 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3250 Base = Addr.getOperand(0);
3251 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
3259 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
3260 SDValue &Base, SDValue &Offset) {
3261 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
3265 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
3266 SDValue &Base, SDValue &Offset) {
3267 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
3270 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
3271 unsigned int spN) const {
3272 const Value *Src = nullptr;
3273 // Even though MemIntrinsicSDNode is a subclas of MemSDNode,
3274 // the classof() for MemSDNode does not include MemIntrinsicSDNode
3275 // (See SelectionDAGNodes.h). So we need to check for both.
3276 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
3277 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3279 Src = mN->getMemOperand()->getValue();
3280 } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
3281 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3283 Src = mN->getMemOperand()->getValue();
3287 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
3288 return (PT->getAddressSpace() == spN);
3292 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
3293 /// inline asm expressions.
3294 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
3295 const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
3297 switch (ConstraintCode) {
3301 if (SelectDirectAddr(Op, Op0)) {
3302 OutOps.push_back(Op0);
3303 OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
3306 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
3307 OutOps.push_back(Op0);
3308 OutOps.push_back(Op1);