1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines an instruction selector for the NVPTX target.
12 //===----------------------------------------------------------------------===//
14 #include "NVPTXISelDAGToDAG.h"
15 #include "llvm/IR/GlobalValue.h"
16 #include "llvm/IR/Instructions.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/ErrorHandling.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include "llvm/Target/TargetIntrinsicInfo.h"
25 #define DEBUG_TYPE "nvptx-isel"
// Command-line knobs controlling NVPTX floating-point code generation.
// FMA contraction level: 0 = none, 1 = contract, 2 = contract aggressively.
28 FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
29 cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
30 " 1: do it 2: do it aggressively"),
// f32 division precision: 0 = div.approx, 1 = div.full, 2 = IEEE div.rnd.
33 static cl::opt<int> UsePrecDivF32(
34 "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
35 cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
36 " IEEE Compliant F32 div.rnd if avaiable."),
// f32 square-root precision: 0 = sqrt.approx, 1 = sqrt.rn.
40 UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
41 cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
// Whether f32 subnormal results are flushed to sign-preserving zero (.ftz).
45 FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
46 cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
50 /// createNVPTXISelDag - This pass converts a legalized DAG into a
51 /// NVPTX-specific DAG, ready for instruction scheduling.
52 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
53 llvm::CodeGenOpt::Level OptLevel) {
// The pass manager takes ownership of the returned pass object.
54 return new NVPTXDAGToDAGISel(TM, OptLevel);
// Constructor: cache per-function selection policy flags derived from the
// optimization level, subtarget capabilities, and command-line options.
57 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
58 CodeGenOpt::Level OptLevel)
59 : SelectionDAGISel(tm, OptLevel),
60 Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
// FMA formation requires optimization enabled, hardware FMA support, and a
// contraction level of at least 1 from -nvptx-fma-level.
62 doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1);
63 doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1);
// Aggressive FMA variants additionally require contraction level 2.
// NOTE(review): the left-hand sides of these two initializers are elided in
// this listing — confirm against the full source.
65 (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel == 2);
67 (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2);
69 allowFMA = (FMAContractLevel >= 1);
// Wide-multiply selection is only worthwhile when optimizing.
71 doMulWide = (OptLevel > 0);
// Returns the f32 division lowering level (0 = div.approx, 1 = div.full,
// 2 = IEEE-compliant div.rnd), honoring an explicit command-line choice
// before falling back to fast-math-based defaults.
74 int NVPTXDAGToDAGISel::getDivF32Level() const {
75 if (UsePrecDivF32.getNumOccurrences() > 0) {
76 // If nvptx-prec-divf32=N is used on the command-line, always honor it
79 // Otherwise, use div.approx if fast math is enabled
80 if (TM.Options.UnsafeFPMath)
// Returns true if f32 sqrt should use the precise sqrt.rn form; an explicit
// -nvptx-prec-sqrtf32 setting wins, otherwise fast math selects sqrt.approx.
87 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
88 if (UsePrecSqrtF32.getNumOccurrences() > 0) {
89 // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
90 return UsePrecSqrtF32;
92 // Otherwise, use sqrt.approx if fast math is enabled
93 if (TM.Options.UnsafeFPMath)
// Returns true if f32 operations should flush subnormals to zero (.ftz).
// Priority: explicit -nvptx-f32ftz flag, then the function's "nvptx-f32ftz"
// string attribute.
100 bool NVPTXDAGToDAGISel::useF32FTZ() const {
101 if (FtzEnabled.getNumOccurrences() > 0) {
102 // If nvptx-f32ftz is used on the command-line, always honor it
105 const Function *F = MF->getFunction();
106 // Otherwise, check for an nvptx-f32ftz attribute on the function
107 if (F->hasFnAttribute("nvptx-f32ftz"))
// The attribute is a string-valued attribute; enabled only when it is
// literally "true".
108 return (F->getAttributes().getAttribute(AttributeSet::FunctionIndex,
110 .getValueAsString() == "true");
116 /// Select - Select instructions not customized! Used for
117 /// expanded, promoted and normal instructions.
118 SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
120 if (N->isMachineOpcode()) {
122 return nullptr; // Already selected.
125 SDNode *ResNode = nullptr;
// Dispatch NVPTX-specific and memory opcodes to dedicated selection
// helpers; anything unhandled falls through to the TableGen'd matcher.
126 switch (N->getOpcode()) {
// Plain scalar load/store.
128 ResNode = SelectLoad(N);
131 ResNode = SelectStore(N);
// Vector loads (v2/v4 element groups).
133 case NVPTXISD::LoadV2:
134 case NVPTXISD::LoadV4:
135 ResNode = SelectLoadVector(N);
// Cached (ld.global.nc / ldu) vector loads.
137 case NVPTXISD::LDGV2:
138 case NVPTXISD::LDGV4:
139 case NVPTXISD::LDUV2:
140 case NVPTXISD::LDUV4:
141 ResNode = SelectLDGLDUVector(N);
// Vector stores.
143 case NVPTXISD::StoreV2:
144 case NVPTXISD::StoreV4:
145 ResNode = SelectStoreVector(N);
// Parameter-passing and return-value pseudo-ops produced by call lowering.
147 case NVPTXISD::LoadParam:
148 case NVPTXISD::LoadParamV2:
149 case NVPTXISD::LoadParamV4:
150 ResNode = SelectLoadParam(N);
152 case NVPTXISD::StoreRetval:
153 case NVPTXISD::StoreRetvalV2:
154 case NVPTXISD::StoreRetvalV4:
155 ResNode = SelectStoreRetval(N);
157 case NVPTXISD::StoreParam:
158 case NVPTXISD::StoreParamV2:
159 case NVPTXISD::StoreParamV4:
160 case NVPTXISD::StoreParamS32:
161 case NVPTXISD::StoreParamU32:
162 ResNode = SelectStoreParam(N);
// Chainless intrinsics (e.g. texture/surface handle queries).
164 case ISD::INTRINSIC_WO_CHAIN:
165 ResNode = SelectIntrinsicNoChain(N);
// Texture fetch variants: dimensionality (1D/2D/3D, array forms) x result
// type (float/i32) x coordinate type, plus level/gradient sampling.
167 case NVPTXISD::Tex1DFloatI32:
168 case NVPTXISD::Tex1DFloatFloat:
169 case NVPTXISD::Tex1DFloatFloatLevel:
170 case NVPTXISD::Tex1DFloatFloatGrad:
171 case NVPTXISD::Tex1DI32I32:
172 case NVPTXISD::Tex1DI32Float:
173 case NVPTXISD::Tex1DI32FloatLevel:
174 case NVPTXISD::Tex1DI32FloatGrad:
175 case NVPTXISD::Tex1DArrayFloatI32:
176 case NVPTXISD::Tex1DArrayFloatFloat:
177 case NVPTXISD::Tex1DArrayFloatFloatLevel:
178 case NVPTXISD::Tex1DArrayFloatFloatGrad:
179 case NVPTXISD::Tex1DArrayI32I32:
180 case NVPTXISD::Tex1DArrayI32Float:
181 case NVPTXISD::Tex1DArrayI32FloatLevel:
182 case NVPTXISD::Tex1DArrayI32FloatGrad:
183 case NVPTXISD::Tex2DFloatI32:
184 case NVPTXISD::Tex2DFloatFloat:
185 case NVPTXISD::Tex2DFloatFloatLevel:
186 case NVPTXISD::Tex2DFloatFloatGrad:
187 case NVPTXISD::Tex2DI32I32:
188 case NVPTXISD::Tex2DI32Float:
189 case NVPTXISD::Tex2DI32FloatLevel:
190 case NVPTXISD::Tex2DI32FloatGrad:
191 case NVPTXISD::Tex2DArrayFloatI32:
192 case NVPTXISD::Tex2DArrayFloatFloat:
193 case NVPTXISD::Tex2DArrayFloatFloatLevel:
194 case NVPTXISD::Tex2DArrayFloatFloatGrad:
195 case NVPTXISD::Tex2DArrayI32I32:
196 case NVPTXISD::Tex2DArrayI32Float:
197 case NVPTXISD::Tex2DArrayI32FloatLevel:
198 case NVPTXISD::Tex2DArrayI32FloatGrad:
199 case NVPTXISD::Tex3DFloatI32:
200 case NVPTXISD::Tex3DFloatFloat:
201 case NVPTXISD::Tex3DFloatFloatLevel:
202 case NVPTXISD::Tex3DFloatFloatGrad:
203 case NVPTXISD::Tex3DI32I32:
204 case NVPTXISD::Tex3DI32Float:
205 case NVPTXISD::Tex3DI32FloatLevel:
206 case NVPTXISD::Tex3DI32FloatGrad:
207 ResNode = SelectTextureIntrinsic(N);
// Surface load (suld) variants: dimensionality x element width (i8/i16/i32)
// x vector arity (scalar/v2/v4), all with trap-on-out-of-bounds semantics.
209 case NVPTXISD::Suld1DI8Trap:
210 case NVPTXISD::Suld1DI16Trap:
211 case NVPTXISD::Suld1DI32Trap:
212 case NVPTXISD::Suld1DV2I8Trap:
213 case NVPTXISD::Suld1DV2I16Trap:
214 case NVPTXISD::Suld1DV2I32Trap:
215 case NVPTXISD::Suld1DV4I8Trap:
216 case NVPTXISD::Suld1DV4I16Trap:
217 case NVPTXISD::Suld1DV4I32Trap:
218 case NVPTXISD::Suld1DArrayI8Trap:
219 case NVPTXISD::Suld1DArrayI16Trap:
220 case NVPTXISD::Suld1DArrayI32Trap:
221 case NVPTXISD::Suld1DArrayV2I8Trap:
222 case NVPTXISD::Suld1DArrayV2I16Trap:
223 case NVPTXISD::Suld1DArrayV2I32Trap:
224 case NVPTXISD::Suld1DArrayV4I8Trap:
225 case NVPTXISD::Suld1DArrayV4I16Trap:
226 case NVPTXISD::Suld1DArrayV4I32Trap:
227 case NVPTXISD::Suld2DI8Trap:
228 case NVPTXISD::Suld2DI16Trap:
229 case NVPTXISD::Suld2DI32Trap:
230 case NVPTXISD::Suld2DV2I8Trap:
231 case NVPTXISD::Suld2DV2I16Trap:
232 case NVPTXISD::Suld2DV2I32Trap:
233 case NVPTXISD::Suld2DV4I8Trap:
234 case NVPTXISD::Suld2DV4I16Trap:
235 case NVPTXISD::Suld2DV4I32Trap:
236 case NVPTXISD::Suld2DArrayI8Trap:
237 case NVPTXISD::Suld2DArrayI16Trap:
238 case NVPTXISD::Suld2DArrayI32Trap:
239 case NVPTXISD::Suld2DArrayV2I8Trap:
240 case NVPTXISD::Suld2DArrayV2I16Trap:
241 case NVPTXISD::Suld2DArrayV2I32Trap:
242 case NVPTXISD::Suld2DArrayV4I8Trap:
243 case NVPTXISD::Suld2DArrayV4I16Trap:
244 case NVPTXISD::Suld2DArrayV4I32Trap:
245 case NVPTXISD::Suld3DI8Trap:
246 case NVPTXISD::Suld3DI16Trap:
247 case NVPTXISD::Suld3DI32Trap:
248 case NVPTXISD::Suld3DV2I8Trap:
249 case NVPTXISD::Suld3DV2I16Trap:
250 case NVPTXISD::Suld3DV2I32Trap:
251 case NVPTXISD::Suld3DV4I8Trap:
252 case NVPTXISD::Suld3DV4I16Trap:
253 case NVPTXISD::Suld3DV4I32Trap:
254 ResNode = SelectSurfaceIntrinsic(N);
// Bit-field extract.
260 ResNode = SelectBFE(N);
// Generic-vs-specific address space conversions (cvta / cvta.to).
262 case ISD::ADDRSPACECAST:
263 ResNode = SelectAddrSpaceCast(N);
// Not custom-selected above: use the TableGen-generated matcher.
270 return SelectCode(N);
// Maps the address space of a memory node's source pointer to the PTX
// ld/st instruction-code value (GLOBAL/SHARED/LOCAL/PARAM/CONSTANT),
// defaulting to GENERIC when no pointer value or address space is known.
273 static unsigned int getCodeAddrSpace(MemSDNode *N,
274 const NVPTXSubtarget &Subtarget) {
275 const Value *Src = N->getMemOperand()->getValue();
// No underlying IR value to inspect: conservatively use generic addressing.
278 return NVPTX::PTXLdStInstCode::GENERIC;
280 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
281 switch (PT->getAddressSpace()) {
282 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
283 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
284 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
285 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
286 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
287 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
// Unrecognized address space: fall back to generic addressing.
291 return NVPTX::PTXLdStInstCode::GENERIC;
// Selects chainless intrinsics; operand 0 carries the intrinsic ID.
294 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
295 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
// Texture/surface handle materialization gets custom selection.
299 case Intrinsic::nvvm_texsurf_handle_internal:
300 return SelectTexSurfHandle(N);
// Materializes a 64-bit texture/surface handle from the global symbol
// wrapped by the intrinsic's operand.
304 SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
305 // Op 0 is the intrinsic ID
306 SDValue Wrapper = N->getOperand(1);
307 SDValue GlobalVal = Wrapper.getOperand(0);
308 return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64,
// Lowers ISD::ADDRSPACECAST to the PTX cvta (specific -> generic) or
// cvta.to (generic -> specific) conversion instructions, choosing the
// 32- or 64-bit form based on the subtarget pointer width. Casting
// directly between two non-generic spaces is a fatal error.
312 SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
313 SDValue Src = N->getOperand(0);
314 AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
315 unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
316 unsigned DstAddrSpace = CastN->getDestAddressSpace();
318 assert(SrcAddrSpace != DstAddrSpace &&
319 "addrspacecast must be between different address spaces");
321 if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
322 // Specific to generic
324 switch (SrcAddrSpace) {
325 default: report_fatal_error("Bad address space in addrspacecast");
326 case ADDRESS_SPACE_GLOBAL:
327 Opc = Subtarget.is64Bit() ? NVPTX::cvta_global_yes_64
328 : NVPTX::cvta_global_yes;
330 case ADDRESS_SPACE_SHARED:
331 Opc = Subtarget.is64Bit() ? NVPTX::cvta_shared_yes_64
332 : NVPTX::cvta_shared_yes;
334 case ADDRESS_SPACE_CONST:
335 Opc = Subtarget.is64Bit() ? NVPTX::cvta_const_yes_64
336 : NVPTX::cvta_const_yes;
338 case ADDRESS_SPACE_LOCAL:
339 Opc = Subtarget.is64Bit() ? NVPTX::cvta_local_yes_64
340 : NVPTX::cvta_local_yes;
343 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
345 // Generic to specific
// Only generic (address space 0) may be narrowed to a specific space.
346 if (SrcAddrSpace != 0)
347 report_fatal_error("Cannot cast between two non-generic address spaces");
349 switch (DstAddrSpace) {
350 default: report_fatal_error("Bad address space in addrspacecast");
351 case ADDRESS_SPACE_GLOBAL:
352 Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_global_yes_64
353 : NVPTX::cvta_to_global_yes;
355 case ADDRESS_SPACE_SHARED:
356 Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_shared_yes_64
357 : NVPTX::cvta_to_shared_yes;
359 case ADDRESS_SPACE_CONST:
360 Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_const_yes_64
361 : NVPTX::cvta_to_const_yes;
363 case ADDRESS_SPACE_LOCAL:
364 Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_local_yes_64
365 : NVPTX::cvta_to_local_yes;
368 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
// Selects a scalar LoadSDNode into an NVPTX LD_* machine instruction.
// Encodes volatility, address-space code, vector arity, source type and
// width as immediate operands, then picks the opcode by result type and
// addressing mode: avar (direct symbol), asi (symbol+imm), ari (reg+imm),
// areg (register).
372 SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
374 LoadSDNode *LD = cast<LoadSDNode>(N);
375 EVT LoadedVT = LD->getMemoryVT();
376 SDNode *NVPTXLD = nullptr;
378 // do not support pre/post inc/dec
382 if (!LoadedVT.isSimple())
385 // Address Space Setting
386 unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
389 // - .volatile is only available for .global and .shared
390 bool isVolatile = LD->isVolatile();
391 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
392 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
393 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
397 MVT SimpleVT = LoadedVT.getSimpleVT();
// Vector arity encoding for the ld instruction (scalar, v2 or v4).
398 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
399 if (SimpleVT.isVector()) {
400 unsigned num = SimpleVT.getVectorNumElements();
402 vecType = NVPTX::PTXLdStInstCode::V2;
404 vecType = NVPTX::PTXLdStInstCode::V4;
409 // Type Setting: fromType + fromTypeWidth
411 // Sign : ISD::SEXTLOAD
412 // Unsigned : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
414 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
415 MVT ScalarVT = SimpleVT.getScalarType();
416 // Read at least 8 bits (predicates are stored as 8-bit values)
417 unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
418 unsigned int fromType;
419 if ((LD->getExtensionType() == ISD::SEXTLOAD))
420 fromType = NVPTX::PTXLdStInstCode::Signed;
421 else if (ScalarVT.isFloatingPoint())
422 fromType = NVPTX::PTXLdStInstCode::Float;
424 fromType = NVPTX::PTXLdStInstCode::Unsigned;
426 // Create the machine instruction DAG
427 SDValue Chain = N->getOperand(0);
428 SDValue N1 = N->getOperand(1);
430 SDValue Offset, Base;
432 MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
// Addressing mode 1: direct address of a symbol (avar form).
434 if (SelectDirectAddr(N1, Addr)) {
437 Opcode = NVPTX::LD_i8_avar;
440 Opcode = NVPTX::LD_i16_avar;
443 Opcode = NVPTX::LD_i32_avar;
446 Opcode = NVPTX::LD_i64_avar;
449 Opcode = NVPTX::LD_f32_avar;
452 Opcode = NVPTX::LD_f64_avar;
457 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
458 getI32Imm(vecType), getI32Imm(fromType),
459 getI32Imm(fromTypeWidth), Addr, Chain };
460 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Addressing mode 2: symbol + immediate offset (asi form).
461 } else if (Subtarget.is64Bit()
462 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
463 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
466 Opcode = NVPTX::LD_i8_asi;
469 Opcode = NVPTX::LD_i16_asi;
472 Opcode = NVPTX::LD_i32_asi;
475 Opcode = NVPTX::LD_i64_asi;
478 Opcode = NVPTX::LD_f32_asi;
481 Opcode = NVPTX::LD_f64_asi;
486 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
487 getI32Imm(vecType), getI32Imm(fromType),
488 getI32Imm(fromTypeWidth), Base, Offset, Chain };
489 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Addressing mode 3: register + immediate offset (ari form),
// 64- or 32-bit pointer variant by subtarget.
490 } else if (Subtarget.is64Bit()
491 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
492 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
493 if (Subtarget.is64Bit()) {
496 Opcode = NVPTX::LD_i8_ari_64;
499 Opcode = NVPTX::LD_i16_ari_64;
502 Opcode = NVPTX::LD_i32_ari_64;
505 Opcode = NVPTX::LD_i64_ari_64;
508 Opcode = NVPTX::LD_f32_ari_64;
511 Opcode = NVPTX::LD_f64_ari_64;
519 Opcode = NVPTX::LD_i8_ari;
522 Opcode = NVPTX::LD_i16_ari;
525 Opcode = NVPTX::LD_i32_ari;
528 Opcode = NVPTX::LD_i64_ari;
531 Opcode = NVPTX::LD_f32_ari;
534 Opcode = NVPTX::LD_f64_ari;
540 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
541 getI32Imm(vecType), getI32Imm(fromType),
542 getI32Imm(fromTypeWidth), Base, Offset, Chain };
543 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Addressing mode 4 (fallback): plain register (areg form).
545 if (Subtarget.is64Bit()) {
548 Opcode = NVPTX::LD_i8_areg_64;
551 Opcode = NVPTX::LD_i16_areg_64;
554 Opcode = NVPTX::LD_i32_areg_64;
557 Opcode = NVPTX::LD_i64_areg_64;
560 Opcode = NVPTX::LD_f32_areg_64;
563 Opcode = NVPTX::LD_f64_areg_64;
571 Opcode = NVPTX::LD_i8_areg;
574 Opcode = NVPTX::LD_i16_areg;
577 Opcode = NVPTX::LD_i32_areg;
580 Opcode = NVPTX::LD_i64_areg;
583 Opcode = NVPTX::LD_f32_areg;
586 Opcode = NVPTX::LD_f64_areg;
592 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
593 getI32Imm(vecType), getI32Imm(fromType),
594 getI32Imm(fromTypeWidth), N1, Chain };
595 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Attach the original memory operand so later passes keep alias info.
599 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
600 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
601 cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
// Selects NVPTXISD::LoadV2/LoadV4 into an NVPTX LDV_* machine instruction.
// Mirrors SelectLoad: encodes volatility, address space, vector arity and
// element type/width, then picks the opcode by element type and addressing
// mode (avar / asi / ari / areg, with 64-bit variants).
607 SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
609 SDValue Chain = N->getOperand(0);
610 SDValue Op1 = N->getOperand(1);
611 SDValue Addr, Offset, Base;
615 MemSDNode *MemSD = cast<MemSDNode>(N);
616 EVT LoadedVT = MemSD->getMemoryVT();
618 if (!LoadedVT.isSimple())
621 // Address Space Setting
622 unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
625 // - .volatile is only available for .global and .shared
626 bool IsVolatile = MemSD->isVolatile();
627 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
628 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
629 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
633 MVT SimpleVT = LoadedVT.getSimpleVT();
635 // Type Setting: fromType + fromTypeWidth
637 // Sign : ISD::SEXTLOAD
638 // Unsigned : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
640 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
641 MVT ScalarVT = SimpleVT.getScalarType();
642 // Read at least 8 bits (predicates are stored as 8-bit values)
643 unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
644 unsigned int FromType;
645 // The last operand holds the original LoadSDNode::getExtensionType() value
646 unsigned ExtensionType = cast<ConstantSDNode>(
647 N->getOperand(N->getNumOperands() - 1))->getZExtValue();
648 if (ExtensionType == ISD::SEXTLOAD)
649 FromType = NVPTX::PTXLdStInstCode::Signed;
650 else if (ScalarVT.isFloatingPoint())
651 FromType = NVPTX::PTXLdStInstCode::Float;
653 FromType = NVPTX::PTXLdStInstCode::Unsigned;
// Vector arity follows the NVPTXISD opcode (V2 vs V4).
657 switch (N->getOpcode()) {
658 case NVPTXISD::LoadV2:
659 VecType = NVPTX::PTXLdStInstCode::V2;
661 case NVPTXISD::LoadV4:
662 VecType = NVPTX::PTXLdStInstCode::V4;
668 EVT EltVT = N->getValueType(0);
// Addressing mode 1: direct symbol address (avar form).
670 if (SelectDirectAddr(Op1, Addr)) {
671 switch (N->getOpcode()) {
674 case NVPTXISD::LoadV2:
675 switch (EltVT.getSimpleVT().SimpleTy) {
679 Opcode = NVPTX::LDV_i8_v2_avar;
682 Opcode = NVPTX::LDV_i16_v2_avar;
685 Opcode = NVPTX::LDV_i32_v2_avar;
688 Opcode = NVPTX::LDV_i64_v2_avar;
691 Opcode = NVPTX::LDV_f32_v2_avar;
694 Opcode = NVPTX::LDV_f64_v2_avar;
698 case NVPTXISD::LoadV4:
699 switch (EltVT.getSimpleVT().SimpleTy) {
703 Opcode = NVPTX::LDV_i8_v4_avar;
706 Opcode = NVPTX::LDV_i16_v4_avar;
709 Opcode = NVPTX::LDV_i32_v4_avar;
712 Opcode = NVPTX::LDV_f32_v4_avar;
718 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
719 getI32Imm(VecType), getI32Imm(FromType),
720 getI32Imm(FromTypeWidth), Addr, Chain };
721 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Addressing mode 2: symbol + immediate offset (asi form).
722 } else if (Subtarget.is64Bit()
723 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
724 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
725 switch (N->getOpcode()) {
728 case NVPTXISD::LoadV2:
729 switch (EltVT.getSimpleVT().SimpleTy) {
733 Opcode = NVPTX::LDV_i8_v2_asi;
736 Opcode = NVPTX::LDV_i16_v2_asi;
739 Opcode = NVPTX::LDV_i32_v2_asi;
742 Opcode = NVPTX::LDV_i64_v2_asi;
745 Opcode = NVPTX::LDV_f32_v2_asi;
748 Opcode = NVPTX::LDV_f64_v2_asi;
752 case NVPTXISD::LoadV4:
753 switch (EltVT.getSimpleVT().SimpleTy) {
757 Opcode = NVPTX::LDV_i8_v4_asi;
760 Opcode = NVPTX::LDV_i16_v4_asi;
763 Opcode = NVPTX::LDV_i32_v4_asi;
766 Opcode = NVPTX::LDV_f32_v4_asi;
772 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
773 getI32Imm(VecType), getI32Imm(FromType),
774 getI32Imm(FromTypeWidth), Base, Offset, Chain };
775 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Addressing mode 3: register + immediate offset (ari form).
776 } else if (Subtarget.is64Bit()
777 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
778 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
779 if (Subtarget.is64Bit()) {
780 switch (N->getOpcode()) {
783 case NVPTXISD::LoadV2:
784 switch (EltVT.getSimpleVT().SimpleTy) {
788 Opcode = NVPTX::LDV_i8_v2_ari_64;
791 Opcode = NVPTX::LDV_i16_v2_ari_64;
794 Opcode = NVPTX::LDV_i32_v2_ari_64;
797 Opcode = NVPTX::LDV_i64_v2_ari_64;
800 Opcode = NVPTX::LDV_f32_v2_ari_64;
803 Opcode = NVPTX::LDV_f64_v2_ari_64;
807 case NVPTXISD::LoadV4:
808 switch (EltVT.getSimpleVT().SimpleTy) {
812 Opcode = NVPTX::LDV_i8_v4_ari_64;
815 Opcode = NVPTX::LDV_i16_v4_ari_64;
818 Opcode = NVPTX::LDV_i32_v4_ari_64;
821 Opcode = NVPTX::LDV_f32_v4_ari_64;
827 switch (N->getOpcode()) {
830 case NVPTXISD::LoadV2:
831 switch (EltVT.getSimpleVT().SimpleTy) {
835 Opcode = NVPTX::LDV_i8_v2_ari;
838 Opcode = NVPTX::LDV_i16_v2_ari;
841 Opcode = NVPTX::LDV_i32_v2_ari;
844 Opcode = NVPTX::LDV_i64_v2_ari;
847 Opcode = NVPTX::LDV_f32_v2_ari;
850 Opcode = NVPTX::LDV_f64_v2_ari;
854 case NVPTXISD::LoadV4:
855 switch (EltVT.getSimpleVT().SimpleTy) {
859 Opcode = NVPTX::LDV_i8_v4_ari;
862 Opcode = NVPTX::LDV_i16_v4_ari;
865 Opcode = NVPTX::LDV_i32_v4_ari;
868 Opcode = NVPTX::LDV_f32_v4_ari;
875 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
876 getI32Imm(VecType), getI32Imm(FromType),
877 getI32Imm(FromTypeWidth), Base, Offset, Chain };
879 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Addressing mode 4 (fallback): plain register (areg form).
881 if (Subtarget.is64Bit()) {
882 switch (N->getOpcode()) {
885 case NVPTXISD::LoadV2:
886 switch (EltVT.getSimpleVT().SimpleTy) {
890 Opcode = NVPTX::LDV_i8_v2_areg_64;
893 Opcode = NVPTX::LDV_i16_v2_areg_64;
896 Opcode = NVPTX::LDV_i32_v2_areg_64;
899 Opcode = NVPTX::LDV_i64_v2_areg_64;
902 Opcode = NVPTX::LDV_f32_v2_areg_64;
905 Opcode = NVPTX::LDV_f64_v2_areg_64;
909 case NVPTXISD::LoadV4:
910 switch (EltVT.getSimpleVT().SimpleTy) {
914 Opcode = NVPTX::LDV_i8_v4_areg_64;
917 Opcode = NVPTX::LDV_i16_v4_areg_64;
920 Opcode = NVPTX::LDV_i32_v4_areg_64;
923 Opcode = NVPTX::LDV_f32_v4_areg_64;
929 switch (N->getOpcode()) {
932 case NVPTXISD::LoadV2:
933 switch (EltVT.getSimpleVT().SimpleTy) {
937 Opcode = NVPTX::LDV_i8_v2_areg;
940 Opcode = NVPTX::LDV_i16_v2_areg;
943 Opcode = NVPTX::LDV_i32_v2_areg;
946 Opcode = NVPTX::LDV_i64_v2_areg;
949 Opcode = NVPTX::LDV_f32_v2_areg;
952 Opcode = NVPTX::LDV_f64_v2_areg;
956 case NVPTXISD::LoadV4:
957 switch (EltVT.getSimpleVT().SimpleTy) {
961 Opcode = NVPTX::LDV_i8_v4_areg;
964 Opcode = NVPTX::LDV_i16_v4_areg;
967 Opcode = NVPTX::LDV_i32_v4_areg;
970 Opcode = NVPTX::LDV_f32_v4_areg;
977 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
978 getI32Imm(VecType), getI32Imm(FromType),
979 getI32Imm(FromTypeWidth), Op1, Chain };
980 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Attach the original memory operand so later passes keep alias info.
983 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
984 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
985 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
// Selects NVPTXISD::LDGV2/LDGV4 (non-coherent global load, ld.global.nc)
// and LDUV2/LDUV4 (load-uniform, ldu) vector nodes into the corresponding
// INT_PTX_LDG_*/INT_PTX_LDU_* machine instructions. Opcode choice is by
// node kind, element type, and addressing mode: avar (direct symbol),
// ari32/ari64 (register + immediate), areg32/areg64 (register).
990 SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
992 SDValue Chain = N->getOperand(0);
993 SDValue Op1 = N->getOperand(1);
997 MemSDNode *Mem = cast<MemSDNode>(N);
998 SDValue Base, Offset, Addr;
1000 EVT EltVT = Mem->getMemoryVT().getVectorElementType();
// Addressing mode 1: direct symbol address (avar form).
1002 if (SelectDirectAddr(Op1, Addr)) {
1003 switch (N->getOpcode()) {
1006 case NVPTXISD::LDGV2:
1007 switch (EltVT.getSimpleVT().SimpleTy) {
1011 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
1014 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
1017 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
1020 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
1023 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
1026 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
1030 case NVPTXISD::LDUV2:
1031 switch (EltVT.getSimpleVT().SimpleTy) {
1035 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
1038 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
1041 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
1044 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
1047 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
1050 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
1054 case NVPTXISD::LDGV4:
1055 switch (EltVT.getSimpleVT().SimpleTy) {
1059 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
1062 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
1065 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
1068 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
1072 case NVPTXISD::LDUV4:
1073 switch (EltVT.getSimpleVT().SimpleTy) {
1077 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
1080 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
1083 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
1086 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
1092 SDValue Ops[] = { Addr, Chain };
1093 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Addressing mode 2: register + immediate offset (ari form); note there is
// no asi mode here, unlike SelectLoad/SelectLoadVector.
1094 } else if (Subtarget.is64Bit()
1095 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1096 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1097 if (Subtarget.is64Bit()) {
1098 switch (N->getOpcode()) {
1101 case NVPTXISD::LDGV2:
1102 switch (EltVT.getSimpleVT().SimpleTy) {
1106 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
1109 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
1112 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
1115 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
1118 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
1121 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
1125 case NVPTXISD::LDUV2:
1126 switch (EltVT.getSimpleVT().SimpleTy) {
1130 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
1133 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
1136 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
1139 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
1142 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
1145 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
1149 case NVPTXISD::LDGV4:
1150 switch (EltVT.getSimpleVT().SimpleTy) {
1154 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
1157 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
1160 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
1163 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
1167 case NVPTXISD::LDUV4:
1168 switch (EltVT.getSimpleVT().SimpleTy) {
1172 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
1175 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
1178 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
1181 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
1187 switch (N->getOpcode()) {
1190 case NVPTXISD::LDGV2:
1191 switch (EltVT.getSimpleVT().SimpleTy) {
1195 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
1198 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
1201 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
1204 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
1207 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
1210 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
1214 case NVPTXISD::LDUV2:
1215 switch (EltVT.getSimpleVT().SimpleTy) {
1219 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
1222 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
1225 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
1228 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
1231 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
1234 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
1238 case NVPTXISD::LDGV4:
1239 switch (EltVT.getSimpleVT().SimpleTy) {
1243 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
1246 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
1249 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
1252 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
1256 case NVPTXISD::LDUV4:
1257 switch (EltVT.getSimpleVT().SimpleTy) {
1261 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
1264 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
1267 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
1270 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
1277 SDValue Ops[] = { Base, Offset, Chain };
1279 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Addressing mode 3 (fallback): plain register (areg form).
1281 if (Subtarget.is64Bit()) {
1282 switch (N->getOpcode()) {
1285 case NVPTXISD::LDGV2:
1286 switch (EltVT.getSimpleVT().SimpleTy) {
1290 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
1293 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
1296 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
1299 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
1302 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
1305 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
1309 case NVPTXISD::LDUV2:
1310 switch (EltVT.getSimpleVT().SimpleTy) {
1314 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
1317 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
1320 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
1323 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
1326 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
1329 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
1333 case NVPTXISD::LDGV4:
1334 switch (EltVT.getSimpleVT().SimpleTy) {
1338 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
1341 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
1344 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
1347 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
1351 case NVPTXISD::LDUV4:
1352 switch (EltVT.getSimpleVT().SimpleTy) {
1356 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
1359 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
1362 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
1365 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
1371 switch (N->getOpcode()) {
1374 case NVPTXISD::LDGV2:
1375 switch (EltVT.getSimpleVT().SimpleTy) {
1379 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
1382 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
1385 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
1388 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
1391 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
1394 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
1398 case NVPTXISD::LDUV2:
1399 switch (EltVT.getSimpleVT().SimpleTy) {
1403 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
1406 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
1409 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
1412 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
1415 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
1418 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
1422 case NVPTXISD::LDGV4:
1423 switch (EltVT.getSimpleVT().SimpleTy) {
1427 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
1430 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
1433 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
1436 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
1440 case NVPTXISD::LDUV4:
1441 switch (EltVT.getSimpleVT().SimpleTy) {
1445 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
1448 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
1451 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
1454 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
1461 SDValue Ops[] = { Op1, Chain };
1462 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Attach the original memory operand so later passes keep alias info.
1465 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1466 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1467 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1472 SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
1474 StoreSDNode *ST = cast<StoreSDNode>(N);
1475 EVT StoreVT = ST->getMemoryVT();
1476 SDNode *NVPTXST = nullptr;
1478 // do not support pre/post inc/dec
1479 if (ST->isIndexed())
1482 if (!StoreVT.isSimple())
1485 // Address Space Setting
1486 unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
1489 // - .volatile is only availalble for .global and .shared
1490 bool isVolatile = ST->isVolatile();
1491 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1492 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1493 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1497 MVT SimpleVT = StoreVT.getSimpleVT();
1498 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
1499 if (SimpleVT.isVector()) {
1500 unsigned num = SimpleVT.getVectorNumElements();
1502 vecType = NVPTX::PTXLdStInstCode::V2;
1504 vecType = NVPTX::PTXLdStInstCode::V4;
1509 // Type Setting: toType + toTypeWidth
1510 // - for integer type, always use 'u'
1512 MVT ScalarVT = SimpleVT.getScalarType();
1513 unsigned toTypeWidth = ScalarVT.getSizeInBits();
1514 unsigned int toType;
1515 if (ScalarVT.isFloatingPoint())
1516 toType = NVPTX::PTXLdStInstCode::Float;
1518 toType = NVPTX::PTXLdStInstCode::Unsigned;
1520 // Create the machine instruction DAG
1521 SDValue Chain = N->getOperand(0);
1522 SDValue N1 = N->getOperand(1);
1523 SDValue N2 = N->getOperand(2);
1525 SDValue Offset, Base;
1527 MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
1529 if (SelectDirectAddr(N2, Addr)) {
1532 Opcode = NVPTX::ST_i8_avar;
1535 Opcode = NVPTX::ST_i16_avar;
1538 Opcode = NVPTX::ST_i32_avar;
1541 Opcode = NVPTX::ST_i64_avar;
1544 Opcode = NVPTX::ST_f32_avar;
1547 Opcode = NVPTX::ST_f64_avar;
1552 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1553 getI32Imm(vecType), getI32Imm(toType),
1554 getI32Imm(toTypeWidth), Addr, Chain };
1555 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1556 } else if (Subtarget.is64Bit()
1557 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1558 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1561 Opcode = NVPTX::ST_i8_asi;
1564 Opcode = NVPTX::ST_i16_asi;
1567 Opcode = NVPTX::ST_i32_asi;
1570 Opcode = NVPTX::ST_i64_asi;
1573 Opcode = NVPTX::ST_f32_asi;
1576 Opcode = NVPTX::ST_f64_asi;
1581 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1582 getI32Imm(vecType), getI32Imm(toType),
1583 getI32Imm(toTypeWidth), Base, Offset, Chain };
1584 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1585 } else if (Subtarget.is64Bit()
1586 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1587 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1588 if (Subtarget.is64Bit()) {
1591 Opcode = NVPTX::ST_i8_ari_64;
1594 Opcode = NVPTX::ST_i16_ari_64;
1597 Opcode = NVPTX::ST_i32_ari_64;
1600 Opcode = NVPTX::ST_i64_ari_64;
1603 Opcode = NVPTX::ST_f32_ari_64;
1606 Opcode = NVPTX::ST_f64_ari_64;
1614 Opcode = NVPTX::ST_i8_ari;
1617 Opcode = NVPTX::ST_i16_ari;
1620 Opcode = NVPTX::ST_i32_ari;
1623 Opcode = NVPTX::ST_i64_ari;
1626 Opcode = NVPTX::ST_f32_ari;
1629 Opcode = NVPTX::ST_f64_ari;
1635 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1636 getI32Imm(vecType), getI32Imm(toType),
1637 getI32Imm(toTypeWidth), Base, Offset, Chain };
1638 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1640 if (Subtarget.is64Bit()) {
1643 Opcode = NVPTX::ST_i8_areg_64;
1646 Opcode = NVPTX::ST_i16_areg_64;
1649 Opcode = NVPTX::ST_i32_areg_64;
1652 Opcode = NVPTX::ST_i64_areg_64;
1655 Opcode = NVPTX::ST_f32_areg_64;
1658 Opcode = NVPTX::ST_f64_areg_64;
1666 Opcode = NVPTX::ST_i8_areg;
1669 Opcode = NVPTX::ST_i16_areg;
1672 Opcode = NVPTX::ST_i32_areg;
1675 Opcode = NVPTX::ST_i64_areg;
1678 Opcode = NVPTX::ST_f32_areg;
1681 Opcode = NVPTX::ST_f64_areg;
1687 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1688 getI32Imm(vecType), getI32Imm(toType),
1689 getI32Imm(toTypeWidth), N2, Chain };
1690 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1694 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1695 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1696 cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
// SelectStoreVector - Select a PTX vector store (STV_* machine instruction)
// for an NVPTXISD::StoreV2 / NVPTXISD::StoreV4 DAG node.  The final opcode is
// chosen along two axes: the addressing mode matched for the address operand
// (_avar direct address, _asi symbol+immediate, _ari register+immediate,
// _areg plain register, with 64-bit register variants on 64-bit subtargets)
// and the element type of the stored values.  The machine node's memory
// operand is copied over from the original MemSDNode.
1702 SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
1703 SDValue Chain = N->getOperand(0);
1704 SDValue Op1 = N->getOperand(1);
1705 SDValue Addr, Offset, Base;
1709 EVT EltVT = Op1.getValueType();
1710 MemSDNode *MemSD = cast<MemSDNode>(N);
1711 EVT StoreVT = MemSD->getMemoryVT();
1713 // Address Space Setting
1714 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
// Writing through a constant-space pointer is a hard error rather than a
// silently-wrong selection.
1716 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
1717 report_fatal_error("Cannot store to pointer that points to constant "
1722 // - .volatile is only available for .global and .shared
1723 bool IsVolatile = MemSD->isVolatile();
1724 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1725 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1726 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1729 // Type Setting: toType + toTypeWidth
1730 // - for integer type, always use 'u'
1731 assert(StoreVT.isSimple() && "Store value is not simple");
1732 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
1733 unsigned ToTypeWidth = ScalarVT.getSizeInBits();
1735 if (ScalarVT.isFloatingPoint())
1736 ToType = NVPTX::PTXLdStInstCode::Float;
1738 ToType = NVPTX::PTXLdStInstCode::Unsigned;
1740 SmallVector<SDValue, 12> StOps;
// Gather the scalar values being stored; N2 is the address operand that
// follows them in the StoreV2/StoreV4 node's operand list.
1744 switch (N->getOpcode()) {
1745 case NVPTXISD::StoreV2:
1746 VecType = NVPTX::PTXLdStInstCode::V2;
1747 StOps.push_back(N->getOperand(1));
1748 StOps.push_back(N->getOperand(2));
1749 N2 = N->getOperand(3);
1751 case NVPTXISD::StoreV4:
1752 VecType = NVPTX::PTXLdStInstCode::V4;
1753 StOps.push_back(N->getOperand(1));
1754 StOps.push_back(N->getOperand(2));
1755 StOps.push_back(N->getOperand(3));
1756 StOps.push_back(N->getOperand(4));
1757 N2 = N->getOperand(5);
// Immediate flag operands shared by every STV_* form, in the order the
// instruction definitions expect them.
1763 StOps.push_back(getI32Imm(IsVolatile));
1764 StOps.push_back(getI32Imm(CodeAddrSpace));
1765 StOps.push_back(getI32Imm(VecType));
1766 StOps.push_back(getI32Imm(ToType));
1767 StOps.push_back(getI32Imm(ToTypeWidth));
// Addressing mode 1: direct address (_avar).
1769 if (SelectDirectAddr(N2, Addr)) {
1770 switch (N->getOpcode()) {
1773 case NVPTXISD::StoreV2:
1774 switch (EltVT.getSimpleVT().SimpleTy) {
1778 Opcode = NVPTX::STV_i8_v2_avar;
1781 Opcode = NVPTX::STV_i16_v2_avar;
1784 Opcode = NVPTX::STV_i32_v2_avar;
1787 Opcode = NVPTX::STV_i64_v2_avar;
1790 Opcode = NVPTX::STV_f32_v2_avar;
1793 Opcode = NVPTX::STV_f64_v2_avar;
1797 case NVPTXISD::StoreV4:
1798 switch (EltVT.getSimpleVT().SimpleTy) {
1802 Opcode = NVPTX::STV_i8_v4_avar;
1805 Opcode = NVPTX::STV_i16_v4_avar;
1808 Opcode = NVPTX::STV_i32_v4_avar;
1811 Opcode = NVPTX::STV_f32_v4_avar;
// NOTE(review): the v4 tables stop at 32-bit element types — presumably
// because a v4 of 64-bit elements exceeds the widest vector store form;
// confirm against the STV_* instruction definitions.
1816 StOps.push_back(Addr);
// Addressing mode 2: symbol + immediate offset (_asi).
1817 } else if (Subtarget.is64Bit()
1818 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1819 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1820 switch (N->getOpcode()) {
1823 case NVPTXISD::StoreV2:
1824 switch (EltVT.getSimpleVT().SimpleTy) {
1828 Opcode = NVPTX::STV_i8_v2_asi;
1831 Opcode = NVPTX::STV_i16_v2_asi;
1834 Opcode = NVPTX::STV_i32_v2_asi;
1837 Opcode = NVPTX::STV_i64_v2_asi;
1840 Opcode = NVPTX::STV_f32_v2_asi;
1843 Opcode = NVPTX::STV_f64_v2_asi;
1847 case NVPTXISD::StoreV4:
1848 switch (EltVT.getSimpleVT().SimpleTy) {
1852 Opcode = NVPTX::STV_i8_v4_asi;
1855 Opcode = NVPTX::STV_i16_v4_asi;
1858 Opcode = NVPTX::STV_i32_v4_asi;
1861 Opcode = NVPTX::STV_f32_v4_asi;
1866 StOps.push_back(Base);
1867 StOps.push_back(Offset);
// Addressing mode 3: register + immediate offset (_ari / _ari_64).
1868 } else if (Subtarget.is64Bit()
1869 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1870 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1871 if (Subtarget.is64Bit()) {
1872 switch (N->getOpcode()) {
1875 case NVPTXISD::StoreV2:
1876 switch (EltVT.getSimpleVT().SimpleTy) {
1880 Opcode = NVPTX::STV_i8_v2_ari_64;
1883 Opcode = NVPTX::STV_i16_v2_ari_64;
1886 Opcode = NVPTX::STV_i32_v2_ari_64;
1889 Opcode = NVPTX::STV_i64_v2_ari_64;
1892 Opcode = NVPTX::STV_f32_v2_ari_64;
1895 Opcode = NVPTX::STV_f64_v2_ari_64;
1899 case NVPTXISD::StoreV4:
1900 switch (EltVT.getSimpleVT().SimpleTy) {
1904 Opcode = NVPTX::STV_i8_v4_ari_64;
1907 Opcode = NVPTX::STV_i16_v4_ari_64;
1910 Opcode = NVPTX::STV_i32_v4_ari_64;
1913 Opcode = NVPTX::STV_f32_v4_ari_64;
1919 switch (N->getOpcode()) {
1922 case NVPTXISD::StoreV2:
1923 switch (EltVT.getSimpleVT().SimpleTy) {
1927 Opcode = NVPTX::STV_i8_v2_ari;
1930 Opcode = NVPTX::STV_i16_v2_ari;
1933 Opcode = NVPTX::STV_i32_v2_ari;
1936 Opcode = NVPTX::STV_i64_v2_ari;
1939 Opcode = NVPTX::STV_f32_v2_ari;
1942 Opcode = NVPTX::STV_f64_v2_ari;
1946 case NVPTXISD::StoreV4:
1947 switch (EltVT.getSimpleVT().SimpleTy) {
1951 Opcode = NVPTX::STV_i8_v4_ari;
1954 Opcode = NVPTX::STV_i16_v4_ari;
1957 Opcode = NVPTX::STV_i32_v4_ari;
1960 Opcode = NVPTX::STV_f32_v4_ari;
1966 StOps.push_back(Base);
1967 StOps.push_back(Offset);
// Addressing mode 4 (fallback): plain register (_areg / _areg_64).
1969 if (Subtarget.is64Bit()) {
1970 switch (N->getOpcode()) {
1973 case NVPTXISD::StoreV2:
1974 switch (EltVT.getSimpleVT().SimpleTy) {
1978 Opcode = NVPTX::STV_i8_v2_areg_64;
1981 Opcode = NVPTX::STV_i16_v2_areg_64;
1984 Opcode = NVPTX::STV_i32_v2_areg_64;
1987 Opcode = NVPTX::STV_i64_v2_areg_64;
1990 Opcode = NVPTX::STV_f32_v2_areg_64;
1993 Opcode = NVPTX::STV_f64_v2_areg_64;
1997 case NVPTXISD::StoreV4:
1998 switch (EltVT.getSimpleVT().SimpleTy) {
2002 Opcode = NVPTX::STV_i8_v4_areg_64;
2005 Opcode = NVPTX::STV_i16_v4_areg_64;
2008 Opcode = NVPTX::STV_i32_v4_areg_64;
2011 Opcode = NVPTX::STV_f32_v4_areg_64;
2017 switch (N->getOpcode()) {
2020 case NVPTXISD::StoreV2:
2021 switch (EltVT.getSimpleVT().SimpleTy) {
2025 Opcode = NVPTX::STV_i8_v2_areg;
2028 Opcode = NVPTX::STV_i16_v2_areg;
2031 Opcode = NVPTX::STV_i32_v2_areg;
2034 Opcode = NVPTX::STV_i64_v2_areg;
2037 Opcode = NVPTX::STV_f32_v2_areg;
2040 Opcode = NVPTX::STV_f64_v2_areg;
2044 case NVPTXISD::StoreV4:
2045 switch (EltVT.getSimpleVT().SimpleTy) {
2049 Opcode = NVPTX::STV_i8_v4_areg;
2052 Opcode = NVPTX::STV_i16_v4_areg;
2055 Opcode = NVPTX::STV_i32_v4_areg;
2058 Opcode = NVPTX::STV_f32_v4_areg;
2064 StOps.push_back(N2);
2067 StOps.push_back(Chain);
2069 ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
// Attach the original memory operand so later passes (e.g. scheduling,
// alias analysis on MachineInstrs) retain the memory reference info.
2071 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2072 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2073 cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
// SelectLoadParam - Select a LoadParamMem* machine node for an
// NVPTXISD::LoadParam / LoadParamV2 / LoadParamV4 node, which reads a formal
// parameter of the current function at a constant byte offset.  The vector
// arity picks both the opcode family (scalar / V2 / V4) and the result value
// type list; operands are the offset immediate, the chain, and glue.
2078 SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
2079 SDValue Chain = Node->getOperand(0);
2080 SDValue Offset = Node->getOperand(2);
2081 SDValue Flag = Node->getOperand(3);
2083 MemSDNode *Mem = cast<MemSDNode>(Node);
// Determine how many result elements this load produces.
2086 switch (Node->getOpcode()) {
2089 case NVPTXISD::LoadParam:
2092 case NVPTXISD::LoadParamV2:
2095 case NVPTXISD::LoadParamV4:
2100 EVT EltVT = Node->getValueType(0);
2101 EVT MemVT = Mem->getMemoryVT();
// Scalar variant: opcode keyed on the in-memory element type.  Note i1 and
// i8 both map to the 8-bit form.
2109 switch (MemVT.getSimpleVT().SimpleTy) {
2113 Opc = NVPTX::LoadParamMemI8;
2116 Opc = NVPTX::LoadParamMemI8;
2119 Opc = NVPTX::LoadParamMemI16;
2122 Opc = NVPTX::LoadParamMemI32;
2125 Opc = NVPTX::LoadParamMemI64;
2128 Opc = NVPTX::LoadParamMemF32;
2131 Opc = NVPTX::LoadParamMemF64;
// V2 variant.
2136 switch (MemVT.getSimpleVT().SimpleTy) {
2140 Opc = NVPTX::LoadParamMemV2I8;
2143 Opc = NVPTX::LoadParamMemV2I8;
2146 Opc = NVPTX::LoadParamMemV2I16;
2149 Opc = NVPTX::LoadParamMemV2I32;
2152 Opc = NVPTX::LoadParamMemV2I64;
2155 Opc = NVPTX::LoadParamMemV2F32;
2158 Opc = NVPTX::LoadParamMemV2F64;
// V4 variant — no 64-bit element entries appear in this table.
2163 switch (MemVT.getSimpleVT().SimpleTy) {
2167 Opc = NVPTX::LoadParamMemV4I8;
2170 Opc = NVPTX::LoadParamMemV4I8;
2173 Opc = NVPTX::LoadParamMemV4I16;
2176 Opc = NVPTX::LoadParamMemV4I32;
2179 Opc = NVPTX::LoadParamMemV4F32;
// Result type list: one EltVT per element, plus chain and glue outputs.
2187 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2188 } else if (VecSize == 2) {
2189 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2191 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2192 VTs = CurDAG->getVTList(EVTs);
2195 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2197 SmallVector<SDValue, 2> Ops;
2198 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2199 Ops.push_back(Chain);
2200 Ops.push_back(Flag);
2203 CurDAG->getMachineNode(Opc, DL, VTs, Ops);
// SelectStoreRetval - Select a StoreRetval* machine node for an
// NVPTXISD::StoreRetval / StoreRetvalV2 / StoreRetvalV4 node, which writes
// the function's return value into the return-value space at a constant
// byte offset.  Opcode is keyed on element count and in-memory element type.
2207 SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
2209 SDValue Chain = N->getOperand(0);
2210 SDValue Offset = N->getOperand(1);
2211 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2212 MemSDNode *Mem = cast<MemSDNode>(N);
2214 // How many elements do we have?
2215 unsigned NumElts = 1;
2216 switch (N->getOpcode()) {
2219 case NVPTXISD::StoreRetval:
2222 case NVPTXISD::StoreRetvalV2:
2225 case NVPTXISD::StoreRetvalV4:
2230 // Build vector of operands
// Value operands start at operand index 2, followed by the offset
// immediate and the chain.
2231 SmallVector<SDValue, 6> Ops;
2232 for (unsigned i = 0; i < NumElts; ++i)
2233 Ops.push_back(N->getOperand(i + 2));
2234 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2235 Ops.push_back(Chain);
2237 // Determine target opcode
2238 // If we have an i1, use an 8-bit store. The lowering code in
2239 // NVPTXISelLowering will have already emitted an upcast.
2240 unsigned Opcode = 0;
// Scalar variant.
2245 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2249 Opcode = NVPTX::StoreRetvalI8;
2252 Opcode = NVPTX::StoreRetvalI8;
2255 Opcode = NVPTX::StoreRetvalI16;
2258 Opcode = NVPTX::StoreRetvalI32;
2261 Opcode = NVPTX::StoreRetvalI64;
2264 Opcode = NVPTX::StoreRetvalF32;
2267 Opcode = NVPTX::StoreRetvalF64;
// V2 variant.
2272 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2276 Opcode = NVPTX::StoreRetvalV2I8;
2279 Opcode = NVPTX::StoreRetvalV2I8;
2282 Opcode = NVPTX::StoreRetvalV2I16;
2285 Opcode = NVPTX::StoreRetvalV2I32;
2288 Opcode = NVPTX::StoreRetvalV2I64;
2291 Opcode = NVPTX::StoreRetvalV2F32;
2294 Opcode = NVPTX::StoreRetvalV2F64;
// V4 variant — no 64-bit element entries appear in this table.
2299 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2303 Opcode = NVPTX::StoreRetvalV4I8;
2306 Opcode = NVPTX::StoreRetvalV4I8;
2309 Opcode = NVPTX::StoreRetvalV4I16;
2312 Opcode = NVPTX::StoreRetvalV4I32;
2315 Opcode = NVPTX::StoreRetvalV4F32;
2322 CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
// Carry the memory operand over to the machine node.
2323 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2324 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2325 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
// SelectStoreParam - Select a StoreParam* machine node for an
// NVPTXISD::StoreParam / StoreParamV2 / StoreParamV4 / StoreParamU32 /
// StoreParamS32 node, which writes an outgoing call argument into parameter
// space (param index + constant byte offset).  The U32/S32 forms first emit
// an explicit zero-/sign-extending CVT of the 16-bit value to i32.
2330 SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
2332 SDValue Chain = N->getOperand(0);
2333 SDValue Param = N->getOperand(1);
2334 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2335 SDValue Offset = N->getOperand(2);
2336 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2337 MemSDNode *Mem = cast<MemSDNode>(N);
2338 SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2340 // How many elements do we have?
// The U32/S32 extension forms store a single element, like StoreParam.
2341 unsigned NumElts = 1;
2342 switch (N->getOpcode()) {
2345 case NVPTXISD::StoreParamU32:
2346 case NVPTXISD::StoreParamS32:
2347 case NVPTXISD::StoreParam:
2350 case NVPTXISD::StoreParamV2:
2353 case NVPTXISD::StoreParamV4:
2358 // Build vector of operands
// Value operands start at operand index 3, followed by the param index,
// the byte offset, the chain, and glue.
2359 SmallVector<SDValue, 8> Ops;
2360 for (unsigned i = 0; i < NumElts; ++i)
2361 Ops.push_back(N->getOperand(i + 3));
2362 Ops.push_back(CurDAG->getTargetConstant(ParamVal, MVT::i32));
2363 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2364 Ops.push_back(Chain);
2365 Ops.push_back(Flag);
2367 // Determine target opcode
2368 // If we have an i1, use an 8-bit store. The lowering code in
2369 // NVPTXISelLowering will have already emitted an upcast.
2370 unsigned Opcode = 0;
2371 switch (N->getOpcode()) {
// Scalar variant.
2377 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2381 Opcode = NVPTX::StoreParamI8;
2384 Opcode = NVPTX::StoreParamI8;
2387 Opcode = NVPTX::StoreParamI16;
2390 Opcode = NVPTX::StoreParamI32;
2393 Opcode = NVPTX::StoreParamI64;
2396 Opcode = NVPTX::StoreParamF32;
2399 Opcode = NVPTX::StoreParamF64;
// V2 variant.
2404 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2408 Opcode = NVPTX::StoreParamV2I8;
2411 Opcode = NVPTX::StoreParamV2I8;
2414 Opcode = NVPTX::StoreParamV2I16;
2417 Opcode = NVPTX::StoreParamV2I32;
2420 Opcode = NVPTX::StoreParamV2I64;
2423 Opcode = NVPTX::StoreParamV2F32;
2426 Opcode = NVPTX::StoreParamV2F64;
// V4 variant — no 64-bit element entries appear in this table.
2431 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2435 Opcode = NVPTX::StoreParamV4I8;
2438 Opcode = NVPTX::StoreParamV4I8;
2441 Opcode = NVPTX::StoreParamV4I16;
2444 Opcode = NVPTX::StoreParamV4I32;
2447 Opcode = NVPTX::StoreParamV4F32;
2453 // Special case: if we have a sign-extend/zero-extend node, insert the
2454 // conversion instruction first, and use that as the value operand to
2455 // the selected StoreParam node.
2456 case NVPTXISD::StoreParamU32: {
2457 Opcode = NVPTX::StoreParamI32;
2458 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
// cvt.u32.u16: zero-extend the value to 32 bits before storing.
2460 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2461 MVT::i32, Ops[0], CvtNone);
2462 Ops[0] = SDValue(Cvt, 0);
2465 case NVPTXISD::StoreParamS32: {
2466 Opcode = NVPTX::StoreParamI32;
2467 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
// cvt.s32.s16: sign-extend the value to 32 bits before storing.
2469 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2470 MVT::i32, Ops[0], CvtNone);
2471 Ops[0] = SDValue(Cvt, 0);
// StoreParam nodes produce a chain and glue so calls can be sequenced
// against the parameter stores.
2476 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
2478 CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
// Carry the memory operand over to the machine node.
2479 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2480 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2481 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
// SelectTextureIntrinsic - Select a TEX_* machine node for an NVPTXISD
// texture-fetch node.  The opcode is a direct 1:1 mapping from the ISD
// opcode, which encodes geometry (1D / 1D array / 2D / 2D array / 3D),
// result type (F32 / I32), coordinate type (I32 / F32), and sampling mode
// (plain / LEVEL / GRAD).  Operands are the texture and sampler handles,
// the coordinate/index operands copied from the intrinsic, then the chain.
2486 SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
2487 SDValue Chain = N->getOperand(0);
2488 SDValue TexRef = N->getOperand(1);
2489 SDValue SampRef = N->getOperand(2);
2490 SDNode *Ret = nullptr;
2492 SmallVector<SDValue, 8> Ops;
// Not a texture node we recognize: let the default selector handle it.
2494 switch (N->getOpcode()) {
2495 default: return nullptr;
// 1D textures.
2496 case NVPTXISD::Tex1DFloatI32:
2497 Opc = NVPTX::TEX_1D_F32_I32;
2499 case NVPTXISD::Tex1DFloatFloat:
2500 Opc = NVPTX::TEX_1D_F32_F32;
2502 case NVPTXISD::Tex1DFloatFloatLevel:
2503 Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
2505 case NVPTXISD::Tex1DFloatFloatGrad:
2506 Opc = NVPTX::TEX_1D_F32_F32_GRAD;
2508 case NVPTXISD::Tex1DI32I32:
2509 Opc = NVPTX::TEX_1D_I32_I32;
2511 case NVPTXISD::Tex1DI32Float:
2512 Opc = NVPTX::TEX_1D_I32_F32;
2514 case NVPTXISD::Tex1DI32FloatLevel:
2515 Opc = NVPTX::TEX_1D_I32_F32_LEVEL;
2517 case NVPTXISD::Tex1DI32FloatGrad:
2518 Opc = NVPTX::TEX_1D_I32_F32_GRAD;
// 1D array textures.
2520 case NVPTXISD::Tex1DArrayFloatI32:
2521 Opc = NVPTX::TEX_1D_ARRAY_F32_I32;
2523 case NVPTXISD::Tex1DArrayFloatFloat:
2524 Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
2526 case NVPTXISD::Tex1DArrayFloatFloatLevel:
2527 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
2529 case NVPTXISD::Tex1DArrayFloatFloatGrad:
2530 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
2532 case NVPTXISD::Tex1DArrayI32I32:
2533 Opc = NVPTX::TEX_1D_ARRAY_I32_I32;
2535 case NVPTXISD::Tex1DArrayI32Float:
2536 Opc = NVPTX::TEX_1D_ARRAY_I32_F32;
2538 case NVPTXISD::Tex1DArrayI32FloatLevel:
2539 Opc = NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL;
2541 case NVPTXISD::Tex1DArrayI32FloatGrad:
2542 Opc = NVPTX::TEX_1D_ARRAY_I32_F32_GRAD;
// 2D textures.
2544 case NVPTXISD::Tex2DFloatI32:
2545 Opc = NVPTX::TEX_2D_F32_I32;
2547 case NVPTXISD::Tex2DFloatFloat:
2548 Opc = NVPTX::TEX_2D_F32_F32;
2550 case NVPTXISD::Tex2DFloatFloatLevel:
2551 Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
2553 case NVPTXISD::Tex2DFloatFloatGrad:
2554 Opc = NVPTX::TEX_2D_F32_F32_GRAD;
2556 case NVPTXISD::Tex2DI32I32:
2557 Opc = NVPTX::TEX_2D_I32_I32;
2559 case NVPTXISD::Tex2DI32Float:
2560 Opc = NVPTX::TEX_2D_I32_F32;
2562 case NVPTXISD::Tex2DI32FloatLevel:
2563 Opc = NVPTX::TEX_2D_I32_F32_LEVEL;
2565 case NVPTXISD::Tex2DI32FloatGrad:
2566 Opc = NVPTX::TEX_2D_I32_F32_GRAD;
// 2D array textures.
2568 case NVPTXISD::Tex2DArrayFloatI32:
2569 Opc = NVPTX::TEX_2D_ARRAY_F32_I32;
2571 case NVPTXISD::Tex2DArrayFloatFloat:
2572 Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
2574 case NVPTXISD::Tex2DArrayFloatFloatLevel:
2575 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
2577 case NVPTXISD::Tex2DArrayFloatFloatGrad:
2578 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
2580 case NVPTXISD::Tex2DArrayI32I32:
2581 Opc = NVPTX::TEX_2D_ARRAY_I32_I32;
2583 case NVPTXISD::Tex2DArrayI32Float:
2584 Opc = NVPTX::TEX_2D_ARRAY_I32_F32;
2586 case NVPTXISD::Tex2DArrayI32FloatLevel:
2587 Opc = NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL;
2589 case NVPTXISD::Tex2DArrayI32FloatGrad:
2590 Opc = NVPTX::TEX_2D_ARRAY_I32_F32_GRAD;
// 3D textures.
2592 case NVPTXISD::Tex3DFloatI32:
2593 Opc = NVPTX::TEX_3D_F32_I32;
2595 case NVPTXISD::Tex3DFloatFloat:
2596 Opc = NVPTX::TEX_3D_F32_F32;
2598 case NVPTXISD::Tex3DFloatFloatLevel:
2599 Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
2601 case NVPTXISD::Tex3DFloatFloatGrad:
2602 Opc = NVPTX::TEX_3D_F32_F32_GRAD;
2604 case NVPTXISD::Tex3DI32I32:
2605 Opc = NVPTX::TEX_3D_I32_I32;
2607 case NVPTXISD::Tex3DI32Float:
2608 Opc = NVPTX::TEX_3D_I32_F32;
2610 case NVPTXISD::Tex3DI32FloatLevel:
2611 Opc = NVPTX::TEX_3D_I32_F32_LEVEL;
2613 case NVPTXISD::Tex3DI32FloatGrad:
2614 Opc = NVPTX::TEX_3D_I32_F32_GRAD;
// Operand order expected by the TEX_* instructions: texture handle,
// sampler handle, coordinates, chain.
2618 Ops.push_back(TexRef);
2619 Ops.push_back(SampRef);
2621 // Copy over indices
2622 for (unsigned i = 3; i < N->getNumOperands(); ++i) {
2623 Ops.push_back(N->getOperand(i));
2626 Ops.push_back(Chain);
// Result types are taken unchanged from the intrinsic node.
2627 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
// SelectSurfaceIntrinsic - Select a SULD_* (surface load) machine node for an
// NVPTXISD surface-load node.  All nodes handled here are the _TRAP variants
// (trap on out-of-bounds access).  The ISD opcode encodes geometry
// (1D / 1D array / 2D / 2D array / 3D), element width (I8 / I16 / I32) and
// vector arity (scalar / V2 / V4).  Every case builds the same operand
// pattern: surface handle, then one coordinate per geometry dimension
// (plus the array index for array forms), then the chain.
2631 SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
2632 SDValue Chain = N->getOperand(0);
2633 SDValue TexHandle = N->getOperand(1);
2634 SDNode *Ret = nullptr;
2636 SmallVector<SDValue, 8> Ops;
// Not a surface node we recognize: let the default selector handle it.
2637 switch (N->getOpcode()) {
2638 default: return nullptr;
// 1D surface loads: handle + x coordinate.
2639 case NVPTXISD::Suld1DI8Trap:
2640 Opc = NVPTX::SULD_1D_I8_TRAP;
2641 Ops.push_back(TexHandle);
2642 Ops.push_back(N->getOperand(2));
2643 Ops.push_back(Chain);
2645 case NVPTXISD::Suld1DI16Trap:
2646 Opc = NVPTX::SULD_1D_I16_TRAP;
2647 Ops.push_back(TexHandle);
2648 Ops.push_back(N->getOperand(2));
2649 Ops.push_back(Chain);
2651 case NVPTXISD::Suld1DI32Trap:
2652 Opc = NVPTX::SULD_1D_I32_TRAP;
2653 Ops.push_back(TexHandle);
2654 Ops.push_back(N->getOperand(2));
2655 Ops.push_back(Chain);
2657 case NVPTXISD::Suld1DV2I8Trap:
2658 Opc = NVPTX::SULD_1D_V2I8_TRAP;
2659 Ops.push_back(TexHandle);
2660 Ops.push_back(N->getOperand(2));
2661 Ops.push_back(Chain);
2663 case NVPTXISD::Suld1DV2I16Trap:
2664 Opc = NVPTX::SULD_1D_V2I16_TRAP;
2665 Ops.push_back(TexHandle);
2666 Ops.push_back(N->getOperand(2));
2667 Ops.push_back(Chain);
2669 case NVPTXISD::Suld1DV2I32Trap:
2670 Opc = NVPTX::SULD_1D_V2I32_TRAP;
2671 Ops.push_back(TexHandle);
2672 Ops.push_back(N->getOperand(2));
2673 Ops.push_back(Chain);
2675 case NVPTXISD::Suld1DV4I8Trap:
2676 Opc = NVPTX::SULD_1D_V4I8_TRAP;
2677 Ops.push_back(TexHandle);
2678 Ops.push_back(N->getOperand(2));
2679 Ops.push_back(Chain);
2681 case NVPTXISD::Suld1DV4I16Trap:
2682 Opc = NVPTX::SULD_1D_V4I16_TRAP;
2683 Ops.push_back(TexHandle);
2684 Ops.push_back(N->getOperand(2));
2685 Ops.push_back(Chain);
2687 case NVPTXISD::Suld1DV4I32Trap:
2688 Opc = NVPTX::SULD_1D_V4I32_TRAP;
2689 Ops.push_back(TexHandle);
2690 Ops.push_back(N->getOperand(2));
2691 Ops.push_back(Chain);
// 1D array surface loads: handle + layer index + x coordinate.
2693 case NVPTXISD::Suld1DArrayI8Trap:
2694 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
2695 Ops.push_back(TexHandle);
2696 Ops.push_back(N->getOperand(2));
2697 Ops.push_back(N->getOperand(3));
2698 Ops.push_back(Chain);
2700 case NVPTXISD::Suld1DArrayI16Trap:
2701 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
2702 Ops.push_back(TexHandle);
2703 Ops.push_back(N->getOperand(2));
2704 Ops.push_back(N->getOperand(3));
2705 Ops.push_back(Chain);
2707 case NVPTXISD::Suld1DArrayI32Trap:
2708 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
2709 Ops.push_back(TexHandle);
2710 Ops.push_back(N->getOperand(2));
2711 Ops.push_back(N->getOperand(3));
2712 Ops.push_back(Chain);
2714 case NVPTXISD::Suld1DArrayV2I8Trap:
2715 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
2716 Ops.push_back(TexHandle);
2717 Ops.push_back(N->getOperand(2));
2718 Ops.push_back(N->getOperand(3));
2719 Ops.push_back(Chain);
2721 case NVPTXISD::Suld1DArrayV2I16Trap:
2722 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
2723 Ops.push_back(TexHandle);
2724 Ops.push_back(N->getOperand(2));
2725 Ops.push_back(N->getOperand(3));
2726 Ops.push_back(Chain);
2728 case NVPTXISD::Suld1DArrayV2I32Trap:
2729 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
2730 Ops.push_back(TexHandle);
2731 Ops.push_back(N->getOperand(2));
2732 Ops.push_back(N->getOperand(3));
2733 Ops.push_back(Chain);
2735 case NVPTXISD::Suld1DArrayV4I8Trap:
2736 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
2737 Ops.push_back(TexHandle);
2738 Ops.push_back(N->getOperand(2));
2739 Ops.push_back(N->getOperand(3));
2740 Ops.push_back(Chain);
2742 case NVPTXISD::Suld1DArrayV4I16Trap:
2743 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
2744 Ops.push_back(TexHandle);
2745 Ops.push_back(N->getOperand(2));
2746 Ops.push_back(N->getOperand(3));
2747 Ops.push_back(Chain);
2749 case NVPTXISD::Suld1DArrayV4I32Trap:
2750 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
2751 Ops.push_back(TexHandle);
2752 Ops.push_back(N->getOperand(2));
2753 Ops.push_back(N->getOperand(3));
2754 Ops.push_back(Chain);
// 2D surface loads: handle + x + y coordinates.
2756 case NVPTXISD::Suld2DI8Trap:
2757 Opc = NVPTX::SULD_2D_I8_TRAP;
2758 Ops.push_back(TexHandle);
2759 Ops.push_back(N->getOperand(2));
2760 Ops.push_back(N->getOperand(3));
2761 Ops.push_back(Chain);
2763 case NVPTXISD::Suld2DI16Trap:
2764 Opc = NVPTX::SULD_2D_I16_TRAP;
2765 Ops.push_back(TexHandle);
2766 Ops.push_back(N->getOperand(2));
2767 Ops.push_back(N->getOperand(3));
2768 Ops.push_back(Chain);
2770 case NVPTXISD::Suld2DI32Trap:
2771 Opc = NVPTX::SULD_2D_I32_TRAP;
2772 Ops.push_back(TexHandle);
2773 Ops.push_back(N->getOperand(2));
2774 Ops.push_back(N->getOperand(3));
2775 Ops.push_back(Chain);
2777 case NVPTXISD::Suld2DV2I8Trap:
2778 Opc = NVPTX::SULD_2D_V2I8_TRAP;
2779 Ops.push_back(TexHandle);
2780 Ops.push_back(N->getOperand(2));
2781 Ops.push_back(N->getOperand(3));
2782 Ops.push_back(Chain);
2784 case NVPTXISD::Suld2DV2I16Trap:
2785 Opc = NVPTX::SULD_2D_V2I16_TRAP;
2786 Ops.push_back(TexHandle);
2787 Ops.push_back(N->getOperand(2));
2788 Ops.push_back(N->getOperand(3));
2789 Ops.push_back(Chain);
2791 case NVPTXISD::Suld2DV2I32Trap:
2792 Opc = NVPTX::SULD_2D_V2I32_TRAP;
2793 Ops.push_back(TexHandle);
2794 Ops.push_back(N->getOperand(2));
2795 Ops.push_back(N->getOperand(3));
2796 Ops.push_back(Chain);
2798 case NVPTXISD::Suld2DV4I8Trap:
2799 Opc = NVPTX::SULD_2D_V4I8_TRAP;
2800 Ops.push_back(TexHandle);
2801 Ops.push_back(N->getOperand(2));
2802 Ops.push_back(N->getOperand(3));
2803 Ops.push_back(Chain);
2805 case NVPTXISD::Suld2DV4I16Trap:
2806 Opc = NVPTX::SULD_2D_V4I16_TRAP;
2807 Ops.push_back(TexHandle);
2808 Ops.push_back(N->getOperand(2));
2809 Ops.push_back(N->getOperand(3));
2810 Ops.push_back(Chain);
2812 case NVPTXISD::Suld2DV4I32Trap:
2813 Opc = NVPTX::SULD_2D_V4I32_TRAP;
2814 Ops.push_back(TexHandle);
2815 Ops.push_back(N->getOperand(2));
2816 Ops.push_back(N->getOperand(3));
2817 Ops.push_back(Chain);
// 2D array surface loads: handle + layer index + x + y coordinates.
2819 case NVPTXISD::Suld2DArrayI8Trap:
2820 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
2821 Ops.push_back(TexHandle);
2822 Ops.push_back(N->getOperand(2));
2823 Ops.push_back(N->getOperand(3));
2824 Ops.push_back(N->getOperand(4));
2825 Ops.push_back(Chain);
2827 case NVPTXISD::Suld2DArrayI16Trap:
2828 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
2829 Ops.push_back(TexHandle);
2830 Ops.push_back(N->getOperand(2));
2831 Ops.push_back(N->getOperand(3));
2832 Ops.push_back(N->getOperand(4));
2833 Ops.push_back(Chain);
2835 case NVPTXISD::Suld2DArrayI32Trap:
2836 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
2837 Ops.push_back(TexHandle);
2838 Ops.push_back(N->getOperand(2));
2839 Ops.push_back(N->getOperand(3));
2840 Ops.push_back(N->getOperand(4));
2841 Ops.push_back(Chain);
2843 case NVPTXISD::Suld2DArrayV2I8Trap:
2844 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
2845 Ops.push_back(TexHandle);
2846 Ops.push_back(N->getOperand(2));
2847 Ops.push_back(N->getOperand(3));
2848 Ops.push_back(N->getOperand(4));
2849 Ops.push_back(Chain);
2851 case NVPTXISD::Suld2DArrayV2I16Trap:
2852 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
2853 Ops.push_back(TexHandle);
2854 Ops.push_back(N->getOperand(2));
2855 Ops.push_back(N->getOperand(3));
2856 Ops.push_back(N->getOperand(4));
2857 Ops.push_back(Chain);
2859 case NVPTXISD::Suld2DArrayV2I32Trap:
2860 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
2861 Ops.push_back(TexHandle);
2862 Ops.push_back(N->getOperand(2));
2863 Ops.push_back(N->getOperand(3));
2864 Ops.push_back(N->getOperand(4));
2865 Ops.push_back(Chain);
2867 case NVPTXISD::Suld2DArrayV4I8Trap:
2868 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
2869 Ops.push_back(TexHandle);
2870 Ops.push_back(N->getOperand(2));
2871 Ops.push_back(N->getOperand(3));
2872 Ops.push_back(N->getOperand(4));
2873 Ops.push_back(Chain);
2875 case NVPTXISD::Suld2DArrayV4I16Trap:
2876 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
2877 Ops.push_back(TexHandle);
2878 Ops.push_back(N->getOperand(2));
2879 Ops.push_back(N->getOperand(3));
2880 Ops.push_back(N->getOperand(4));
2881 Ops.push_back(Chain);
2883 case NVPTXISD::Suld2DArrayV4I32Trap:
2884 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
2885 Ops.push_back(TexHandle);
2886 Ops.push_back(N->getOperand(2));
2887 Ops.push_back(N->getOperand(3));
2888 Ops.push_back(N->getOperand(4));
2889 Ops.push_back(Chain);
// 3D surface loads: handle + x + y + z coordinates.
2891 case NVPTXISD::Suld3DI8Trap:
2892 Opc = NVPTX::SULD_3D_I8_TRAP;
2893 Ops.push_back(TexHandle);
2894 Ops.push_back(N->getOperand(2));
2895 Ops.push_back(N->getOperand(3));
2896 Ops.push_back(N->getOperand(4));
2897 Ops.push_back(Chain);
2899 case NVPTXISD::Suld3DI16Trap:
2900 Opc = NVPTX::SULD_3D_I16_TRAP;
2901 Ops.push_back(TexHandle);
2902 Ops.push_back(N->getOperand(2));
2903 Ops.push_back(N->getOperand(3));
2904 Ops.push_back(N->getOperand(4));
2905 Ops.push_back(Chain);
2907 case NVPTXISD::Suld3DI32Trap:
2908 Opc = NVPTX::SULD_3D_I32_TRAP;
2909 Ops.push_back(TexHandle);
2910 Ops.push_back(N->getOperand(2));
2911 Ops.push_back(N->getOperand(3));
2912 Ops.push_back(N->getOperand(4));
2913 Ops.push_back(Chain);
2915 case NVPTXISD::Suld3DV2I8Trap:
2916 Opc = NVPTX::SULD_3D_V2I8_TRAP;
2917 Ops.push_back(TexHandle);
2918 Ops.push_back(N->getOperand(2));
2919 Ops.push_back(N->getOperand(3));
2920 Ops.push_back(N->getOperand(4));
2921 Ops.push_back(Chain);
2923 case NVPTXISD::Suld3DV2I16Trap:
2924 Opc = NVPTX::SULD_3D_V2I16_TRAP;
2925 Ops.push_back(TexHandle);
2926 Ops.push_back(N->getOperand(2));
2927 Ops.push_back(N->getOperand(3));
2928 Ops.push_back(N->getOperand(4));
2929 Ops.push_back(Chain);
2931 case NVPTXISD::Suld3DV2I32Trap:
2932 Opc = NVPTX::SULD_3D_V2I32_TRAP;
2933 Ops.push_back(TexHandle);
2934 Ops.push_back(N->getOperand(2));
2935 Ops.push_back(N->getOperand(3));
2936 Ops.push_back(N->getOperand(4));
2937 Ops.push_back(Chain);
2939 case NVPTXISD::Suld3DV4I8Trap:
2940 Opc = NVPTX::SULD_3D_V4I8_TRAP;
2941 Ops.push_back(TexHandle);
2942 Ops.push_back(N->getOperand(2));
2943 Ops.push_back(N->getOperand(3));
2944 Ops.push_back(N->getOperand(4));
2945 Ops.push_back(Chain);
2947 case NVPTXISD::Suld3DV4I16Trap:
2948 Opc = NVPTX::SULD_3D_V4I16_TRAP;
2949 Ops.push_back(TexHandle);
2950 Ops.push_back(N->getOperand(2));
2951 Ops.push_back(N->getOperand(3));
2952 Ops.push_back(N->getOperand(4));
2953 Ops.push_back(Chain);
2955 case NVPTXISD::Suld3DV4I32Trap:
2956 Opc = NVPTX::SULD_3D_V4I32_TRAP;
2957 Ops.push_back(TexHandle);
2958 Ops.push_back(N->getOperand(2));
2959 Ops.push_back(N->getOperand(3));
2960 Ops.push_back(N->getOperand(4));
2961 Ops.push_back(Chain);
// Result types are taken unchanged from the intrinsic node.
2964 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
2968 /// SelectBFE - Look for instruction sequences that can be made more efficient
2969 /// by using the 'bfe' (bit-field extract) PTX instruction
2970 SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
2971 SDValue LHS = N->getOperand(0);
2972 SDValue RHS = N->getOperand(1);
2976 bool IsSigned = false;
2978 if (N->getOpcode() == ISD::AND) {
2979 // Canonicalize the operands
2980 // We want 'and %val, %mask'
2981 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
2982 std::swap(LHS, RHS);
2985 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
2987 // We need a constant mask on the RHS of the AND
2991 // Extract the mask bits
2992 uint64_t MaskVal = Mask->getZExtValue();
2993 if (!isMask_64(MaskVal)) {
2994 // We *could* handle shifted masks here, but doing so would require an
2995 // 'and' operation to fix up the low-order bits so we would trade
2996 // shr+and for bfe+and, which has the same throughput
3000 // How many bits are in our mask?
3001 uint64_t NumBits = CountTrailingOnes_64(MaskVal);
3002 Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
3004 if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
3005 // We have a 'srl/and' pair, extract the effective start bit and length
3006 Val = LHS.getNode()->getOperand(0);
3007 Start = LHS.getNode()->getOperand(1);
3008 ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
3010 uint64_t StartVal = StartConst->getZExtValue();
3011 // How many "good" bits do we have left? "good" is defined here as bits
3012 // that exist in the original value, not shifted in.
3013 uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
3014 if (NumBits > GoodBits) {
3015 // Do not handle the case where bits have been shifted in. In theory
3016 // we could handle this, but the cost is likely higher than just
3017 // emitting the srl/and pair.
3020 Start = CurDAG->getTargetConstant(StartVal, MVT::i32);
3022 // Do not handle the case where the shift amount (can be zero if no srl
3023 // was found) is not constant. We could handle this case, but it would
3024 // require run-time logic that would be more expensive than just
3025 // emitting the srl/and pair.
3029 // Do not handle the case where the LHS of the and is not a shift. While
3030 // it would be trivial to handle this case, it would just transform
3031 // 'and' -> 'bfe', but 'and' has higher-throughput.
3034 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
3035 if (LHS->getOpcode() == ISD::AND) {
3036 ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
3038 // Shift amount must be constant
3042 uint64_t ShiftAmt = ShiftCnst->getZExtValue();
3044 SDValue AndLHS = LHS->getOperand(0);
3045 SDValue AndRHS = LHS->getOperand(1);
3047 // Canonicalize the AND to have the mask on the RHS
3048 if (isa<ConstantSDNode>(AndLHS)) {
3049 std::swap(AndLHS, AndRHS);
3052 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
3054 // Mask must be constant
3058 uint64_t MaskVal = MaskCnst->getZExtValue();
3061 if (isMask_64(MaskVal)) {
3063 // The number of bits in the result bitfield will be the number of
3064 // trailing ones (the AND) minus the number of bits we shift off
3065 NumBits = CountTrailingOnes_64(MaskVal) - ShiftAmt;
3066 } else if (isShiftedMask_64(MaskVal)) {
3067 NumZeros = countTrailingZeros(MaskVal);
3068 unsigned NumOnes = CountTrailingOnes_64(MaskVal >> NumZeros);
3069 // The number of bits in the result bitfield will be the number of
3070 // trailing zeros plus the number of set bits in the mask minus the
3071 // number of bits we shift off
3072 NumBits = NumZeros + NumOnes - ShiftAmt;
3074 // This is not a mask we can handle
3078 if (ShiftAmt < NumZeros) {
3079 // Handling this case would require extra logic that would make this
3080 // transformation non-profitable
3085 Start = CurDAG->getTargetConstant(ShiftAmt, MVT::i32);
3086 Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
3087 } else if (LHS->getOpcode() == ISD::SHL) {
3088 // Here, we have a pattern like:
3090 // (sra (shl val, NN), MM)
3092 // (srl (shl val, NN), MM)
3094 // If MM >= NN, we can efficiently optimize this with bfe
3095 Val = LHS->getOperand(0);
3097 SDValue ShlRHS = LHS->getOperand(1);
3098 ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
3100 // Shift amount must be constant
3103 uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
3105 SDValue ShrRHS = RHS;
3106 ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
3108 // Shift amount must be constant
3111 uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
3113 // To avoid extra codegen and be profitable, we need Outer >= Inner
3114 if (OuterShiftAmt < InnerShiftAmt) {
3118 // If the outer shift is more than the type size, we have no bitfield to
3119 // extract (since we also check that the inner shift is <= the outer shift
3120 // then this also implies that the inner shift is < the type size)
3121 if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
3126 CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, MVT::i32);
3128 CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
3129 OuterShiftAmt, MVT::i32);
3131 if (N->getOpcode() == ISD::SRA) {
3132 // If we have a arithmetic right shift, we need to use the signed bfe
3147 // For the BFE operations we form here from "and" and "srl", always use the
3148 // unsigned variants.
3149 if (Val.getValueType() == MVT::i32) {
3151 Opc = NVPTX::BFE_S32rii;
3153 Opc = NVPTX::BFE_U32rii;
3155 } else if (Val.getValueType() == MVT::i64) {
3157 Opc = NVPTX::BFE_S64rii;
3159 Opc = NVPTX::BFE_U64rii;
3162 // We cannot handle this type
3171 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3176 // SelectDirectAddr - Match a direct address for DAG.
3177 // A direct address could be a globaladdress or externalsymbol.
3178 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
3179 // Return true if TGA or ES.
3180 if (N.getOpcode() == ISD::TargetGlobalAddress ||
3181 N.getOpcode() == ISD::TargetExternalSymbol) {
3185 if (N.getOpcode() == NVPTXISD::Wrapper) {
3186 Address = N.getOperand(0);
3189 if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3190 unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
3191 if (IID == Intrinsic::nvvm_ptr_gen_to_param)
3192 if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
3193 return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
3199 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
3200 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3201 if (Addr.getOpcode() == ISD::ADD) {
3202 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3203 SDValue base = Addr.getOperand(0);
3204 if (SelectDirectAddr(base, Base)) {
3205 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
// SelectADDRsi - symbol+offset addressing with a 32-bit immediate offset.
bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
                                     SDValue &Base, SDValue &Offset) {
  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
}
// SelectADDRsi64 - symbol+offset addressing with a 64-bit immediate offset.
bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
                                       SDValue &Base, SDValue &Offset) {
  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
}
// SelectADDRri_imp - Match a register+immediate address: a bare frame index
// (selected as FI+0), or an ADD of a register/frame-index base and a
// constant offset. Direct symbol addresses are rejected here; they are
// matched by the symbol-based (ADDRsi) forms instead.
bool NVPTXDAGToDAGISel::SelectADDRri_imp(
    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
    // A plain frame index is addressed as FI with a zero offset.
    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
    Offset = CurDAG->getTargetConstant(0, mvt);
    return true;
  }
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress)
    return false; // direct calls.

  if (Addr.getOpcode() == ISD::ADD) {
    // symbol+offset is not reg+imm; leave it for the ADDRsi patterns.
    // NOTE(review): Addr is passed as the out-parameter here, so a matching
    // symbol overwrites the local Addr copy before returning false — looks
    // intentional only as a throwaway slot; verify against SelectDirectAddr.
    if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
      return false;
    }
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
      if (FrameIndexSDNode *FIN =
              dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
        // Constant offset from frame ref.
        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
      else
        Base = Addr.getOperand(0);
      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
      return true;
    }
  }
  return false;
}
// SelectADDRri - register+immediate addressing with a 32-bit offset.
bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
                                     SDValue &Base, SDValue &Offset) {
  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
}
// SelectADDRri64 - register+immediate addressing with a 64-bit offset.
bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
                                       SDValue &Base, SDValue &Offset) {
  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
}
// ChkMemSDNodeAddressSpace - Return true if the memory access described by
// node N is in address space spN. For the generic space (spN == 0), an
// access through a pseudo source value is also accepted, since it has no IR
// value carrying an address space.
bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
                                                 unsigned int spN) const {
  const Value *Src = nullptr;
  // Even though MemIntrinsicSDNode is a subclass of MemSDNode,
  // the classof() for MemSDNode does not include MemIntrinsicSDNode
  // (See SelectionDAGNodes.h). So we need to check for both.
  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
    if (spN == 0 && mN->getMemOperand()->getPseudoValue())
      return true;
    Src = mN->getMemOperand()->getValue();
  } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
    if (spN == 0 && mN->getMemOperand()->getPseudoValue())
      return true;
    Src = mN->getMemOperand()->getValue();
  }
  if (!Src)
    return false;
  // The address space is recorded on the pointer type of the underlying
  // IR value backing the memory operand.
  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
    return (PT->getAddressSpace() == spN);
  return false;
}
3289 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
3290 /// inline asm expressions.
3291 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
3292 const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
3294 switch (ConstraintCode) {
3298 if (SelectDirectAddr(Op, Op0)) {
3299 OutOps.push_back(Op0);
3300 OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
3303 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
3304 OutOps.push_back(Op0);
3305 OutOps.push_back(Op1);