1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines an instruction selector for the NVPTX target.
12 //===----------------------------------------------------------------------===//
14 #include "NVPTXISelDAGToDAG.h"
15 #include "llvm/IR/GlobalValue.h"
16 #include "llvm/IR/Instructions.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/ErrorHandling.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include "llvm/Target/TargetIntrinsicInfo.h"
24 #define DEBUG_TYPE "nvptx-isel"
29 FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
30 cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
31 " 1: do it 2: do it aggressively"),
34 static cl::opt<int> UsePrecDivF32(
35 "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
36 cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
37 " IEEE Compliant F32 div.rnd if avaiable."),
41 UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
42 cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
46 FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
47 cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
51 /// createNVPTXISelDag - This pass converts a legalized DAG into a
52 /// NVPTX-specific DAG, ready for instruction scheduling.
53 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
54 llvm::CodeGenOpt::Level OptLevel) {
55 return new NVPTXDAGToDAGISel(TM, OptLevel);
58 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
59 CodeGenOpt::Level OptLevel)
60 : SelectionDAGISel(tm, OptLevel),
61 Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
63 doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1);
64 doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1);
66 (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel == 2);
68 (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2);
70 allowFMA = (FMAContractLevel >= 1);
72 doMulWide = (OptLevel > 0);
75 int NVPTXDAGToDAGISel::getDivF32Level() const {
76 if (UsePrecDivF32.getNumOccurrences() > 0) {
77 // If nvptx-prec-div32=N is used on the command-line, always honor it
80 // Otherwise, use div.approx if fast math is enabled
81 if (TM.Options.UnsafeFPMath)
88 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
89 if (UsePrecSqrtF32.getNumOccurrences() > 0) {
90 // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
91 return UsePrecSqrtF32;
93 // Otherwise, use sqrt.approx if fast math is enabled
94 if (TM.Options.UnsafeFPMath)
101 bool NVPTXDAGToDAGISel::useF32FTZ() const {
102 if (FtzEnabled.getNumOccurrences() > 0) {
103 // If nvptx-f32ftz is used on the command-line, always honor it
106 const Function *F = MF->getFunction();
107 // Otherwise, check for an nvptx-f32ftz attribute on the function
108 if (F->hasFnAttribute("nvptx-f32ftz"))
109 return (F->getAttributes().getAttribute(AttributeSet::FunctionIndex,
111 .getValueAsString() == "true");
117 /// Select - Select instructions not customized! Used for
118 /// expanded, promoted and normal instructions.
119 SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
121 if (N->isMachineOpcode()) {
123 return NULL; // Already selected.
126 SDNode *ResNode = NULL;
127 switch (N->getOpcode()) {
129 ResNode = SelectLoad(N);
132 ResNode = SelectStore(N);
134 case NVPTXISD::LoadV2:
135 case NVPTXISD::LoadV4:
136 ResNode = SelectLoadVector(N);
138 case NVPTXISD::LDGV2:
139 case NVPTXISD::LDGV4:
140 case NVPTXISD::LDUV2:
141 case NVPTXISD::LDUV4:
142 ResNode = SelectLDGLDUVector(N);
144 case NVPTXISD::StoreV2:
145 case NVPTXISD::StoreV4:
146 ResNode = SelectStoreVector(N);
148 case NVPTXISD::LoadParam:
149 case NVPTXISD::LoadParamV2:
150 case NVPTXISD::LoadParamV4:
151 ResNode = SelectLoadParam(N);
153 case NVPTXISD::StoreRetval:
154 case NVPTXISD::StoreRetvalV2:
155 case NVPTXISD::StoreRetvalV4:
156 ResNode = SelectStoreRetval(N);
158 case NVPTXISD::StoreParam:
159 case NVPTXISD::StoreParamV2:
160 case NVPTXISD::StoreParamV4:
161 case NVPTXISD::StoreParamS32:
162 case NVPTXISD::StoreParamU32:
163 ResNode = SelectStoreParam(N);
170 return SelectCode(N);
173 static unsigned int getCodeAddrSpace(MemSDNode *N,
174 const NVPTXSubtarget &Subtarget) {
175 const Value *Src = N->getSrcValue();
178 return NVPTX::PTXLdStInstCode::GENERIC;
180 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
181 switch (PT->getAddressSpace()) {
182 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
183 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
184 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
185 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
186 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
187 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
191 return NVPTX::PTXLdStInstCode::GENERIC;
// SelectLoad - Lower a scalar ISD::LOAD to one of the NVPTX LD_* machine
// opcodes.  Addressing modes are tried in order of preference:
//   avar (direct address) -> asi (symbol+imm) -> ari (reg+imm) -> areg (reg),
// with _64 opcode variants on 64-bit subtargets.  Every emitted node carries
// the volatile flag, address-space code, vector code, value-type code and
// width as immediate operands, plus the memory operand for alias analysis.
// NOTE(review): this excerpt is a mangled paste — early-exit guards,
// 'case MVT::xx:' labels, 'break;'s and the final 'return' were dropped.
// Only comments were added below; no code has been reconstructed.
SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  SDNode *NVPTXLD = NULL;  // result node; stays NULL if no pattern matched

  // do not support pre/post inc/dec
  if (!LoadedVT.isSimple())

  // Address Space Setting
  unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool isVolatile = LD->isVolatile();
  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)

  // Vector Setting
  MVT SimpleVT = LoadedVT.getSimpleVT();
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
  if (SimpleVT.isVector()) {
    unsigned num = SimpleVT.getVectorNumElements();
      vecType = NVPTX::PTXLdStInstCode::V2;
      vecType = NVPTX::PTXLdStInstCode::V4;

  // Type Setting: fromType + fromTypeWidth
  // Sign   : ISD::SEXTLOAD
  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
  MVT ScalarVT = SimpleVT.getScalarType();
  // Read at least 8 bits (predicates are stored as 8-bit values)
  unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
  unsigned int fromType;
  if ((LD->getExtensionType() == ISD::SEXTLOAD))
    fromType = NVPTX::PTXLdStInstCode::Signed;
  else if (ScalarVT.isFloatingPoint())
    fromType = NVPTX::PTXLdStInstCode::Float;
    fromType = NVPTX::PTXLdStInstCode::Unsigned;

  // Create the machine instruction DAG
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue Offset, Base;
  MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;

  // avar: the address is a directly addressable symbol.
  if (SelectDirectAddr(N1, Addr)) {
      Opcode = NVPTX::LD_i8_avar;
      Opcode = NVPTX::LD_i16_avar;
      Opcode = NVPTX::LD_i32_avar;
      Opcode = NVPTX::LD_i64_avar;
      Opcode = NVPTX::LD_f32_avar;
      Opcode = NVPTX::LD_f64_avar;
    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(fromType),
                      getI32Imm(fromTypeWidth), Addr, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
  // asi: symbol + immediate offset.
  } else if (Subtarget.is64Bit()
                 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
                 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
      Opcode = NVPTX::LD_i8_asi;
      Opcode = NVPTX::LD_i16_asi;
      Opcode = NVPTX::LD_i32_asi;
      Opcode = NVPTX::LD_i64_asi;
      Opcode = NVPTX::LD_f32_asi;
      Opcode = NVPTX::LD_f64_asi;
    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(fromType),
                      getI32Imm(fromTypeWidth), Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
  // ari: register + immediate offset (pointer-width specific opcodes).
  } else if (Subtarget.is64Bit()
                 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
                 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
    if (Subtarget.is64Bit()) {
        Opcode = NVPTX::LD_i8_ari_64;
        Opcode = NVPTX::LD_i16_ari_64;
        Opcode = NVPTX::LD_i32_ari_64;
        Opcode = NVPTX::LD_i64_ari_64;
        Opcode = NVPTX::LD_f32_ari_64;
        Opcode = NVPTX::LD_f64_ari_64;
        Opcode = NVPTX::LD_i8_ari;
        Opcode = NVPTX::LD_i16_ari;
        Opcode = NVPTX::LD_i32_ari;
        Opcode = NVPTX::LD_i64_ari;
        Opcode = NVPTX::LD_f32_ari;
        Opcode = NVPTX::LD_f64_ari;
    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(fromType),
                      getI32Imm(fromTypeWidth), Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
    // areg: plain register operand (fallback addressing mode).
    if (Subtarget.is64Bit()) {
        Opcode = NVPTX::LD_i8_areg_64;
        Opcode = NVPTX::LD_i16_areg_64;
        Opcode = NVPTX::LD_i32_areg_64;
        Opcode = NVPTX::LD_i64_areg_64;
        Opcode = NVPTX::LD_f32_areg_64;
        Opcode = NVPTX::LD_f64_areg_64;
        Opcode = NVPTX::LD_i8_areg;
        Opcode = NVPTX::LD_i16_areg;
        Opcode = NVPTX::LD_i32_areg;
        Opcode = NVPTX::LD_i64_areg;
        Opcode = NVPTX::LD_f32_areg;
        Opcode = NVPTX::LD_f64_areg;
    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(fromType),
                      getI32Imm(fromTypeWidth), N1, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);

  // Attach the memory operand so later passes keep alias information.
  if (NVPTXLD != NULL) {
    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
    cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
// SelectLoadVector - Lower an NVPTXISD::LoadV2/LoadV4 node to the matching
// NVPTX LDV_* machine opcode.  Mirrors SelectLoad: addressing modes are
// tried in order avar -> asi -> ari -> areg (with _64 variants on 64-bit
// subtargets), and the opcode is further keyed on element type and vector
// arity (v2/v4; v4 has no i64/f64 forms).
// NOTE(review): mangled paste — 'case MVT::xx:' labels, 'break;'s, the
// 'else' introducing each fallback branch and early-exit guards were
// dropped.  Only comments were added below; no code was reconstructed.
SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Addr, Offset, Base;
  MemSDNode *MemSD = cast<MemSDNode>(N);
  EVT LoadedVT = MemSD->getMemoryVT();

  if (!LoadedVT.isSimple())

  // Address Space Setting
  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool IsVolatile = MemSD->isVolatile();
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)

  // Vector Setting
  MVT SimpleVT = LoadedVT.getSimpleVT();

  // Type Setting: fromType + fromTypeWidth
  // Sign   : ISD::SEXTLOAD
  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
  MVT ScalarVT = SimpleVT.getScalarType();
  // Read at least 8 bits (predicates are stored as 8-bit values)
  unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
  unsigned int FromType;
  // The last operand holds the original LoadSDNode::getExtensionType() value
  unsigned ExtensionType = cast<ConstantSDNode>(
      N->getOperand(N->getNumOperands() - 1))->getZExtValue();
  if (ExtensionType == ISD::SEXTLOAD)
    FromType = NVPTX::PTXLdStInstCode::Signed;
  else if (ScalarVT.isFloatingPoint())
    FromType = NVPTX::PTXLdStInstCode::Float;
    FromType = NVPTX::PTXLdStInstCode::Unsigned;

  // Encode the vector arity (V2/V4) from the node opcode.
  switch (N->getOpcode()) {
  case NVPTXISD::LoadV2:
    VecType = NVPTX::PTXLdStInstCode::V2;
  case NVPTXISD::LoadV4:
    VecType = NVPTX::PTXLdStInstCode::V4;

  EVT EltVT = N->getValueType(0);

  // avar: directly addressable symbol.
  if (SelectDirectAddr(Op1, Addr)) {
    switch (N->getOpcode()) {
    case NVPTXISD::LoadV2:
      switch (EltVT.getSimpleVT().SimpleTy) {
        Opcode = NVPTX::LDV_i8_v2_avar;
        Opcode = NVPTX::LDV_i16_v2_avar;
        Opcode = NVPTX::LDV_i32_v2_avar;
        Opcode = NVPTX::LDV_i64_v2_avar;
        Opcode = NVPTX::LDV_f32_v2_avar;
        Opcode = NVPTX::LDV_f64_v2_avar;
    case NVPTXISD::LoadV4:
      switch (EltVT.getSimpleVT().SimpleTy) {
        Opcode = NVPTX::LDV_i8_v4_avar;
        Opcode = NVPTX::LDV_i16_v4_avar;
        Opcode = NVPTX::LDV_i32_v4_avar;
        Opcode = NVPTX::LDV_f32_v4_avar;
    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
                      getI32Imm(VecType), getI32Imm(FromType),
                      getI32Imm(FromTypeWidth), Addr, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
  // asi: symbol + immediate offset.
  } else if (Subtarget.is64Bit()
                 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
                 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
    switch (N->getOpcode()) {
    case NVPTXISD::LoadV2:
      switch (EltVT.getSimpleVT().SimpleTy) {
        Opcode = NVPTX::LDV_i8_v2_asi;
        Opcode = NVPTX::LDV_i16_v2_asi;
        Opcode = NVPTX::LDV_i32_v2_asi;
        Opcode = NVPTX::LDV_i64_v2_asi;
        Opcode = NVPTX::LDV_f32_v2_asi;
        Opcode = NVPTX::LDV_f64_v2_asi;
    case NVPTXISD::LoadV4:
      switch (EltVT.getSimpleVT().SimpleTy) {
        Opcode = NVPTX::LDV_i8_v4_asi;
        Opcode = NVPTX::LDV_i16_v4_asi;
        Opcode = NVPTX::LDV_i32_v4_asi;
        Opcode = NVPTX::LDV_f32_v4_asi;
    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
                      getI32Imm(VecType), getI32Imm(FromType),
                      getI32Imm(FromTypeWidth), Base, Offset, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
  // ari: register + immediate offset; pointer-width specific.
  } else if (Subtarget.is64Bit()
                 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
                 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
    if (Subtarget.is64Bit()) {
      switch (N->getOpcode()) {
      case NVPTXISD::LoadV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::LDV_i8_v2_ari_64;
          Opcode = NVPTX::LDV_i16_v2_ari_64;
          Opcode = NVPTX::LDV_i32_v2_ari_64;
          Opcode = NVPTX::LDV_i64_v2_ari_64;
          Opcode = NVPTX::LDV_f32_v2_ari_64;
          Opcode = NVPTX::LDV_f64_v2_ari_64;
      case NVPTXISD::LoadV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::LDV_i8_v4_ari_64;
          Opcode = NVPTX::LDV_i16_v4_ari_64;
          Opcode = NVPTX::LDV_i32_v4_ari_64;
          Opcode = NVPTX::LDV_f32_v4_ari_64;
      switch (N->getOpcode()) {
      case NVPTXISD::LoadV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::LDV_i8_v2_ari;
          Opcode = NVPTX::LDV_i16_v2_ari;
          Opcode = NVPTX::LDV_i32_v2_ari;
          Opcode = NVPTX::LDV_i64_v2_ari;
          Opcode = NVPTX::LDV_f32_v2_ari;
          Opcode = NVPTX::LDV_f64_v2_ari;
      case NVPTXISD::LoadV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::LDV_i8_v4_ari;
          Opcode = NVPTX::LDV_i16_v4_ari;
          Opcode = NVPTX::LDV_i32_v4_ari;
          Opcode = NVPTX::LDV_f32_v4_ari;
    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
                      getI32Imm(VecType), getI32Imm(FromType),
                      getI32Imm(FromTypeWidth), Base, Offset, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
    // areg: plain register operand (fallback addressing mode).
    if (Subtarget.is64Bit()) {
      switch (N->getOpcode()) {
      case NVPTXISD::LoadV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::LDV_i8_v2_areg_64;
          Opcode = NVPTX::LDV_i16_v2_areg_64;
          Opcode = NVPTX::LDV_i32_v2_areg_64;
          Opcode = NVPTX::LDV_i64_v2_areg_64;
          Opcode = NVPTX::LDV_f32_v2_areg_64;
          Opcode = NVPTX::LDV_f64_v2_areg_64;
      case NVPTXISD::LoadV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::LDV_i8_v4_areg_64;
          Opcode = NVPTX::LDV_i16_v4_areg_64;
          Opcode = NVPTX::LDV_i32_v4_areg_64;
          Opcode = NVPTX::LDV_f32_v4_areg_64;
      switch (N->getOpcode()) {
      case NVPTXISD::LoadV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::LDV_i8_v2_areg;
          Opcode = NVPTX::LDV_i16_v2_areg;
          Opcode = NVPTX::LDV_i32_v2_areg;
          Opcode = NVPTX::LDV_i64_v2_areg;
          Opcode = NVPTX::LDV_f32_v2_areg;
          Opcode = NVPTX::LDV_f64_v2_areg;
      case NVPTXISD::LoadV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::LDV_i8_v4_areg;
          Opcode = NVPTX::LDV_i16_v4_areg;
          Opcode = NVPTX::LDV_i32_v4_areg;
          Opcode = NVPTX::LDV_f32_v4_areg;
    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
                      getI32Imm(VecType), getI32Imm(FromType),
                      getI32Imm(FromTypeWidth), Op1, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);

  // Attach the memory operand so later passes keep alias information.
  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
// SelectLDGLDUVector - Lower NVPTXISD::LDGV2/LDGV4/LDUV2/LDUV4 (vector
// ld.global.nc / ldu) nodes to the corresponding INT_PTX_LDG_*/INT_PTX_LDU_*
// element-wise machine opcodes.  Addressing modes tried: avar (direct
// address) -> ari32/ari64 (reg+imm) -> areg32/areg64 (plain register);
// unlike the plain-load selectors there is no asi form here.  Opcode choice
// is keyed on LDG-vs-LDU, v2-vs-v4 arity (v4 has no i64/f64 forms) and the
// memory element type.
// NOTE(review): mangled paste — 'case MVT::xx:' labels, 'break;'s and the
// 'else' lines introducing fallback branches were dropped.  Only comments
// were added; no code was reconstructed.
SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  MemSDNode *Mem = cast<MemSDNode>(N);
  SDValue Base, Offset, Addr;

  // Opcode is selected per-element, so key on the vector element type.
  EVT EltVT = Mem->getMemoryVT().getVectorElementType();

  // avar: directly addressable symbol.
  if (SelectDirectAddr(Op1, Addr)) {
    switch (N->getOpcode()) {
    case NVPTXISD::LDGV2:
      switch (EltVT.getSimpleVT().SimpleTy) {
        Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
    case NVPTXISD::LDUV2:
      switch (EltVT.getSimpleVT().SimpleTy) {
        Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
    case NVPTXISD::LDGV4:
      switch (EltVT.getSimpleVT().SimpleTy) {
        Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
    case NVPTXISD::LDUV4:
      switch (EltVT.getSimpleVT().SimpleTy) {
        Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
    SDValue Ops[] = { Addr, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
                                ArrayRef<SDValue>(Ops, 2));
  // ari: register + immediate offset; pointer-width specific opcodes.
  } else if (Subtarget.is64Bit()
                 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
                 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
    if (Subtarget.is64Bit()) {
      switch (N->getOpcode()) {
      case NVPTXISD::LDGV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
      case NVPTXISD::LDUV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
      case NVPTXISD::LDGV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
      case NVPTXISD::LDUV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
      switch (N->getOpcode()) {
      case NVPTXISD::LDGV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
      case NVPTXISD::LDUV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
      case NVPTXISD::LDGV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
      case NVPTXISD::LDUV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
    SDValue Ops[] = { Base, Offset, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
                                ArrayRef<SDValue>(Ops, 3));
    // areg: plain register operand (fallback addressing mode).
    if (Subtarget.is64Bit()) {
      switch (N->getOpcode()) {
      case NVPTXISD::LDGV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
      case NVPTXISD::LDUV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
      case NVPTXISD::LDGV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
      case NVPTXISD::LDUV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
      switch (N->getOpcode()) {
      case NVPTXISD::LDGV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
      case NVPTXISD::LDUV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
      case NVPTXISD::LDGV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
      case NVPTXISD::LDUV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
    SDValue Ops[] = { Op1, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
                                ArrayRef<SDValue>(Ops, 2));

  // Attach the memory operand so later passes keep alias information.
  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
// SelectStore - Lower a scalar ISD::STORE to one of the NVPTX ST_* machine
// opcodes.  Mirror image of SelectLoad: addressing modes avar -> asi ->
// ari -> areg (with _64 variants on 64-bit subtargets), opcode keyed on the
// stored value's type; unlike loads there is no sign/zero-extension
// distinction (integers always use the 'u' type code).
// NOTE(review): mangled paste — early-exit guards, 'case MVT::xx:' labels,
// 'break;'s, the 'else' lines and the final 'return' were dropped.  Only
// comments were added below; no code was reconstructed.
SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
  StoreSDNode *ST = cast<StoreSDNode>(N);
  EVT StoreVT = ST->getMemoryVT();
  SDNode *NVPTXST = NULL;  // result node; stays NULL if no pattern matched

  // do not support pre/post inc/dec
  if (ST->isIndexed())

  if (!StoreVT.isSimple())

  // Address Space Setting
  unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool isVolatile = ST->isVolatile();
  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)

  // Vector Setting
  MVT SimpleVT = StoreVT.getSimpleVT();
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
  if (SimpleVT.isVector()) {
    unsigned num = SimpleVT.getVectorNumElements();
      vecType = NVPTX::PTXLdStInstCode::V2;
      vecType = NVPTX::PTXLdStInstCode::V4;

  // Type Setting: toType + toTypeWidth
  // - for integer type, always use 'u'
  MVT ScalarVT = SimpleVT.getScalarType();
  unsigned toTypeWidth = ScalarVT.getSizeInBits();
  unsigned int toType;
  if (ScalarVT.isFloatingPoint())
    toType = NVPTX::PTXLdStInstCode::Float;
    toType = NVPTX::PTXLdStInstCode::Unsigned;

  // Create the machine instruction DAG
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);  // value to store
  SDValue N2 = N->getOperand(2);  // address
  SDValue Offset, Base;
  MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;

  // avar: directly addressable symbol.
  if (SelectDirectAddr(N2, Addr)) {
      Opcode = NVPTX::ST_i8_avar;
      Opcode = NVPTX::ST_i16_avar;
      Opcode = NVPTX::ST_i32_avar;
      Opcode = NVPTX::ST_i64_avar;
      Opcode = NVPTX::ST_f32_avar;
      Opcode = NVPTX::ST_f64_avar;
    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(toType),
                      getI32Imm(toTypeWidth), Addr, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
  // asi: symbol + immediate offset.
  } else if (Subtarget.is64Bit()
                 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
                 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
      Opcode = NVPTX::ST_i8_asi;
      Opcode = NVPTX::ST_i16_asi;
      Opcode = NVPTX::ST_i32_asi;
      Opcode = NVPTX::ST_i64_asi;
      Opcode = NVPTX::ST_f32_asi;
      Opcode = NVPTX::ST_f64_asi;
    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(toType),
                      getI32Imm(toTypeWidth), Base, Offset, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
  // ari: register + immediate offset; pointer-width specific opcodes.
  } else if (Subtarget.is64Bit()
                 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
                 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
    if (Subtarget.is64Bit()) {
        Opcode = NVPTX::ST_i8_ari_64;
        Opcode = NVPTX::ST_i16_ari_64;
        Opcode = NVPTX::ST_i32_ari_64;
        Opcode = NVPTX::ST_i64_ari_64;
        Opcode = NVPTX::ST_f32_ari_64;
        Opcode = NVPTX::ST_f64_ari_64;
        Opcode = NVPTX::ST_i8_ari;
        Opcode = NVPTX::ST_i16_ari;
        Opcode = NVPTX::ST_i32_ari;
        Opcode = NVPTX::ST_i64_ari;
        Opcode = NVPTX::ST_f32_ari;
        Opcode = NVPTX::ST_f64_ari;
    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(toType),
                      getI32Imm(toTypeWidth), Base, Offset, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
    // areg: plain register operand (fallback addressing mode).
    if (Subtarget.is64Bit()) {
        Opcode = NVPTX::ST_i8_areg_64;
        Opcode = NVPTX::ST_i16_areg_64;
        Opcode = NVPTX::ST_i32_areg_64;
        Opcode = NVPTX::ST_i64_areg_64;
        Opcode = NVPTX::ST_f32_areg_64;
        Opcode = NVPTX::ST_f64_areg_64;
        Opcode = NVPTX::ST_i8_areg;
        Opcode = NVPTX::ST_i16_areg;
        Opcode = NVPTX::ST_i32_areg;
        Opcode = NVPTX::ST_i64_areg;
        Opcode = NVPTX::ST_f32_areg;
        Opcode = NVPTX::ST_f64_areg;
    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(toType),
                      getI32Imm(toTypeWidth), N2, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);

  // Attach the memory operand so later passes keep alias information.
  if (NVPTXST != NULL) {
    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
    cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
1527 SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
1528 SDValue Chain = N->getOperand(0);
1529 SDValue Op1 = N->getOperand(1);
1530 SDValue Addr, Offset, Base;
1534 EVT EltVT = Op1.getValueType();
1535 MemSDNode *MemSD = cast<MemSDNode>(N);
1536 EVT StoreVT = MemSD->getMemoryVT();
1538 // Address Space Setting
1539 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
1541 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
1542 report_fatal_error("Cannot store to pointer that points to constant "
1547 // - .volatile is only availalble for .global and .shared
1548 bool IsVolatile = MemSD->isVolatile();
1549 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1550 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1551 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1554 // Type Setting: toType + toTypeWidth
1555 // - for integer type, always use 'u'
1556 assert(StoreVT.isSimple() && "Store value is not simple");
1557 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
1558 unsigned ToTypeWidth = ScalarVT.getSizeInBits();
1560 if (ScalarVT.isFloatingPoint())
1561 ToType = NVPTX::PTXLdStInstCode::Float;
1563 ToType = NVPTX::PTXLdStInstCode::Unsigned;
1565 SmallVector<SDValue, 12> StOps;
1569 switch (N->getOpcode()) {
1570 case NVPTXISD::StoreV2:
1571 VecType = NVPTX::PTXLdStInstCode::V2;
1572 StOps.push_back(N->getOperand(1));
1573 StOps.push_back(N->getOperand(2));
1574 N2 = N->getOperand(3);
1576 case NVPTXISD::StoreV4:
1577 VecType = NVPTX::PTXLdStInstCode::V4;
1578 StOps.push_back(N->getOperand(1));
1579 StOps.push_back(N->getOperand(2));
1580 StOps.push_back(N->getOperand(3));
1581 StOps.push_back(N->getOperand(4));
1582 N2 = N->getOperand(5);
1588 StOps.push_back(getI32Imm(IsVolatile));
1589 StOps.push_back(getI32Imm(CodeAddrSpace));
1590 StOps.push_back(getI32Imm(VecType));
1591 StOps.push_back(getI32Imm(ToType));
1592 StOps.push_back(getI32Imm(ToTypeWidth));
1594 if (SelectDirectAddr(N2, Addr)) {
1595 switch (N->getOpcode()) {
1598 case NVPTXISD::StoreV2:
1599 switch (EltVT.getSimpleVT().SimpleTy) {
1603 Opcode = NVPTX::STV_i8_v2_avar;
1606 Opcode = NVPTX::STV_i16_v2_avar;
1609 Opcode = NVPTX::STV_i32_v2_avar;
1612 Opcode = NVPTX::STV_i64_v2_avar;
1615 Opcode = NVPTX::STV_f32_v2_avar;
1618 Opcode = NVPTX::STV_f64_v2_avar;
1622 case NVPTXISD::StoreV4:
1623 switch (EltVT.getSimpleVT().SimpleTy) {
1627 Opcode = NVPTX::STV_i8_v4_avar;
1630 Opcode = NVPTX::STV_i16_v4_avar;
1633 Opcode = NVPTX::STV_i32_v4_avar;
1636 Opcode = NVPTX::STV_f32_v4_avar;
1641 StOps.push_back(Addr);
1642 } else if (Subtarget.is64Bit()
1643 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1644 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1645 switch (N->getOpcode()) {
1648 case NVPTXISD::StoreV2:
1649 switch (EltVT.getSimpleVT().SimpleTy) {
1653 Opcode = NVPTX::STV_i8_v2_asi;
1656 Opcode = NVPTX::STV_i16_v2_asi;
1659 Opcode = NVPTX::STV_i32_v2_asi;
1662 Opcode = NVPTX::STV_i64_v2_asi;
1665 Opcode = NVPTX::STV_f32_v2_asi;
1668 Opcode = NVPTX::STV_f64_v2_asi;
1672 case NVPTXISD::StoreV4:
1673 switch (EltVT.getSimpleVT().SimpleTy) {
1677 Opcode = NVPTX::STV_i8_v4_asi;
1680 Opcode = NVPTX::STV_i16_v4_asi;
1683 Opcode = NVPTX::STV_i32_v4_asi;
1686 Opcode = NVPTX::STV_f32_v4_asi;
1691 StOps.push_back(Base);
1692 StOps.push_back(Offset);
1693 } else if (Subtarget.is64Bit()
1694 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1695 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1696 if (Subtarget.is64Bit()) {
1697 switch (N->getOpcode()) {
1700 case NVPTXISD::StoreV2:
1701 switch (EltVT.getSimpleVT().SimpleTy) {
1705 Opcode = NVPTX::STV_i8_v2_ari_64;
1708 Opcode = NVPTX::STV_i16_v2_ari_64;
1711 Opcode = NVPTX::STV_i32_v2_ari_64;
1714 Opcode = NVPTX::STV_i64_v2_ari_64;
1717 Opcode = NVPTX::STV_f32_v2_ari_64;
1720 Opcode = NVPTX::STV_f64_v2_ari_64;
1724 case NVPTXISD::StoreV4:
1725 switch (EltVT.getSimpleVT().SimpleTy) {
1729 Opcode = NVPTX::STV_i8_v4_ari_64;
1732 Opcode = NVPTX::STV_i16_v4_ari_64;
1735 Opcode = NVPTX::STV_i32_v4_ari_64;
1738 Opcode = NVPTX::STV_f32_v4_ari_64;
1744 switch (N->getOpcode()) {
1747 case NVPTXISD::StoreV2:
1748 switch (EltVT.getSimpleVT().SimpleTy) {
1752 Opcode = NVPTX::STV_i8_v2_ari;
1755 Opcode = NVPTX::STV_i16_v2_ari;
1758 Opcode = NVPTX::STV_i32_v2_ari;
1761 Opcode = NVPTX::STV_i64_v2_ari;
1764 Opcode = NVPTX::STV_f32_v2_ari;
1767 Opcode = NVPTX::STV_f64_v2_ari;
1771 case NVPTXISD::StoreV4:
1772 switch (EltVT.getSimpleVT().SimpleTy) {
1776 Opcode = NVPTX::STV_i8_v4_ari;
1779 Opcode = NVPTX::STV_i16_v4_ari;
1782 Opcode = NVPTX::STV_i32_v4_ari;
1785 Opcode = NVPTX::STV_f32_v4_ari;
1791 StOps.push_back(Base);
1792 StOps.push_back(Offset);
1794 if (Subtarget.is64Bit()) {
1795 switch (N->getOpcode()) {
1798 case NVPTXISD::StoreV2:
1799 switch (EltVT.getSimpleVT().SimpleTy) {
1803 Opcode = NVPTX::STV_i8_v2_areg_64;
1806 Opcode = NVPTX::STV_i16_v2_areg_64;
1809 Opcode = NVPTX::STV_i32_v2_areg_64;
1812 Opcode = NVPTX::STV_i64_v2_areg_64;
1815 Opcode = NVPTX::STV_f32_v2_areg_64;
1818 Opcode = NVPTX::STV_f64_v2_areg_64;
1822 case NVPTXISD::StoreV4:
1823 switch (EltVT.getSimpleVT().SimpleTy) {
1827 Opcode = NVPTX::STV_i8_v4_areg_64;
1830 Opcode = NVPTX::STV_i16_v4_areg_64;
1833 Opcode = NVPTX::STV_i32_v4_areg_64;
1836 Opcode = NVPTX::STV_f32_v4_areg_64;
1842 switch (N->getOpcode()) {
1845 case NVPTXISD::StoreV2:
1846 switch (EltVT.getSimpleVT().SimpleTy) {
1850 Opcode = NVPTX::STV_i8_v2_areg;
1853 Opcode = NVPTX::STV_i16_v2_areg;
1856 Opcode = NVPTX::STV_i32_v2_areg;
1859 Opcode = NVPTX::STV_i64_v2_areg;
1862 Opcode = NVPTX::STV_f32_v2_areg;
1865 Opcode = NVPTX::STV_f64_v2_areg;
1869 case NVPTXISD::StoreV4:
1870 switch (EltVT.getSimpleVT().SimpleTy) {
1874 Opcode = NVPTX::STV_i8_v4_areg;
1877 Opcode = NVPTX::STV_i16_v4_areg;
1880 Opcode = NVPTX::STV_i32_v4_areg;
1883 Opcode = NVPTX::STV_f32_v4_areg;
1889 StOps.push_back(N2);
1892 StOps.push_back(Chain);
1894 ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
1896 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1897 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1898 cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
// SelectLoadParam - Select a machine node for an NVPTXISD::LoadParam{,V2,V4}
// node: a load of a scalar, 2-element, or 4-element value from the .param
// space (an incoming call parameter). Picks a LoadParamMem* opcode from the
// memory VT, builds the result VT list (EltVT x VecSize + chain + glue), and
// emits the node with {offset, chain, glue} operands.
// NOTE(review): this listing is elided (embedded original line numbers are
// non-contiguous); case labels, breaks, defaults and returns between the
// visible lines are not shown here.
1903 SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
// Operand layout: 0 = chain, 2 = byte offset into the param, 3 = glue.
1904 SDValue Chain = Node->getOperand(0);
1905 SDValue Offset = Node->getOperand(2);
1906 SDValue Flag = Node->getOperand(3);
1908 MemSDNode *Mem = cast<MemSDNode>(Node);
// Determine the vector width (1/2/4) from the ISD opcode; the elided lines
// presumably set a VecSize variable used below — confirm against full source.
1911 switch (Node->getOpcode()) {
1914 case NVPTXISD::LoadParam:
1917 case NVPTXISD::LoadParamV2:
1920 case NVPTXISD::LoadParamV4:
// EltVT is the in-register element type; MemVT is the type actually stored in
// the param space (they differ for extending loads, e.g. i1 loaded as i8).
1925 EVT EltVT = Node->getValueType(0);
1926 EVT MemVT = Mem->getMemoryVT();
// Scalar opcode table, keyed on the memory element type. Note both of the
// first two visible entries select LoadParamMemI8 — i1 is widened to an
// 8-bit param load (case labels elided).
1934 switch (MemVT.getSimpleVT().SimpleTy) {
1938 Opc = NVPTX::LoadParamMemI8;
1941 Opc = NVPTX::LoadParamMemI8;
1944 Opc = NVPTX::LoadParamMemI16;
1947 Opc = NVPTX::LoadParamMemI32;
1950 Opc = NVPTX::LoadParamMemI64;
1953 Opc = NVPTX::LoadParamMemF32;
1956 Opc = NVPTX::LoadParamMemF64;
// 2-element vector opcode table (same i1->I8 widening).
1961 switch (MemVT.getSimpleVT().SimpleTy) {
1965 Opc = NVPTX::LoadParamMemV2I8;
1968 Opc = NVPTX::LoadParamMemV2I8;
1971 Opc = NVPTX::LoadParamMemV2I16;
1974 Opc = NVPTX::LoadParamMemV2I32;
1977 Opc = NVPTX::LoadParamMemV2I64;
1980 Opc = NVPTX::LoadParamMemV2F32;
1983 Opc = NVPTX::LoadParamMemV2F64;
// 4-element vector opcode table; no 64-bit element entries are visible here
// (v4 of 64-bit types is presumably not supported — confirm in full source).
1988 switch (MemVT.getSimpleVT().SimpleTy) {
1992 Opc = NVPTX::LoadParamMemV4I8;
1995 Opc = NVPTX::LoadParamMemV4I8;
1998 Opc = NVPTX::LoadParamMemV4I16;
2001 Opc = NVPTX::LoadParamMemV4I32;
2004 Opc = NVPTX::LoadParamMemV4F32;
// Result VT list: one EltVT per element, then chain (MVT::Other) and glue.
2012 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2013 } else if (VecSize == 2) {
2014 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2016 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2017 VTs = CurDAG->getVTList(&EVTs[0], array_lengthof(EVTs));
// The offset operand must be a constant; it is re-emitted as an i32
// target constant operand on the machine node.
2020 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2022 SmallVector<SDValue, 2> Ops;
2023 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2024 Ops.push_back(Chain);
2025 Ops.push_back(Flag);
2028 CurDAG->getMachineNode(Opc, DL, VTs, Ops);
// SelectStoreRetval - Select a machine node for NVPTXISD::StoreRetval{,V2,V4}:
// a store of 1/2/4 values into the callee's return-value (.param) area at a
// constant byte offset. Chooses a StoreRetval* opcode from the memory VT and
// attaches the original MemSDNode's memory operand to the result.
// NOTE(review): listing is elided — case labels/breaks between the visible
// lines are not shown.
2032 SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
// Operand layout: 0 = chain, 1 = constant byte offset, 2.. = values to store.
2034 SDValue Chain = N->getOperand(0);
2035 SDValue Offset = N->getOperand(1);
2036 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2037 MemSDNode *Mem = cast<MemSDNode>(N);
2039 // How many elements do we have?
2040 unsigned NumElts = 1;
2041 switch (N->getOpcode()) {
2044 case NVPTXISD::StoreRetval:
2047 case NVPTXISD::StoreRetvalV2:
2050 case NVPTXISD::StoreRetvalV4:
2055 // Build vector of operands
// Machine-node operand order: the NumElts stored values, then the offset as
// an i32 target constant, then the chain.
2056 SmallVector<SDValue, 6> Ops;
2057 for (unsigned i = 0; i < NumElts; ++i)
2058 Ops.push_back(N->getOperand(i + 2));
2059 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2060 Ops.push_back(Chain);
2062 // Determine target opcode
2063 // If we have an i1, use an 8-bit store. The lowering code in
2064 // NVPTXISelLowering will have already emitted an upcast.
2065 unsigned Opcode = 0;
// Scalar opcode table (first two visible entries are both I8: i1 stored as
// an 8-bit value; case labels elided).
2070 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2074 Opcode = NVPTX::StoreRetvalI8;
2077 Opcode = NVPTX::StoreRetvalI8;
2080 Opcode = NVPTX::StoreRetvalI16;
2083 Opcode = NVPTX::StoreRetvalI32;
2086 Opcode = NVPTX::StoreRetvalI64;
2089 Opcode = NVPTX::StoreRetvalF32;
2092 Opcode = NVPTX::StoreRetvalF64;
// 2-element vector opcode table.
2097 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2101 Opcode = NVPTX::StoreRetvalV2I8;
2104 Opcode = NVPTX::StoreRetvalV2I8;
2107 Opcode = NVPTX::StoreRetvalV2I16;
2110 Opcode = NVPTX::StoreRetvalV2I32;
2113 Opcode = NVPTX::StoreRetvalV2I64;
2116 Opcode = NVPTX::StoreRetvalV2F32;
2119 Opcode = NVPTX::StoreRetvalV2F64;
// 4-element vector opcode table; no 64-bit element variants are visible.
2124 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2128 Opcode = NVPTX::StoreRetvalV4I8;
2131 Opcode = NVPTX::StoreRetvalV4I8;
2134 Opcode = NVPTX::StoreRetvalV4I16;
2137 Opcode = NVPTX::StoreRetvalV4I32;
2140 Opcode = NVPTX::StoreRetvalV4F32;
// Emit the store (chain-only result) and carry the memory operand over so
// later passes see the correct MachineMemOperand.
2147 CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
2148 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2149 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2150 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
// SelectStoreParam - Select a machine node for the StoreParam family: a store
// of 1/2/4 values into an outgoing call argument's .param slot, identified by
// a constant (param-number, byte-offset) pair. StoreParamU32/S32 additionally
// insert a zext/sext CVT from i16 before the 32-bit store.
// NOTE(review): listing is elided — case labels/breaks/defaults between the
// visible lines are not shown.
2155 SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
// Operand layout: 0 = chain, 1 = param number, 2 = byte offset,
// 3.. = values to store, last = glue.
2157 SDValue Chain = N->getOperand(0);
2158 SDValue Param = N->getOperand(1);
2159 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2160 SDValue Offset = N->getOperand(2);
2161 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2162 MemSDNode *Mem = cast<MemSDNode>(N);
2163 SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2165 // How many elements do we have?
// U32/S32 fall through with the plain StoreParam case: all are single-element.
2166 unsigned NumElts = 1;
2167 switch (N->getOpcode()) {
2170 case NVPTXISD::StoreParamU32:
2171 case NVPTXISD::StoreParamS32:
2172 case NVPTXISD::StoreParam:
2175 case NVPTXISD::StoreParamV2:
2178 case NVPTXISD::StoreParamV4:
2183 // Build vector of operands
// Machine-node operand order: values, param number (i32), offset (i32),
// chain, glue.
2184 SmallVector<SDValue, 8> Ops;
2185 for (unsigned i = 0; i < NumElts; ++i)
2186 Ops.push_back(N->getOperand(i + 3));
2187 Ops.push_back(CurDAG->getTargetConstant(ParamVal, MVT::i32));
2188 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2189 Ops.push_back(Chain);
2190 Ops.push_back(Flag);
2192 // Determine target opcode
2193 // If we have an i1, use an 8-bit store. The lowering code in
2194 // NVPTXISelLowering will have already emitted an upcast.
2195 unsigned Opcode = 0;
2196 switch (N->getOpcode()) {
// Scalar opcode table keyed on the memory VT (first two visible entries are
// both I8: i1 is stored as 8 bits; case labels elided).
2202 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2206 Opcode = NVPTX::StoreParamI8;
2209 Opcode = NVPTX::StoreParamI8;
2212 Opcode = NVPTX::StoreParamI16;
2215 Opcode = NVPTX::StoreParamI32;
2218 Opcode = NVPTX::StoreParamI64;
2221 Opcode = NVPTX::StoreParamF32;
2224 Opcode = NVPTX::StoreParamF64;
// 2-element vector opcode table.
2229 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2233 Opcode = NVPTX::StoreParamV2I8;
2236 Opcode = NVPTX::StoreParamV2I8;
2239 Opcode = NVPTX::StoreParamV2I16;
2242 Opcode = NVPTX::StoreParamV2I32;
2245 Opcode = NVPTX::StoreParamV2I64;
2248 Opcode = NVPTX::StoreParamV2F32;
2251 Opcode = NVPTX::StoreParamV2F64;
// 4-element vector opcode table; no 64-bit element variants are visible.
2256 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2260 Opcode = NVPTX::StoreParamV4I8;
2263 Opcode = NVPTX::StoreParamV4I8;
2266 Opcode = NVPTX::StoreParamV4I16;
2269 Opcode = NVPTX::StoreParamV4I32;
2272 Opcode = NVPTX::StoreParamV4F32;
2278 // Special case: if we have a sign-extend/zero-extend node, insert the
2279 // conversion instruction first, and use that as the value operand to
2280 // the selected StoreParam node.
2281 case NVPTXISD::StoreParamU32: {
// Zero-extend: wrap the value operand in a CVT_u32_u16 (no rounding/ftz
// modifier) and store the 32-bit result.
2282 Opcode = NVPTX::StoreParamI32;
2283 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2285 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2286 MVT::i32, Ops[0], CvtNone);
2287 Ops[0] = SDValue(Cvt, 0);
2290 case NVPTXISD::StoreParamS32: {
// Sign-extend variant of the same pattern (CVT_s32_s16).
2291 Opcode = NVPTX::StoreParamI32;
2292 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2294 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2295 MVT::i32, Ops[0], CvtNone);
2296 Ops[0] = SDValue(Cvt, 0);
// Result types are chain + glue; attach the original memory operand.
2301 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
2303 CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
2304 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2305 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2306 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2311 // SelectDirectAddr - Match a direct address for DAG.
2312 // A direct address could be a globaladdress or externalsymbol.
// On a match, Address is set to the matched SDValue and true is returned
// (the returns themselves are elided from this listing).
2313 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
2314 // Return true if TGA or ES.
2315 if (N.getOpcode() == ISD::TargetGlobalAddress ||
2316 N.getOpcode() == ISD::TargetExternalSymbol) {
// NVPTXISD::Wrapper wraps a direct address; unwrap it.
2320 if (N.getOpcode() == NVPTXISD::Wrapper) {
2321 Address = N.getOperand(0);
// Look through nvvm.ptr.gen.to.param(MoveParam(x)): the underlying x may
// itself be a direct address.
2324 if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2325 unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
2326 if (IID == Intrinsic::nvvm_ptr_gen_to_param)
2327 if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
2328 return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
// SelectADDRsi_imp - Match a "symbol + immediate" address: an ISD::ADD whose
// LHS is a direct address (global/external symbol, see SelectDirectAddr) and
// whose RHS is a constant. On a match, Base is the symbol and Offset is the
// constant re-emitted as a target constant of width mvt. The return
// statements are elided from this listing.
2334 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
2335 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
2336 if (Addr.getOpcode() == ISD::ADD) {
2337 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2338 SDValue base = Addr.getOperand(0);
2339 if (SelectDirectAddr(base, Base)) {
2340 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
// SelectADDRsi - symbol+immediate addressing with a 32-bit offset type.
2349 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
2350 SDValue &Base, SDValue &Offset) {
2351 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
// SelectADDRsi64 - symbol+immediate addressing with a 64-bit offset type.
2355 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
2356 SDValue &Base, SDValue &Offset) {
2357 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
// SelectADDRri_imp - Match a "register + immediate" address of width mvt:
//   * a bare FrameIndex  -> Base = target FI, Offset = 0;
//   * reject bare symbols (those are direct-call addresses, not reg+imm);
//   * ADD(base, constant) -> Base = base (folded to a target FI when the base
//     is a FrameIndex), Offset = the constant.
// The return statements are elided from this listing.
2361 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
2362 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
2363 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2364 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
2365 Offset = CurDAG->getTargetConstant(0, mvt);
2368 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
2369 Addr.getOpcode() == ISD::TargetGlobalAddress)
2370 return false; // direct calls.
2372 if (Addr.getOpcode() == ISD::ADD) {
// symbol+imm belongs to SelectADDRsi, not here (the action taken on this
// match is elided from the listing).
2373 if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
2376 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2377 if (FrameIndexSDNode *FIN =
2378 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
2379 // Constant offset from frame ref.
2380 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
2382 Base = Addr.getOperand(0);
2383 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
// SelectADDRri - register+immediate addressing with a 32-bit offset type.
2391 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
2392 SDValue &Base, SDValue &Offset) {
2393 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
// SelectADDRri64 - register+immediate addressing with a 64-bit offset type.
2397 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
2398 SDValue &Base, SDValue &Offset) {
2399 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
// ChkMemSDNodeAddressSpace - Return true if the IR pointer value underlying
// memory node N lives in address space spN. Behavior when the source value
// or its pointer type is unavailable is in lines elided from this listing.
2402 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
2403 unsigned int spN) const {
2404 const Value *Src = NULL;
2405 // Even though MemIntrinsicSDNode is a subclas of MemSDNode,
2406 // the classof() for MemSDNode does not include MemIntrinsicSDNode
2407 // (See SelectionDAGNodes.h). So we need to check for both.
2408 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
2409 Src = mN->getSrcValue();
2410 } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
2411 Src = mN->getSrcValue();
// Compare the pointee's declared address space against the requested one.
2415 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
2416 return (PT->getAddressSpace() == spN);
2420 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
2421 /// inline asm expressions.
2422 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
2423 const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
2425 switch (ConstraintCode) {
2429 if (SelectDirectAddr(Op, Op0)) {
2430 OutOps.push_back(Op0);
2431 OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
2434 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
2435 OutOps.push_back(Op0);
2436 OutOps.push_back(Op1);