1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines an instruction selector for the NVPTX target.
12 //===----------------------------------------------------------------------===//
14 #include "NVPTXISelDAGToDAG.h"
15 #include "llvm/IR/GlobalValue.h"
16 #include "llvm/IR/Instructions.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/ErrorHandling.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include "llvm/Target/TargetIntrinsicInfo.h"
24 #define DEBUG_TYPE "nvptx-isel"
28 static cl::opt<bool> UseFMADInstruction(
29 "nvptx-mad-enable", cl::ZeroOrMore,
30 cl::desc("NVPTX Specific: Enable generating FMAD instructions"),
34 FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore,
35 cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
36 " 1: do it 2: do it aggressively"),
39 static cl::opt<int> UsePrecDivF32(
40 "nvptx-prec-divf32", cl::ZeroOrMore,
41 cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
42 " IEEE Compliant F32 div.rnd if avaiable."),
46 UsePrecSqrtF32("nvptx-prec-sqrtf32",
47 cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
/// createNVPTXISelDag - This pass converts a legalized DAG into a
/// NVPTX-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
                                       llvm::CodeGenOpt::Level OptLevel) {
  // The pass manager takes ownership of the returned pass object.
  return new NVPTXDAGToDAGISel(TM, OptLevel);
// Constructor: caches the subtarget and pre-computes the FP-contraction and
// f32 div/sqrt lowering policy flags from the command-line options above.
NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
                                     CodeGenOpt::Level OptLevel)
    : SelectionDAGISel(tm, OptLevel),
      Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
  // Always do fma.f32 fpcontract if the target supports the instruction.
  // Always do fma.f64 fpcontract if the target supports the instruction.
  // Do mad.f32 if nvptx-mad-enable is specified and the target does not
  // support fma.f32 natively; only when optimizing (OptLevel > 0).
  doFMADF32 = (OptLevel > 0) && UseFMADInstruction && !Subtarget.hasFMAF32();
  doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1);
  doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1);
  // NOTE(review): the assignment targets for the next two expressions
  // (presumably the aggressive-contraction flags, set when
  // FMAContractLevel == 2) are elided in this excerpt -- confirm against
  // the full file.
  (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel == 2);
  (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2);
  // FMA folding is permitted if either contraction level or FMAD is enabled.
  allowFMA = (FMAContractLevel >= 1) || UseFMADInstruction;
  // Wide multiplies only when optimizing.
  doMulWide = (OptLevel > 0);
  // Decide how to translate f32 div
  do_DIVF32_PREC = UsePrecDivF32;
  // Decide how to translate f32 sqrt
  do_SQRTF32_PREC = UsePrecSqrtF32;
  // sm less than sm_20 does not support div.rnd. Use div.full.
  if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20())
/// Select - Select instructions not customized! Used for
/// expanded, promoted and normal instructions.
SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
  // Machine opcodes are already selected; NULL tells the common selector
  // code to leave the node untouched.
  if (N->isMachineOpcode())
    return NULL; // Already selected.
  SDNode *ResNode = NULL;
  // Dispatch NVPTX-specific memory/param opcodes to dedicated helpers;
  // everything else falls through to the tablegen'd SelectCode at the end.
  // NOTE(review): the case labels for the first two calls (plain scalar
  // load/store -- presumably ISD::LOAD and ISD::STORE) and the per-case
  // returns/breaks are elided in this excerpt.
  switch (N->getOpcode()) {
    ResNode = SelectLoad(N);
    ResNode = SelectStore(N);
  case NVPTXISD::LoadV2:
  case NVPTXISD::LoadV4:
    ResNode = SelectLoadVector(N);
  case NVPTXISD::LDGV2:
  case NVPTXISD::LDGV4:
  case NVPTXISD::LDUV2:
  case NVPTXISD::LDUV4:
    ResNode = SelectLDGLDUVector(N);
  case NVPTXISD::StoreV2:
  case NVPTXISD::StoreV4:
    ResNode = SelectStoreVector(N);
  case NVPTXISD::LoadParam:
  case NVPTXISD::LoadParamV2:
  case NVPTXISD::LoadParamV4:
    ResNode = SelectLoadParam(N);
  case NVPTXISD::StoreRetval:
  case NVPTXISD::StoreRetvalV2:
  case NVPTXISD::StoreRetvalV4:
    ResNode = SelectStoreRetval(N);
  case NVPTXISD::StoreParam:
  case NVPTXISD::StoreParamV2:
  case NVPTXISD::StoreParamV4:
  case NVPTXISD::StoreParamS32:
  case NVPTXISD::StoreParamU32:
    ResNode = SelectStoreParam(N);
  // Fall back to the auto-generated matcher for all other opcodes.
  return SelectCode(N);
144 static unsigned int getCodeAddrSpace(MemSDNode *N,
145 const NVPTXSubtarget &Subtarget) {
146 const Value *Src = N->getSrcValue();
149 return NVPTX::PTXLdStInstCode::GENERIC;
151 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
152 switch (PT->getAddressSpace()) {
153 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
154 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
155 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
156 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
157 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
158 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
162 return NVPTX::PTXLdStInstCode::GENERIC;
// SelectLoad - Lower a scalar load node to a target LD_* machine node.
// The opcode is chosen by addressing mode (direct address / symbol+imm /
// reg+imm / plain reg) and by the loaded value type; the load's volatility,
// address space, vector shape, and source type/width are passed as immediate
// operands on the machine node.
// NOTE(review): the MVT case labels, breaks, and some guard bodies are
// elided in this excerpt; the Opcode assignments below follow the
// i8/i16/i32/i64/f32/f64 order.
SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  SDNode *NVPTXLD = NULL;
  // do not support pre/post inc/dec
  if (!LoadedVT.isSimple())
  // Address Space Setting
  unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
  // - .volatile is only available for .global and .shared
  bool isVolatile = LD->isVolatile();
  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
  // Vector Setting: scalar by default, V2/V4 for short vectors.
  MVT SimpleVT = LoadedVT.getSimpleVT();
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
  if (SimpleVT.isVector()) {
    unsigned num = SimpleVT.getVectorNumElements();
    vecType = NVPTX::PTXLdStInstCode::V2;
    vecType = NVPTX::PTXLdStInstCode::V4;
  // Type Setting: fromType + fromTypeWidth
  // Sign : ISD::SEXTLOAD
  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
  // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
  MVT ScalarVT = SimpleVT.getScalarType();
  // Read at least 8 bits (predicates are stored as 8-bit values)
  unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
  unsigned int fromType;
  if ((LD->getExtensionType() == ISD::SEXTLOAD))
    fromType = NVPTX::PTXLdStInstCode::Signed;
  else if (ScalarVT.isFloatingPoint())
    fromType = NVPTX::PTXLdStInstCode::Float;
    fromType = NVPTX::PTXLdStInstCode::Unsigned;
  // Create the machine instruction DAG
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue Offset, Base;
  MVT::SimpleValueType TargetVT = LD->getValueType(0).getSimpleVT().SimpleTy;
  // --- Addressing mode 1: direct address (avar) ---
  if (SelectDirectAddr(N1, Addr)) {
    Opcode = NVPTX::LD_i8_avar;
    Opcode = NVPTX::LD_i16_avar;
    Opcode = NVPTX::LD_i32_avar;
    Opcode = NVPTX::LD_i64_avar;
    Opcode = NVPTX::LD_f32_avar;
    Opcode = NVPTX::LD_f64_avar;
    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(fromType),
                      getI32Imm(fromTypeWidth), Addr, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
  // --- Addressing mode 2: symbol + immediate (asi) ---
  } else if (Subtarget.is64Bit()
             ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
             : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
    Opcode = NVPTX::LD_i8_asi;
    Opcode = NVPTX::LD_i16_asi;
    Opcode = NVPTX::LD_i32_asi;
    Opcode = NVPTX::LD_i64_asi;
    Opcode = NVPTX::LD_f32_asi;
    Opcode = NVPTX::LD_f64_asi;
    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(fromType),
                      getI32Imm(fromTypeWidth), Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
  // --- Addressing mode 3: register + immediate (ari / ari_64) ---
  } else if (Subtarget.is64Bit()
             ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
             : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
    if (Subtarget.is64Bit()) {
      Opcode = NVPTX::LD_i8_ari_64;
      Opcode = NVPTX::LD_i16_ari_64;
      Opcode = NVPTX::LD_i32_ari_64;
      Opcode = NVPTX::LD_i64_ari_64;
      Opcode = NVPTX::LD_f32_ari_64;
      Opcode = NVPTX::LD_f64_ari_64;
      Opcode = NVPTX::LD_i8_ari;
      Opcode = NVPTX::LD_i16_ari;
      Opcode = NVPTX::LD_i32_ari;
      Opcode = NVPTX::LD_i64_ari;
      Opcode = NVPTX::LD_f32_ari;
      Opcode = NVPTX::LD_f64_ari;
    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(fromType),
                      getI32Imm(fromTypeWidth), Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
  // --- Addressing mode 4 (fallback): plain register (areg / areg_64) ---
    if (Subtarget.is64Bit()) {
      Opcode = NVPTX::LD_i8_areg_64;
      Opcode = NVPTX::LD_i16_areg_64;
      Opcode = NVPTX::LD_i32_areg_64;
      Opcode = NVPTX::LD_i64_areg_64;
      Opcode = NVPTX::LD_f32_areg_64;
      Opcode = NVPTX::LD_f64_areg_64;
      Opcode = NVPTX::LD_i8_areg;
      Opcode = NVPTX::LD_i16_areg;
      Opcode = NVPTX::LD_i32_areg;
      Opcode = NVPTX::LD_i64_areg;
      Opcode = NVPTX::LD_f32_areg;
      Opcode = NVPTX::LD_f64_areg;
    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(fromType),
                      getI32Imm(fromTypeWidth), N1, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
  // Attach the original memory operand so later passes keep aliasing info.
  if (NVPTXLD != NULL) {
    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
    cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
// SelectLoadVector - Lower a NVPTXISD::LoadV2/LoadV4 node to a target
// LDV_* machine node. Structure parallels SelectLoad: pick the opcode by
// addressing mode (avar/asi/ari/areg, with _64 variants), vector arity
// (v2/v4), and element type.
// NOTE(review): the MVT case labels and breaks are elided in this excerpt;
// the Opcode runs follow i8/i16/i32[/i64]/f32[/f64] order (v4 has no
// 64-bit element forms).
SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Addr, Offset, Base;
  MemSDNode *MemSD = cast<MemSDNode>(N);
  EVT LoadedVT = MemSD->getMemoryVT();
  if (!LoadedVT.isSimple())
  // Address Space Setting
  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
  // - .volatile is only available for .global and .shared
  bool IsVolatile = MemSD->isVolatile();
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
  MVT SimpleVT = LoadedVT.getSimpleVT();
  // Type Setting: fromType + fromTypeWidth
  // Sign : ISD::SEXTLOAD
  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
  // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
  MVT ScalarVT = SimpleVT.getScalarType();
  // Read at least 8 bits (predicates are stored as 8-bit values)
  unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
  unsigned int FromType;
  // The last operand holds the original LoadSDNode::getExtensionType() value
  unsigned ExtensionType = cast<ConstantSDNode>(
      N->getOperand(N->getNumOperands() - 1))->getZExtValue();
  if (ExtensionType == ISD::SEXTLOAD)
    FromType = NVPTX::PTXLdStInstCode::Signed;
  else if (ScalarVT.isFloatingPoint())
    FromType = NVPTX::PTXLdStInstCode::Float;
    FromType = NVPTX::PTXLdStInstCode::Unsigned;
  // Vector arity from the NVPTX opcode itself.
  switch (N->getOpcode()) {
  case NVPTXISD::LoadV2:
    VecType = NVPTX::PTXLdStInstCode::V2;
  case NVPTXISD::LoadV4:
    VecType = NVPTX::PTXLdStInstCode::V4;
  EVT EltVT = N->getValueType(0);
  // --- Addressing mode 1: direct address (avar) ---
  if (SelectDirectAddr(Op1, Addr)) {
    switch (N->getOpcode()) {
    case NVPTXISD::LoadV2:
      switch (EltVT.getSimpleVT().SimpleTy) {
        Opcode = NVPTX::LDV_i8_v2_avar;
        Opcode = NVPTX::LDV_i16_v2_avar;
        Opcode = NVPTX::LDV_i32_v2_avar;
        Opcode = NVPTX::LDV_i64_v2_avar;
        Opcode = NVPTX::LDV_f32_v2_avar;
        Opcode = NVPTX::LDV_f64_v2_avar;
    case NVPTXISD::LoadV4:
      switch (EltVT.getSimpleVT().SimpleTy) {
        Opcode = NVPTX::LDV_i8_v4_avar;
        Opcode = NVPTX::LDV_i16_v4_avar;
        Opcode = NVPTX::LDV_i32_v4_avar;
        Opcode = NVPTX::LDV_f32_v4_avar;
    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
                      getI32Imm(VecType), getI32Imm(FromType),
                      getI32Imm(FromTypeWidth), Addr, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
  // --- Addressing mode 2: symbol + immediate (asi) ---
  } else if (Subtarget.is64Bit()
             ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
             : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
    switch (N->getOpcode()) {
    case NVPTXISD::LoadV2:
      switch (EltVT.getSimpleVT().SimpleTy) {
        Opcode = NVPTX::LDV_i8_v2_asi;
        Opcode = NVPTX::LDV_i16_v2_asi;
        Opcode = NVPTX::LDV_i32_v2_asi;
        Opcode = NVPTX::LDV_i64_v2_asi;
        Opcode = NVPTX::LDV_f32_v2_asi;
        Opcode = NVPTX::LDV_f64_v2_asi;
    case NVPTXISD::LoadV4:
      switch (EltVT.getSimpleVT().SimpleTy) {
        Opcode = NVPTX::LDV_i8_v4_asi;
        Opcode = NVPTX::LDV_i16_v4_asi;
        Opcode = NVPTX::LDV_i32_v4_asi;
        Opcode = NVPTX::LDV_f32_v4_asi;
    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
                      getI32Imm(VecType), getI32Imm(FromType),
                      getI32Imm(FromTypeWidth), Base, Offset, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
  // --- Addressing mode 3: register + immediate (ari / ari_64) ---
  } else if (Subtarget.is64Bit()
             ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
             : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
    if (Subtarget.is64Bit()) {
      switch (N->getOpcode()) {
      case NVPTXISD::LoadV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::LDV_i8_v2_ari_64;
          Opcode = NVPTX::LDV_i16_v2_ari_64;
          Opcode = NVPTX::LDV_i32_v2_ari_64;
          Opcode = NVPTX::LDV_i64_v2_ari_64;
          Opcode = NVPTX::LDV_f32_v2_ari_64;
          Opcode = NVPTX::LDV_f64_v2_ari_64;
      case NVPTXISD::LoadV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::LDV_i8_v4_ari_64;
          Opcode = NVPTX::LDV_i16_v4_ari_64;
          Opcode = NVPTX::LDV_i32_v4_ari_64;
          Opcode = NVPTX::LDV_f32_v4_ari_64;
      switch (N->getOpcode()) {
      case NVPTXISD::LoadV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::LDV_i8_v2_ari;
          Opcode = NVPTX::LDV_i16_v2_ari;
          Opcode = NVPTX::LDV_i32_v2_ari;
          Opcode = NVPTX::LDV_i64_v2_ari;
          Opcode = NVPTX::LDV_f32_v2_ari;
          Opcode = NVPTX::LDV_f64_v2_ari;
      case NVPTXISD::LoadV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::LDV_i8_v4_ari;
          Opcode = NVPTX::LDV_i16_v4_ari;
          Opcode = NVPTX::LDV_i32_v4_ari;
          Opcode = NVPTX::LDV_f32_v4_ari;
    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
                      getI32Imm(VecType), getI32Imm(FromType),
                      getI32Imm(FromTypeWidth), Base, Offset, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
  // --- Addressing mode 4 (fallback): plain register (areg / areg_64) ---
    if (Subtarget.is64Bit()) {
      switch (N->getOpcode()) {
      case NVPTXISD::LoadV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::LDV_i8_v2_areg_64;
          Opcode = NVPTX::LDV_i16_v2_areg_64;
          Opcode = NVPTX::LDV_i32_v2_areg_64;
          Opcode = NVPTX::LDV_i64_v2_areg_64;
          Opcode = NVPTX::LDV_f32_v2_areg_64;
          Opcode = NVPTX::LDV_f64_v2_areg_64;
      case NVPTXISD::LoadV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::LDV_i8_v4_areg_64;
          Opcode = NVPTX::LDV_i16_v4_areg_64;
          Opcode = NVPTX::LDV_i32_v4_areg_64;
          Opcode = NVPTX::LDV_f32_v4_areg_64;
      switch (N->getOpcode()) {
      case NVPTXISD::LoadV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::LDV_i8_v2_areg;
          Opcode = NVPTX::LDV_i16_v2_areg;
          Opcode = NVPTX::LDV_i32_v2_areg;
          Opcode = NVPTX::LDV_i64_v2_areg;
          Opcode = NVPTX::LDV_f32_v2_areg;
          Opcode = NVPTX::LDV_f64_v2_areg;
      case NVPTXISD::LoadV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::LDV_i8_v4_areg;
          Opcode = NVPTX::LDV_i16_v4_areg;
          Opcode = NVPTX::LDV_i32_v4_areg;
          Opcode = NVPTX::LDV_f32_v4_areg;
    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
                      getI32Imm(VecType), getI32Imm(FromType),
                      getI32Imm(FromTypeWidth), Op1, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
  // Attach the original memory operand so aliasing info is preserved.
  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
// SelectLDGLDUVector - Lower NVPTXISD::LDGV2/LDGV4/LDUV2/LDUV4 nodes to
// the INT_PTX_LDG_G_* / INT_PTX_LDU_G_* element-wise machine intrinsics.
// Addressing modes: direct (avar), register+immediate (ari32/ari64), and
// plain register (areg32/areg64); note there is no symbol+imm form here,
// unlike SelectLoad/SelectLoadVector.
// NOTE(review): the MVT case labels and breaks are elided in this excerpt;
// each Opcode run follows i8/i16/i32[/i64]/f32[/f64] order.
SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  MemSDNode *Mem = cast<MemSDNode>(N);
  SDValue Base, Offset, Addr;
  EVT EltVT = Mem->getMemoryVT().getVectorElementType();
  // --- Addressing mode 1: direct address (avar) ---
  if (SelectDirectAddr(Op1, Addr)) {
    switch (N->getOpcode()) {
    case NVPTXISD::LDGV2:
      switch (EltVT.getSimpleVT().SimpleTy) {
        Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
    case NVPTXISD::LDUV2:
      switch (EltVT.getSimpleVT().SimpleTy) {
        Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
    case NVPTXISD::LDGV4:
      switch (EltVT.getSimpleVT().SimpleTy) {
        Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
    case NVPTXISD::LDUV4:
      switch (EltVT.getSimpleVT().SimpleTy) {
        Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
    SDValue Ops[] = { Addr, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
                                ArrayRef<SDValue>(Ops, 2));
  // --- Addressing mode 2: register + immediate (ari32 / ari64) ---
  } else if (Subtarget.is64Bit()
             ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
             : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
    if (Subtarget.is64Bit()) {
      switch (N->getOpcode()) {
      case NVPTXISD::LDGV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
      case NVPTXISD::LDUV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
      case NVPTXISD::LDGV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
      case NVPTXISD::LDUV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
      switch (N->getOpcode()) {
      case NVPTXISD::LDGV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
      case NVPTXISD::LDUV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
      case NVPTXISD::LDGV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
      case NVPTXISD::LDUV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
    SDValue Ops[] = { Base, Offset, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
                                ArrayRef<SDValue>(Ops, 3));
  // --- Addressing mode 3 (fallback): plain register (areg32 / areg64) ---
    if (Subtarget.is64Bit()) {
      switch (N->getOpcode()) {
      case NVPTXISD::LDGV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
      case NVPTXISD::LDUV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
      case NVPTXISD::LDGV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
      case NVPTXISD::LDUV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
      switch (N->getOpcode()) {
      case NVPTXISD::LDGV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
      case NVPTXISD::LDUV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
      case NVPTXISD::LDGV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
      case NVPTXISD::LDUV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
    SDValue Ops[] = { Op1, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
                                ArrayRef<SDValue>(Ops, 2));
  // Attach the original memory operand so aliasing info is preserved.
  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
// SelectStore - Lower a scalar store node to a target ST_* machine node.
// Mirrors SelectLoad: opcode chosen by addressing mode (avar/asi/ari/areg,
// with _64 variants) and by the stored value's type; volatility, address
// space, vector shape, and destination type/width become immediate operands.
// NOTE(review): the MVT case labels, breaks, and some guard bodies are
// elided in this excerpt; Opcode runs follow i8/i16/i32/i64/f32/f64 order.
SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
  StoreSDNode *ST = cast<StoreSDNode>(N);
  EVT StoreVT = ST->getMemoryVT();
  SDNode *NVPTXST = NULL;
  // do not support pre/post inc/dec
  if (ST->isIndexed())
  if (!StoreVT.isSimple())
  // Address Space Setting
  unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
  // - .volatile is only available for .global and .shared
  bool isVolatile = ST->isVolatile();
  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
  // Vector Setting: scalar by default, V2/V4 for short vectors.
  MVT SimpleVT = StoreVT.getSimpleVT();
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
  if (SimpleVT.isVector()) {
    unsigned num = SimpleVT.getVectorNumElements();
    vecType = NVPTX::PTXLdStInstCode::V2;
    vecType = NVPTX::PTXLdStInstCode::V4;
  // Type Setting: toType + toTypeWidth
  // - for integer type, always use 'u'
  MVT ScalarVT = SimpleVT.getScalarType();
  unsigned toTypeWidth = ScalarVT.getSizeInBits();
  unsigned int toType;
  if (ScalarVT.isFloatingPoint())
    toType = NVPTX::PTXLdStInstCode::Float;
    toType = NVPTX::PTXLdStInstCode::Unsigned;
  // Create the machine instruction DAG
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDValue Offset, Base;
  MVT::SimpleValueType SourceVT =
      N1.getNode()->getValueType(0).getSimpleVT().SimpleTy;
  // --- Addressing mode 1: direct address (avar) ---
  if (SelectDirectAddr(N2, Addr)) {
    Opcode = NVPTX::ST_i8_avar;
    Opcode = NVPTX::ST_i16_avar;
    Opcode = NVPTX::ST_i32_avar;
    Opcode = NVPTX::ST_i64_avar;
    Opcode = NVPTX::ST_f32_avar;
    Opcode = NVPTX::ST_f64_avar;
    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(toType),
                      getI32Imm(toTypeWidth), Addr, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
  // --- Addressing mode 2: symbol + immediate (asi) ---
  } else if (Subtarget.is64Bit()
             ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
             : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
    Opcode = NVPTX::ST_i8_asi;
    Opcode = NVPTX::ST_i16_asi;
    Opcode = NVPTX::ST_i32_asi;
    Opcode = NVPTX::ST_i64_asi;
    Opcode = NVPTX::ST_f32_asi;
    Opcode = NVPTX::ST_f64_asi;
    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(toType),
                      getI32Imm(toTypeWidth), Base, Offset, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
  // --- Addressing mode 3: register + immediate (ari / ari_64) ---
  } else if (Subtarget.is64Bit()
             ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
             : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
    if (Subtarget.is64Bit()) {
      Opcode = NVPTX::ST_i8_ari_64;
      Opcode = NVPTX::ST_i16_ari_64;
      Opcode = NVPTX::ST_i32_ari_64;
      Opcode = NVPTX::ST_i64_ari_64;
      Opcode = NVPTX::ST_f32_ari_64;
      Opcode = NVPTX::ST_f64_ari_64;
      Opcode = NVPTX::ST_i8_ari;
      Opcode = NVPTX::ST_i16_ari;
      Opcode = NVPTX::ST_i32_ari;
      Opcode = NVPTX::ST_i64_ari;
      Opcode = NVPTX::ST_f32_ari;
      Opcode = NVPTX::ST_f64_ari;
    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(toType),
                      getI32Imm(toTypeWidth), Base, Offset, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
  // --- Addressing mode 4 (fallback): plain register (areg / areg_64) ---
    if (Subtarget.is64Bit()) {
      Opcode = NVPTX::ST_i8_areg_64;
      Opcode = NVPTX::ST_i16_areg_64;
      Opcode = NVPTX::ST_i32_areg_64;
      Opcode = NVPTX::ST_i64_areg_64;
      Opcode = NVPTX::ST_f32_areg_64;
      Opcode = NVPTX::ST_f64_areg_64;
      Opcode = NVPTX::ST_i8_areg;
      Opcode = NVPTX::ST_i16_areg;
      Opcode = NVPTX::ST_i32_areg;
      Opcode = NVPTX::ST_i64_areg;
      Opcode = NVPTX::ST_f32_areg;
      Opcode = NVPTX::ST_f64_areg;
    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(toType),
                      getI32Imm(toTypeWidth), N2, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
  // Attach the original memory operand so aliasing info is preserved.
  if (NVPTXST != NULL) {
    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
    cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
// SelectStoreVector - Lower a NVPTXISD::StoreV2/StoreV4 node to a target
// STV_* machine node. Operand list (StOps) is built incrementally: stored
// values first, then the immediate flags, then the address operands, then
// the chain. Storing through a pointer into the constant address space is a
// fatal error.
// NOTE(review): this definition continues past the end of the visible
// excerpt; MVT case labels and breaks are elided, with Opcode runs in
// i8/i16/i32[/i64]/f32[/f64] order.
SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Addr, Offset, Base;
  EVT EltVT = Op1.getValueType();
  MemSDNode *MemSD = cast<MemSDNode>(N);
  EVT StoreVT = MemSD->getMemoryVT();
  // Address Space Setting
  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
    report_fatal_error("Cannot store to pointer that points to constant "
  // - .volatile is only available for .global and .shared
  bool IsVolatile = MemSD->isVolatile();
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
  // Type Setting: toType + toTypeWidth
  // - for integer type, always use 'u'
  assert(StoreVT.isSimple() && "Store value is not simple");
  MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
  if (ScalarVT.isFloatingPoint())
    ToType = NVPTX::PTXLdStInstCode::Float;
    ToType = NVPTX::PTXLdStInstCode::Unsigned;
  SmallVector<SDValue, 12> StOps;
  // Collect the values being stored; N2 is the address operand that follows
  // them in the node's operand list.
  switch (N->getOpcode()) {
  case NVPTXISD::StoreV2:
    VecType = NVPTX::PTXLdStInstCode::V2;
    StOps.push_back(N->getOperand(1));
    StOps.push_back(N->getOperand(2));
    N2 = N->getOperand(3);
  case NVPTXISD::StoreV4:
    VecType = NVPTX::PTXLdStInstCode::V4;
    StOps.push_back(N->getOperand(1));
    StOps.push_back(N->getOperand(2));
    StOps.push_back(N->getOperand(3));
    StOps.push_back(N->getOperand(4));
    N2 = N->getOperand(5);
  // Immediate flag operands, common to every addressing mode.
  StOps.push_back(getI32Imm(IsVolatile));
  StOps.push_back(getI32Imm(CodeAddrSpace));
  StOps.push_back(getI32Imm(VecType));
  StOps.push_back(getI32Imm(ToType));
  StOps.push_back(getI32Imm(ToTypeWidth));
  // --- Addressing mode 1: direct address (avar) ---
  if (SelectDirectAddr(N2, Addr)) {
    switch (N->getOpcode()) {
    case NVPTXISD::StoreV2:
      switch (EltVT.getSimpleVT().SimpleTy) {
        Opcode = NVPTX::STV_i8_v2_avar;
        Opcode = NVPTX::STV_i16_v2_avar;
        Opcode = NVPTX::STV_i32_v2_avar;
        Opcode = NVPTX::STV_i64_v2_avar;
        Opcode = NVPTX::STV_f32_v2_avar;
        Opcode = NVPTX::STV_f64_v2_avar;
    case NVPTXISD::StoreV4:
      switch (EltVT.getSimpleVT().SimpleTy) {
        Opcode = NVPTX::STV_i8_v4_avar;
        Opcode = NVPTX::STV_i16_v4_avar;
        Opcode = NVPTX::STV_i32_v4_avar;
        Opcode = NVPTX::STV_f32_v4_avar;
    StOps.push_back(Addr);
  // --- Addressing mode 2: symbol + immediate (asi) ---
  } else if (Subtarget.is64Bit()
             ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
             : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
    switch (N->getOpcode()) {
    case NVPTXISD::StoreV2:
      switch (EltVT.getSimpleVT().SimpleTy) {
        Opcode = NVPTX::STV_i8_v2_asi;
        Opcode = NVPTX::STV_i16_v2_asi;
        Opcode = NVPTX::STV_i32_v2_asi;
        Opcode = NVPTX::STV_i64_v2_asi;
        Opcode = NVPTX::STV_f32_v2_asi;
        Opcode = NVPTX::STV_f64_v2_asi;
    case NVPTXISD::StoreV4:
      switch (EltVT.getSimpleVT().SimpleTy) {
        Opcode = NVPTX::STV_i8_v4_asi;
        Opcode = NVPTX::STV_i16_v4_asi;
        Opcode = NVPTX::STV_i32_v4_asi;
        Opcode = NVPTX::STV_f32_v4_asi;
    StOps.push_back(Base);
    StOps.push_back(Offset);
  // --- Addressing mode 3: register + immediate (ari / ari_64) ---
  } else if (Subtarget.is64Bit()
             ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
             : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
    if (Subtarget.is64Bit()) {
      switch (N->getOpcode()) {
      case NVPTXISD::StoreV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::STV_i8_v2_ari_64;
          Opcode = NVPTX::STV_i16_v2_ari_64;
          Opcode = NVPTX::STV_i32_v2_ari_64;
          Opcode = NVPTX::STV_i64_v2_ari_64;
          Opcode = NVPTX::STV_f32_v2_ari_64;
          Opcode = NVPTX::STV_f64_v2_ari_64;
      case NVPTXISD::StoreV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::STV_i8_v4_ari_64;
          Opcode = NVPTX::STV_i16_v4_ari_64;
          Opcode = NVPTX::STV_i32_v4_ari_64;
          Opcode = NVPTX::STV_f32_v4_ari_64;
      switch (N->getOpcode()) {
      case NVPTXISD::StoreV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::STV_i8_v2_ari;
          Opcode = NVPTX::STV_i16_v2_ari;
          Opcode = NVPTX::STV_i32_v2_ari;
          Opcode = NVPTX::STV_i64_v2_ari;
          Opcode = NVPTX::STV_f32_v2_ari;
          Opcode = NVPTX::STV_f64_v2_ari;
      case NVPTXISD::StoreV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::STV_i8_v4_ari;
          Opcode = NVPTX::STV_i16_v4_ari;
          Opcode = NVPTX::STV_i32_v4_ari;
          Opcode = NVPTX::STV_f32_v4_ari;
    StOps.push_back(Base);
    StOps.push_back(Offset);
  // --- Addressing mode 4 (fallback): plain register (areg / areg_64) ---
    if (Subtarget.is64Bit()) {
      switch (N->getOpcode()) {
      case NVPTXISD::StoreV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::STV_i8_v2_areg_64;
          Opcode = NVPTX::STV_i16_v2_areg_64;
          Opcode = NVPTX::STV_i32_v2_areg_64;
          Opcode = NVPTX::STV_i64_v2_areg_64;
          Opcode = NVPTX::STV_f32_v2_areg_64;
          Opcode = NVPTX::STV_f64_v2_areg_64;
      case NVPTXISD::StoreV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::STV_i8_v4_areg_64;
          Opcode = NVPTX::STV_i16_v4_areg_64;
          Opcode = NVPTX::STV_i32_v4_areg_64;
          Opcode = NVPTX::STV_f32_v4_areg_64;
      switch (N->getOpcode()) {
      case NVPTXISD::StoreV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::STV_i8_v2_areg;
          Opcode = NVPTX::STV_i16_v2_areg;
          Opcode = NVPTX::STV_i32_v2_areg;
          Opcode = NVPTX::STV_i64_v2_areg;
          Opcode = NVPTX::STV_f32_v2_areg;
          Opcode = NVPTX::STV_f64_v2_areg;
      case NVPTXISD::StoreV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::STV_i8_v4_areg;
          Opcode = NVPTX::STV_i16_v4_areg;
          Opcode = NVPTX::STV_i32_v4_areg;
          Opcode = NVPTX::STV_f32_v4_areg;
    StOps.push_back(N2);
  StOps.push_back(Chain);
  ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
  // Attach the original memory operand so aliasing info is preserved.
  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1869 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1870 cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
// SelectLoadParam - Lower NVPTXISD::LoadParam / LoadParamV2 / LoadParamV4
// to the corresponding NVPTX::LoadParamMem* machine node.
// NOTE(review): this chunk was extracted with lines elided (case labels,
// breaks and returns are missing), so comments describe only what the
// visible lines establish.
1875 SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
// Operand 0: chain; operand 2: constant byte offset into the parameter
// space; operand 3: incoming glue.
1876 SDValue Chain = Node->getOperand(0);
1877 SDValue Offset = Node->getOperand(2);
1878 SDValue Flag = Node->getOperand(3);
1880 MemSDNode *Mem = cast<MemSDNode>(Node);
// Scalar / two-element / four-element variant is keyed on the opcode.
1883 switch (Node->getOpcode()) {
1886 case NVPTXISD::LoadParam:
1889 case NVPTXISD::LoadParamV2:
1892 case NVPTXISD::LoadParamV4:
// Result element type vs. the type actually held in memory (these may
// differ for sub-word values).
1897 EVT EltVT = Node->getValueType(0);
1898 EVT MemVT = Mem->getMemoryVT();
// Scalar variant: select the LoadParamMem* opcode from the memory type.
1906 switch (MemVT.getSimpleVT().SimpleTy) {
1910 Opc = NVPTX::LoadParamMemI8;
// Two consecutive I8 assignments: presumably both i1 and i8 map to the
// 8-bit load (case labels elided here) -- confirm against upstream.
1913 Opc = NVPTX::LoadParamMemI8;
1916 Opc = NVPTX::LoadParamMemI16;
1919 Opc = NVPTX::LoadParamMemI32;
1922 Opc = NVPTX::LoadParamMemI64;
1925 Opc = NVPTX::LoadParamMemF32;
1928 Opc = NVPTX::LoadParamMemF64;
// Two-element vector variant.
1933 switch (MemVT.getSimpleVT().SimpleTy) {
1937 Opc = NVPTX::LoadParamMemV2I8;
1940 Opc = NVPTX::LoadParamMemV2I8;
1943 Opc = NVPTX::LoadParamMemV2I16;
1946 Opc = NVPTX::LoadParamMemV2I32;
1949 Opc = NVPTX::LoadParamMemV2I64;
1952 Opc = NVPTX::LoadParamMemV2F32;
1955 Opc = NVPTX::LoadParamMemV2F64;
// Four-element vector variant (no 64-bit element forms are listed).
1960 switch (MemVT.getSimpleVT().SimpleTy) {
1964 Opc = NVPTX::LoadParamMemV4I8;
1967 Opc = NVPTX::LoadParamMemV4I8;
1970 Opc = NVPTX::LoadParamMemV4I16;
1973 Opc = NVPTX::LoadParamMemV4I32;
1976 Opc = NVPTX::LoadParamMemV4F32;
// Result type list: one EltVT per element plus chain (MVT::Other) and
// glue (MVT::Glue) results.
1984 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
1985 } else if (VecSize == 2) {
1986 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
1988 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
// NOTE(review): EVTs has 6 entries but only 5 are passed to getVTList,
// which would drop the trailing MVT::Glue result for the V4 case --
// verify against the callers / upstream history.
1989 VTs = CurDAG->getVTList(&EVTs[0], 5);
// Machine-node operands: i32 target-constant offset, chain, glue.
1992 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
1994 SmallVector<SDValue, 2> Ops;
1995 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
1996 Ops.push_back(Chain);
1997 Ops.push_back(Flag);
2000 CurDAG->getMachineNode(Opc, DL, VTs, Ops);
// SelectStoreRetval - Lower NVPTXISD::StoreRetval{,V2,V4} (storing a
// function's return value into the return-param space) to the matching
// NVPTX::StoreRetval* machine node.
// NOTE(review): extraction elided some lines (case labels/breaks), so
// comments cover only the visible structure.
2004 SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
// Operand 0: chain; operand 1: constant byte offset into the retval slot.
2006 SDValue Chain = N->getOperand(0);
2007 SDValue Offset = N->getOperand(1);
2008 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2009 MemSDNode *Mem = cast<MemSDNode>(N);
2011 // How many elements do we have?
2012 unsigned NumElts = 1;
2013 switch (N->getOpcode()) {
2016 case NVPTXISD::StoreRetval:
2019 case NVPTXISD::StoreRetvalV2:
2022 case NVPTXISD::StoreRetvalV4:
2027 // Build vector of operands
// Value operands start at operand index 2; they are followed by the
// i32 offset and the chain.
2028 SmallVector<SDValue, 6> Ops;
2029 for (unsigned i = 0; i < NumElts; ++i)
2030 Ops.push_back(N->getOperand(i + 2));
2031 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2032 Ops.push_back(Chain);
2034 // Determine target opcode
2035 // If we have an i1, use an 8-bit store. The lowering code in
2036 // NVPTXISelLowering will have already emitted an upcast.
2037 unsigned Opcode = 0;
// Scalar stores, keyed on the in-memory value type.
2042 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2046 Opcode = NVPTX::StoreRetvalI8;
// Second I8 assignment: i1 presumably shares the 8-bit store opcode
// (its case label is elided here).
2049 Opcode = NVPTX::StoreRetvalI8;
2052 Opcode = NVPTX::StoreRetvalI16;
2055 Opcode = NVPTX::StoreRetvalI32;
2058 Opcode = NVPTX::StoreRetvalI64;
2061 Opcode = NVPTX::StoreRetvalF32;
2064 Opcode = NVPTX::StoreRetvalF64;
// Two-element vector stores.
2069 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2073 Opcode = NVPTX::StoreRetvalV2I8;
2076 Opcode = NVPTX::StoreRetvalV2I8;
2079 Opcode = NVPTX::StoreRetvalV2I16;
2082 Opcode = NVPTX::StoreRetvalV2I32;
2085 Opcode = NVPTX::StoreRetvalV2I64;
2088 Opcode = NVPTX::StoreRetvalV2F32;
2091 Opcode = NVPTX::StoreRetvalV2F64;
// Four-element vector stores (no 64-bit element forms are listed).
2096 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2100 Opcode = NVPTX::StoreRetvalV4I8;
2103 Opcode = NVPTX::StoreRetvalV4I8;
2106 Opcode = NVPTX::StoreRetvalV4I16;
2109 Opcode = NVPTX::StoreRetvalV4I32;
2112 Opcode = NVPTX::StoreRetvalV4F32;
// Emit the machine node (chain-only result) and attach the original
// memory operand so alias analysis still sees the memory access.
2119 CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
2120 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2121 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2122 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
// SelectStoreParam - Lower NVPTXISD::StoreParam{,V2,V4,U32,S32} (writing
// an outgoing call argument into the param space) to the matching
// NVPTX::StoreParam* machine node.  The U32/S32 forms additionally emit a
// CVT to widen a 16-bit value before the 32-bit store.
// NOTE(review): extraction elided some lines (case labels/breaks), so
// comments cover only the visible structure.
2127 SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
// Operand 0: chain; operand 1: constant param index; operand 2: constant
// byte offset; last operand: incoming glue.
2129 SDValue Chain = N->getOperand(0);
2130 SDValue Param = N->getOperand(1);
2131 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2132 SDValue Offset = N->getOperand(2);
2133 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2134 MemSDNode *Mem = cast<MemSDNode>(N);
2135 SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2137 // How many elements do we have?
2138 unsigned NumElts = 1;
2139 switch (N->getOpcode()) {
// The extending forms store a single element, like plain StoreParam.
2142 case NVPTXISD::StoreParamU32:
2143 case NVPTXISD::StoreParamS32:
2144 case NVPTXISD::StoreParam:
2147 case NVPTXISD::StoreParamV2:
2150 case NVPTXISD::StoreParamV4:
2155 // Build vector of operands
// Value operands start at operand index 3, followed by param index,
// offset, chain and glue.
2156 SmallVector<SDValue, 8> Ops;
2157 for (unsigned i = 0; i < NumElts; ++i)
2158 Ops.push_back(N->getOperand(i + 3));
2159 Ops.push_back(CurDAG->getTargetConstant(ParamVal, MVT::i32));
2160 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2161 Ops.push_back(Chain);
2162 Ops.push_back(Flag);
2164 // Determine target opcode
2165 // If we have an i1, use an 8-bit store. The lowering code in
2166 // NVPTXISelLowering will have already emitted an upcast.
2167 unsigned Opcode = 0;
2168 switch (N->getOpcode()) {
// Scalar stores, keyed on the in-memory value type.
2174 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2178 Opcode = NVPTX::StoreParamI8;
// Second I8 assignment: i1 presumably shares the 8-bit store opcode
// (its case label is elided here).
2181 Opcode = NVPTX::StoreParamI8;
2184 Opcode = NVPTX::StoreParamI16;
2187 Opcode = NVPTX::StoreParamI32;
2190 Opcode = NVPTX::StoreParamI64;
2193 Opcode = NVPTX::StoreParamF32;
2196 Opcode = NVPTX::StoreParamF64;
// Two-element vector stores.
2201 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2205 Opcode = NVPTX::StoreParamV2I8;
2208 Opcode = NVPTX::StoreParamV2I8;
2211 Opcode = NVPTX::StoreParamV2I16;
2214 Opcode = NVPTX::StoreParamV2I32;
2217 Opcode = NVPTX::StoreParamV2I64;
2220 Opcode = NVPTX::StoreParamV2F32;
2223 Opcode = NVPTX::StoreParamV2F64;
// Four-element vector stores (no 64-bit element forms are listed).
2228 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2232 Opcode = NVPTX::StoreParamV4I8;
2235 Opcode = NVPTX::StoreParamV4I8;
2238 Opcode = NVPTX::StoreParamV4I16;
2241 Opcode = NVPTX::StoreParamV4I32;
2244 Opcode = NVPTX::StoreParamV4F32;
2250 // Special case: if we have a sign-extend/zero-extend node, insert the
2251 // conversion instruction first, and use that as the value operand to
2252 // the selected StoreParam node.
2253 case NVPTXISD::StoreParamU32: {
2254 Opcode = NVPTX::StoreParamI32;
// Zero-extend: wrap the 16-bit value in a CVT.u32.u16 (no rounding
// mode) and substitute its result as the stored value.
2255 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2257 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2258 MVT::i32, Ops[0], CvtNone);
2259 Ops[0] = SDValue(Cvt, 0);
2262 case NVPTXISD::StoreParamS32: {
2263 Opcode = NVPTX::StoreParamI32;
// Sign-extend variant: same shape, but CVT.s32.s16.
2264 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2266 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2267 MVT::i32, Ops[0], CvtNone);
2268 Ops[0] = SDValue(Cvt, 0);
// Emit the store (chain + glue results) and attach the original memory
// operand so the machine node keeps the memory-access information.
2273 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
2275 CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
2276 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2277 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2278 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2283 // SelectDirectAddr - Match a direct address for DAG.
2284 // A direct address could be a globaladdress or externalsymbol.
// Returns true and sets Address when N is directly addressable; note the
// extraction of this chunk elided the explicit return statements.
2285 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
2286 // Return true if TGA or ES.
2287 if (N.getOpcode() == ISD::TargetGlobalAddress ||
2288 N.getOpcode() == ISD::TargetExternalSymbol) {
// A Wrapper node wraps the real address in operand 0; unwrap it.
2292 if (N.getOpcode() == NVPTXISD::Wrapper) {
2293 Address = N.getOperand(0);
// Look through the nvvm.ptr.gen.to.param address-space conversion
// intrinsic applied to a MoveParam, and match its underlying address.
2296 if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
// Operand 0 of INTRINSIC_WO_CHAIN is the intrinsic ID constant.
2297 unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
2298 if (IID == Intrinsic::nvvm_ptr_gen_to_param)
2299 if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
2300 return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
// SelectADDRsi_imp - Match a symbol+immediate address: (add direct-addr,
// const).  On success Base holds the direct address and Offset the
// constant, materialized in the pointer width given by 'mvt'.
2306 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
2307 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
2308 if (Addr.getOpcode() == ISD::ADD) {
2309 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2310 SDValue base = Addr.getOperand(0);
// The LHS must itself be directly addressable (TGA/ES/Wrapper).
2311 if (SelectDirectAddr(base, Base)) {
2312 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
// SelectADDRsi - symbol+immediate addressing for 32-bit pointers;
// delegates to SelectADDRsi_imp with an i32 offset type.
2321 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
2322 SDValue &Base, SDValue &Offset) {
2323 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
// SelectADDRsi64 - symbol+immediate addressing for 64-bit pointers;
// delegates to SelectADDRsi_imp with an i64 offset type.
2327 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
2328 SDValue &Base, SDValue &Offset) {
2329 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
// SelectADDRri_imp - Match a register+immediate address: a frame index,
// or (add reg/frameindex, const).  Sets Base and Offset in the pointer
// width given by 'mvt'.  Note: extraction of this chunk elided the
// explicit return statements and some closing braces.
2333 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
2334 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
// A bare frame index matches with a zero offset.
2335 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2336 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
2337 Offset = CurDAG->getTargetConstant(0, mvt);
// Bare symbols are handled by the direct-address pattern, not reg+imm.
2340 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
2341 Addr.getOpcode() == ISD::TargetGlobalAddress)
2342 return false; // direct calls.
2344 if (Addr.getOpcode() == ISD::ADD) {
// symbol+const is SelectADDRsi territory; reject it here.
2345 if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
2348 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2349 if (FrameIndexSDNode *FIN =
2350 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
2351 // Constant offset from frame ref.
2352 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
// Otherwise the LHS register is the base as-is.
2354 Base = Addr.getOperand(0);
2355 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
// SelectADDRri - register+immediate addressing for 32-bit pointers;
// delegates to SelectADDRri_imp with an i32 offset type.
2363 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
2364 SDValue &Base, SDValue &Offset) {
2365 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
// SelectADDRri64 - register+immediate addressing for 64-bit pointers;
// delegates to SelectADDRri_imp with an i64 offset type.
2369 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
2370 SDValue &Base, SDValue &Offset) {
2371 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
// ChkMemSDNodeAddressSpace - Return true if the memory access performed
// by N is to a pointer in address space 'spN', determined from the IR
// source value attached to the node.
2374 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
2375 unsigned int spN) const {
2376 const Value *Src = NULL;
2377 // Even though MemIntrinsicSDNode is a subclass of MemSDNode,
2378 // the classof() for MemSDNode does not include MemIntrinsicSDNode
2379 // (See SelectionDAGNodes.h). So we need to check for both.
2380 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
2381 Src = mN->getSrcValue();
// dyn_cast to the intrinsic subclass; storing it in a MemSDNode* is an
// implicit upcast and only getSrcValue() is needed.
2382 } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
2383 Src = mN->getSrcValue();
// Compare the address space recorded in the pointer's IR type.
2387 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
2388 return (PT->getAddressSpace() == spN);
2392 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
2393 /// inline asm expressions.
/// Tries direct addressing first, then register+immediate; on success the
/// selected base/offset pair is appended to OutOps.  NOTE: extraction of
/// this chunk elided the case labels, return statements and braces.
2394 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
2395 const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
2397 switch (ConstraintCode) {
// Direct address: emit the symbol plus a zero i32 offset.
2401 if (SelectDirectAddr(Op, Op0)) {
2402 OutOps.push_back(Op0);
2403 OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
// Otherwise fall back to register+immediate form.
2406 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
2407 OutOps.push_back(Op0);
2408 OutOps.push_back(Op1);
2416 // Return true if N is an undef or a constant.
2417 // If N was undef, return a (i8imm 0) in Retval
2418 // If N was imm, convert it to i8imm and return in Retval
2419 // Note: The convert to i8imm is required, otherwise the
2420 // pattern matcher inserts a bunch of IMOVi8rr to convert
2421 // the imm to i8imm, and this causes instruction selection
2423 bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N, SDValue &Retval) {
2424 if (!(N.getOpcode() == ISD::UNDEF) && !(N.getOpcode() == ISD::Constant))
2427 if (N.getOpcode() == ISD::UNDEF)
2428 Retval = CurDAG->getTargetConstant(0, MVT::i8);
2430 ConstantSDNode *cn = cast<ConstantSDNode>(N.getNode());
2431 unsigned retval = cn->getZExtValue();
2432 Retval = CurDAG->getTargetConstant(retval, MVT::i8);