1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines an instruction selector for the NVPTX target.
12 //===----------------------------------------------------------------------===//
14 #include "NVPTXISelDAGToDAG.h"
15 #include "llvm/IR/GlobalValue.h"
16 #include "llvm/IR/Instructions.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/ErrorHandling.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include "llvm/Target/TargetIntrinsicInfo.h"
24 #define DEBUG_TYPE "nvptx-isel"
// Command-line knobs consulted during NVPTX instruction selection:
// - nvptx-fma-level:    FMA contraction (0 = off, 1 = on, 2 = aggressive).
// - nvptx-prec-divf32:  f32 division flavor (approx / full / IEEE rnd).
// - nvptx-prec-sqrtf32: f32 sqrt flavor (approx vs. sqrt.rn).
// - nvptx-f32ftz:       flush f32 subnormals to sign-preserving zero.
// NOTE(review): the `static cl::opt<...>` openers / cl::init defaults for
// some of these are not visible in this view — verify against upstream.
29 FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore,
30 cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
31 " 1: do it 2: do it aggressively"),
34 static cl::opt<int> UsePrecDivF32(
35 "nvptx-prec-divf32", cl::ZeroOrMore,
36 cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
37 " IEEE Compliant F32 div.rnd if avaiable."),
41 UsePrecSqrtF32("nvptx-prec-sqrtf32",
42 cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
46 FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore,
47 cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
51 /// createNVPTXISelDag - This pass converts a legalized DAG into a
52 /// NVPTX-specific DAG, ready for instruction scheduling.
// Factory function: returns a freshly allocated NVPTXDAGToDAGISel pass
// configured for the given target machine and optimization level.
53 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
54 llvm::CodeGenOpt::Level OptLevel) {
55 return new NVPTXDAGToDAGISel(TM, OptLevel);
// Constructor: caches the NVPTX subtarget and precomputes the FMA /
// wide-multiply selection flags from the optimization level, subtarget
// capabilities, and the nvptx-fma-level command-line option.
58 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
59 CodeGenOpt::Level OptLevel)
60 : SelectionDAGISel(tm, OptLevel),
61 Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
// Plain FMA contraction: requires -O1+, hardware FMA, and fma-level >= 1.
63 doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1);
64 doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1);
// NOTE(review): the assignment targets of the next two expressions
// (presumably the aggressive-FMA flags, fma-level == 2) are not visible
// in this view — verify against the upstream source.
66 (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel == 2);
68 (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2);
70 allowFMA = (FMAContractLevel >= 1);
// Wide multiplies (e.g. mul.wide) are only selected when optimizing.
72 doMulWide = (OptLevel > 0);
// Returns the f32 division precision level (0 = div.approx, 1 = div.full,
// 2 = IEEE-compliant div.rnd): an explicit nvptx-prec-divf32 option wins,
// otherwise the choice follows the fast-math (UnsafeFPMath) setting.
75 int NVPTXDAGToDAGISel::getDivF32Level() const {
76 if (UsePrecDivF32.getNumOccurrences() > 0) {
77 // If nvptx-prec-div32=N is used on the command-line, always honor it
80 // Otherwise, use div.approx if fast math is enabled
81 if (TM.Options.UnsafeFPMath)
// Returns true when precise f32 sqrt (sqrt.rn) should be used: an explicit
// nvptx-prec-sqrtf32 option wins, otherwise fast math selects sqrt.approx.
88 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
89 if (UsePrecSqrtF32.getNumOccurrences() > 0) {
90 // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
91 return UsePrecSqrtF32;
93 // Otherwise, use sqrt.approx if fast math is enabled
94 if (TM.Options.UnsafeFPMath)
// Returns true when f32 subnormals should be flushed to zero (.ftz):
// the nvptx-f32ftz command-line option wins; otherwise the per-function
// "nvptx-f32ftz" string attribute (value "true") is consulted.
101 bool NVPTXDAGToDAGISel::useF32FTZ() const {
102 if (FtzEnabled.getNumOccurrences() > 0) {
103 // If nvptx-f32ftz is used on the command-line, always honor it
106 const Function *F = MF->getFunction();
107 // Otherwise, check for an nvptx-f32ftz attribute on the function
108 if (F->hasFnAttribute("nvptx-f32ftz"))
109 return (F->getAttributes().getAttribute(AttributeSet::FunctionIndex,
111 .getValueAsString() == "true");
117 /// Select - Select instructions not customized! Used for
118 /// expanded, promoted and normal instructions.
// Dispatches on the node's opcode to the dedicated Select* helpers for the
// NVPTX memory forms (scalar/vector loads and stores, LDG/LDU, and the
// param / retval pseudo-ops); anything not handled here falls through to
// the TableGen-generated SelectCode matcher.
119 SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
121 if (N->isMachineOpcode())
122 return NULL; // Already selected.
124 SDNode *ResNode = NULL;
125 switch (N->getOpcode()) {
127 ResNode = SelectLoad(N);
130 ResNode = SelectStore(N);
132 case NVPTXISD::LoadV2:
133 case NVPTXISD::LoadV4:
134 ResNode = SelectLoadVector(N);
136 case NVPTXISD::LDGV2:
137 case NVPTXISD::LDGV4:
138 case NVPTXISD::LDUV2:
139 case NVPTXISD::LDUV4:
140 ResNode = SelectLDGLDUVector(N);
142 case NVPTXISD::StoreV2:
143 case NVPTXISD::StoreV4:
144 ResNode = SelectStoreVector(N);
146 case NVPTXISD::LoadParam:
147 case NVPTXISD::LoadParamV2:
148 case NVPTXISD::LoadParamV4:
149 ResNode = SelectLoadParam(N);
151 case NVPTXISD::StoreRetval:
152 case NVPTXISD::StoreRetvalV2:
153 case NVPTXISD::StoreRetvalV4:
154 ResNode = SelectStoreRetval(N);
156 case NVPTXISD::StoreParam:
157 case NVPTXISD::StoreParamV2:
158 case NVPTXISD::StoreParamV4:
159 case NVPTXISD::StoreParamS32:
160 case NVPTXISD::StoreParamU32:
161 ResNode = SelectStoreParam(N);
// Fallback: let the auto-generated matcher handle everything else.
168 return SelectCode(N);
// Maps the IR address space of the memory node's source value to the PTX
// ld/st instruction address-space code; falls back to GENERIC when the
// source value or its pointer type yields no better information.
171 static unsigned int getCodeAddrSpace(MemSDNode *N,
172 const NVPTXSubtarget &Subtarget) {
173 const Value *Src = N->getSrcValue();
176 return NVPTX::PTXLdStInstCode::GENERIC;
178 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
179 switch (PT->getAddressSpace()) {
180 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
181 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
182 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
183 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
184 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
185 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
// Default: no usable address-space information.
189 return NVPTX::PTXLdStInstCode::GENERIC;
// Selects a scalar LoadSDNode into an NVPTX LD_* machine node. Computes
// the ld.* instruction flags (address space, volatility, vector arity,
// source type and width) and then picks the opcode variant by result type
// and addressing mode: avar (direct), asi (symbol+imm), ari (reg+imm,
// 32/64-bit) or areg (register), finally attaching the memory operand.
192 SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
194 LoadSDNode *LD = cast<LoadSDNode>(N);
195 EVT LoadedVT = LD->getMemoryVT();
196 SDNode *NVPTXLD = NULL;
198 // do not support pre/post inc/dec
202 if (!LoadedVT.isSimple())
205 // Address Space Setting
206 unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
209 // - .volatile is only available for .global and .shared
210 bool isVolatile = LD->isVolatile();
211 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
212 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
213 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
// Vector arity flag: Scalar, V2, or V4 depending on element count.
217 MVT SimpleVT = LoadedVT.getSimpleVT();
218 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
219 if (SimpleVT.isVector()) {
220 unsigned num = SimpleVT.getVectorNumElements();
222 vecType = NVPTX::PTXLdStInstCode::V2;
224 vecType = NVPTX::PTXLdStInstCode::V4;
229 // Type Setting: fromType + fromTypeWidth
231 // Sign : ISD::SEXTLOAD
232 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
234 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
235 MVT ScalarVT = SimpleVT.getScalarType();
236 // Read at least 8 bits (predicates are stored as 8-bit values)
237 unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
238 unsigned int fromType;
239 if ((LD->getExtensionType() == ISD::SEXTLOAD))
240 fromType = NVPTX::PTXLdStInstCode::Signed;
241 else if (ScalarVT.isFloatingPoint())
242 fromType = NVPTX::PTXLdStInstCode::Float;
244 fromType = NVPTX::PTXLdStInstCode::Unsigned;
246 // Create the machine instruction DAG
247 SDValue Chain = N->getOperand(0);
248 SDValue N1 = N->getOperand(1);
250 SDValue Offset, Base;
252 MVT::SimpleValueType TargetVT = LD->getValueType(0).getSimpleVT().SimpleTy;
// Addressing mode 1: direct address (avar variants).
254 if (SelectDirectAddr(N1, Addr)) {
257 Opcode = NVPTX::LD_i8_avar;
260 Opcode = NVPTX::LD_i16_avar;
263 Opcode = NVPTX::LD_i32_avar;
266 Opcode = NVPTX::LD_i64_avar;
269 Opcode = NVPTX::LD_f32_avar;
272 Opcode = NVPTX::LD_f64_avar;
277 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
278 getI32Imm(vecType), getI32Imm(fromType),
279 getI32Imm(fromTypeWidth), Addr, Chain };
280 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Addressing mode 2: symbol + immediate (asi variants).
281 } else if (Subtarget.is64Bit()
282 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
283 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
286 Opcode = NVPTX::LD_i8_asi;
289 Opcode = NVPTX::LD_i16_asi;
292 Opcode = NVPTX::LD_i32_asi;
295 Opcode = NVPTX::LD_i64_asi;
298 Opcode = NVPTX::LD_f32_asi;
301 Opcode = NVPTX::LD_f64_asi;
306 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
307 getI32Imm(vecType), getI32Imm(fromType),
308 getI32Imm(fromTypeWidth), Base, Offset, Chain };
309 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Addressing mode 3: register + immediate (ari / ari_64 variants).
310 } else if (Subtarget.is64Bit()
311 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
312 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
313 if (Subtarget.is64Bit()) {
316 Opcode = NVPTX::LD_i8_ari_64;
319 Opcode = NVPTX::LD_i16_ari_64;
322 Opcode = NVPTX::LD_i32_ari_64;
325 Opcode = NVPTX::LD_i64_ari_64;
328 Opcode = NVPTX::LD_f32_ari_64;
331 Opcode = NVPTX::LD_f64_ari_64;
339 Opcode = NVPTX::LD_i8_ari;
342 Opcode = NVPTX::LD_i16_ari;
345 Opcode = NVPTX::LD_i32_ari;
348 Opcode = NVPTX::LD_i64_ari;
351 Opcode = NVPTX::LD_f32_ari;
354 Opcode = NVPTX::LD_f64_ari;
360 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
361 getI32Imm(vecType), getI32Imm(fromType),
362 getI32Imm(fromTypeWidth), Base, Offset, Chain };
363 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Addressing mode 4 (fallback): plain register (areg / areg_64 variants).
365 if (Subtarget.is64Bit()) {
368 Opcode = NVPTX::LD_i8_areg_64;
371 Opcode = NVPTX::LD_i16_areg_64;
374 Opcode = NVPTX::LD_i32_areg_64;
377 Opcode = NVPTX::LD_i64_areg_64;
380 Opcode = NVPTX::LD_f32_areg_64;
383 Opcode = NVPTX::LD_f64_areg_64;
391 Opcode = NVPTX::LD_i8_areg;
394 Opcode = NVPTX::LD_i16_areg;
397 Opcode = NVPTX::LD_i32_areg;
400 Opcode = NVPTX::LD_i64_areg;
403 Opcode = NVPTX::LD_f32_areg;
406 Opcode = NVPTX::LD_f64_areg;
412 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
413 getI32Imm(vecType), getI32Imm(fromType),
414 getI32Imm(fromTypeWidth), N1, Chain };
415 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Attach the original memory operand so later passes (scheduling, AA)
// know what memory this machine load touches.
418 if (NVPTXLD != NULL) {
419 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
420 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
421 cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
// Selects a NVPTXISD::LoadV2/LoadV4 node into an LDV_* machine node.
// Mirrors SelectLoad: computes the ld.* flags, then chooses the opcode by
// vector arity (v2/v4), element type, and addressing mode (avar / asi /
// ari / areg, with _64 variants on 64-bit subtargets). The original
// extension type is carried in the node's last (constant) operand.
427 SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
429 SDValue Chain = N->getOperand(0);
430 SDValue Op1 = N->getOperand(1);
431 SDValue Addr, Offset, Base;
435 MemSDNode *MemSD = cast<MemSDNode>(N);
436 EVT LoadedVT = MemSD->getMemoryVT();
438 if (!LoadedVT.isSimple())
441 // Address Space Setting
442 unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
445 // - .volatile is only available for .global and .shared
446 bool IsVolatile = MemSD->isVolatile();
447 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
448 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
449 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
453 MVT SimpleVT = LoadedVT.getSimpleVT();
455 // Type Setting: fromType + fromTypeWidth
457 // Sign : ISD::SEXTLOAD
458 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
460 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
461 MVT ScalarVT = SimpleVT.getScalarType();
462 // Read at least 8 bits (predicates are stored as 8-bit values)
463 unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
464 unsigned int FromType;
465 // The last operand holds the original LoadSDNode::getExtensionType() value
466 unsigned ExtensionType = cast<ConstantSDNode>(
467 N->getOperand(N->getNumOperands() - 1))->getZExtValue();
468 if (ExtensionType == ISD::SEXTLOAD)
469 FromType = NVPTX::PTXLdStInstCode::Signed;
470 else if (ScalarVT.isFloatingPoint())
471 FromType = NVPTX::PTXLdStInstCode::Float;
473 FromType = NVPTX::PTXLdStInstCode::Unsigned;
// Vector arity flag derived from the opcode itself.
477 switch (N->getOpcode()) {
478 case NVPTXISD::LoadV2:
479 VecType = NVPTX::PTXLdStInstCode::V2;
481 case NVPTXISD::LoadV4:
482 VecType = NVPTX::PTXLdStInstCode::V4;
488 EVT EltVT = N->getValueType(0);
// Addressing mode 1: direct address (avar variants).
490 if (SelectDirectAddr(Op1, Addr)) {
491 switch (N->getOpcode()) {
494 case NVPTXISD::LoadV2:
495 switch (EltVT.getSimpleVT().SimpleTy) {
499 Opcode = NVPTX::LDV_i8_v2_avar;
502 Opcode = NVPTX::LDV_i16_v2_avar;
505 Opcode = NVPTX::LDV_i32_v2_avar;
508 Opcode = NVPTX::LDV_i64_v2_avar;
511 Opcode = NVPTX::LDV_f32_v2_avar;
514 Opcode = NVPTX::LDV_f64_v2_avar;
518 case NVPTXISD::LoadV4:
519 switch (EltVT.getSimpleVT().SimpleTy) {
523 Opcode = NVPTX::LDV_i8_v4_avar;
526 Opcode = NVPTX::LDV_i16_v4_avar;
529 Opcode = NVPTX::LDV_i32_v4_avar;
532 Opcode = NVPTX::LDV_f32_v4_avar;
538 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
539 getI32Imm(VecType), getI32Imm(FromType),
540 getI32Imm(FromTypeWidth), Addr, Chain };
541 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Addressing mode 2: symbol + immediate (asi variants).
542 } else if (Subtarget.is64Bit()
543 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
544 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
545 switch (N->getOpcode()) {
548 case NVPTXISD::LoadV2:
549 switch (EltVT.getSimpleVT().SimpleTy) {
553 Opcode = NVPTX::LDV_i8_v2_asi;
556 Opcode = NVPTX::LDV_i16_v2_asi;
559 Opcode = NVPTX::LDV_i32_v2_asi;
562 Opcode = NVPTX::LDV_i64_v2_asi;
565 Opcode = NVPTX::LDV_f32_v2_asi;
568 Opcode = NVPTX::LDV_f64_v2_asi;
572 case NVPTXISD::LoadV4:
573 switch (EltVT.getSimpleVT().SimpleTy) {
577 Opcode = NVPTX::LDV_i8_v4_asi;
580 Opcode = NVPTX::LDV_i16_v4_asi;
583 Opcode = NVPTX::LDV_i32_v4_asi;
586 Opcode = NVPTX::LDV_f32_v4_asi;
592 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
593 getI32Imm(VecType), getI32Imm(FromType),
594 getI32Imm(FromTypeWidth), Base, Offset, Chain };
595 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Addressing mode 3: register + immediate (ari / ari_64 variants).
596 } else if (Subtarget.is64Bit()
597 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
598 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
599 if (Subtarget.is64Bit()) {
600 switch (N->getOpcode()) {
603 case NVPTXISD::LoadV2:
604 switch (EltVT.getSimpleVT().SimpleTy) {
608 Opcode = NVPTX::LDV_i8_v2_ari_64;
611 Opcode = NVPTX::LDV_i16_v2_ari_64;
614 Opcode = NVPTX::LDV_i32_v2_ari_64;
617 Opcode = NVPTX::LDV_i64_v2_ari_64;
620 Opcode = NVPTX::LDV_f32_v2_ari_64;
623 Opcode = NVPTX::LDV_f64_v2_ari_64;
627 case NVPTXISD::LoadV4:
628 switch (EltVT.getSimpleVT().SimpleTy) {
632 Opcode = NVPTX::LDV_i8_v4_ari_64;
635 Opcode = NVPTX::LDV_i16_v4_ari_64;
638 Opcode = NVPTX::LDV_i32_v4_ari_64;
641 Opcode = NVPTX::LDV_f32_v4_ari_64;
647 switch (N->getOpcode()) {
650 case NVPTXISD::LoadV2:
651 switch (EltVT.getSimpleVT().SimpleTy) {
655 Opcode = NVPTX::LDV_i8_v2_ari;
658 Opcode = NVPTX::LDV_i16_v2_ari;
661 Opcode = NVPTX::LDV_i32_v2_ari;
664 Opcode = NVPTX::LDV_i64_v2_ari;
667 Opcode = NVPTX::LDV_f32_v2_ari;
670 Opcode = NVPTX::LDV_f64_v2_ari;
674 case NVPTXISD::LoadV4:
675 switch (EltVT.getSimpleVT().SimpleTy) {
679 Opcode = NVPTX::LDV_i8_v4_ari;
682 Opcode = NVPTX::LDV_i16_v4_ari;
685 Opcode = NVPTX::LDV_i32_v4_ari;
688 Opcode = NVPTX::LDV_f32_v4_ari;
695 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
696 getI32Imm(VecType), getI32Imm(FromType),
697 getI32Imm(FromTypeWidth), Base, Offset, Chain };
699 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Addressing mode 4 (fallback): plain register (areg / areg_64 variants).
701 if (Subtarget.is64Bit()) {
702 switch (N->getOpcode()) {
705 case NVPTXISD::LoadV2:
706 switch (EltVT.getSimpleVT().SimpleTy) {
710 Opcode = NVPTX::LDV_i8_v2_areg_64;
713 Opcode = NVPTX::LDV_i16_v2_areg_64;
716 Opcode = NVPTX::LDV_i32_v2_areg_64;
719 Opcode = NVPTX::LDV_i64_v2_areg_64;
722 Opcode = NVPTX::LDV_f32_v2_areg_64;
725 Opcode = NVPTX::LDV_f64_v2_areg_64;
729 case NVPTXISD::LoadV4:
730 switch (EltVT.getSimpleVT().SimpleTy) {
734 Opcode = NVPTX::LDV_i8_v4_areg_64;
737 Opcode = NVPTX::LDV_i16_v4_areg_64;
740 Opcode = NVPTX::LDV_i32_v4_areg_64;
743 Opcode = NVPTX::LDV_f32_v4_areg_64;
749 switch (N->getOpcode()) {
752 case NVPTXISD::LoadV2:
753 switch (EltVT.getSimpleVT().SimpleTy) {
757 Opcode = NVPTX::LDV_i8_v2_areg;
760 Opcode = NVPTX::LDV_i16_v2_areg;
763 Opcode = NVPTX::LDV_i32_v2_areg;
766 Opcode = NVPTX::LDV_i64_v2_areg;
769 Opcode = NVPTX::LDV_f32_v2_areg;
772 Opcode = NVPTX::LDV_f64_v2_areg;
776 case NVPTXISD::LoadV4:
777 switch (EltVT.getSimpleVT().SimpleTy) {
781 Opcode = NVPTX::LDV_i8_v4_areg;
784 Opcode = NVPTX::LDV_i16_v4_areg;
787 Opcode = NVPTX::LDV_i32_v4_areg;
790 Opcode = NVPTX::LDV_f32_v4_areg;
797 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
798 getI32Imm(VecType), getI32Imm(FromType),
799 getI32Imm(FromTypeWidth), Op1, Chain };
800 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Attach the memory operand to the new machine node.
803 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
804 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
805 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
// Selects NVPTXISD::LDGV2/LDGV4/LDUV2/LDUV4 (cached global / uniform
// loads) into the corresponding INT_PTX_LDG/LDU_* machine intrinsics.
// Unlike SelectLoad(Vector) there are no flag operands: the opcode is
// chosen purely by LDG-vs-LDU, vector arity, element type, and addressing
// mode (avar / ari32 / ari64 / areg32 / areg64).
810 SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
812 SDValue Chain = N->getOperand(0);
813 SDValue Op1 = N->getOperand(1);
817 MemSDNode *Mem = cast<MemSDNode>(N);
818 SDValue Base, Offset, Addr;
820 EVT EltVT = Mem->getMemoryVT().getVectorElementType();
// Addressing mode 1: direct address (avar variants).
822 if (SelectDirectAddr(Op1, Addr)) {
823 switch (N->getOpcode()) {
826 case NVPTXISD::LDGV2:
827 switch (EltVT.getSimpleVT().SimpleTy) {
831 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
834 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
837 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
840 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
843 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
846 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
850 case NVPTXISD::LDUV2:
851 switch (EltVT.getSimpleVT().SimpleTy) {
855 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
858 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
861 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
864 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
867 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
870 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
874 case NVPTXISD::LDGV4:
875 switch (EltVT.getSimpleVT().SimpleTy) {
879 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
882 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
885 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
888 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
892 case NVPTXISD::LDUV4:
893 switch (EltVT.getSimpleVT().SimpleTy) {
897 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
900 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
903 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
906 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
912 SDValue Ops[] = { Addr, Chain };
913 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
914 ArrayRef<SDValue>(Ops, 2));
// Addressing mode 2: register + immediate (ari32 / ari64 variants).
915 } else if (Subtarget.is64Bit()
916 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
917 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
918 if (Subtarget.is64Bit()) {
919 switch (N->getOpcode()) {
922 case NVPTXISD::LDGV2:
923 switch (EltVT.getSimpleVT().SimpleTy) {
927 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
930 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
933 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
936 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
939 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
942 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
946 case NVPTXISD::LDUV2:
947 switch (EltVT.getSimpleVT().SimpleTy) {
951 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
954 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
957 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
960 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
963 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
966 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
970 case NVPTXISD::LDGV4:
971 switch (EltVT.getSimpleVT().SimpleTy) {
975 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
978 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
981 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
984 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
988 case NVPTXISD::LDUV4:
989 switch (EltVT.getSimpleVT().SimpleTy) {
993 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
996 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
999 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
1002 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
1008 switch (N->getOpcode()) {
1011 case NVPTXISD::LDGV2:
1012 switch (EltVT.getSimpleVT().SimpleTy) {
1016 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
1019 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
1022 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
1025 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
1028 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
1031 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
1035 case NVPTXISD::LDUV2:
1036 switch (EltVT.getSimpleVT().SimpleTy) {
1040 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
1043 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
1046 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
1049 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
1052 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
1055 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
1059 case NVPTXISD::LDGV4:
1060 switch (EltVT.getSimpleVT().SimpleTy) {
1064 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
1067 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
1070 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
1073 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
1077 case NVPTXISD::LDUV4:
1078 switch (EltVT.getSimpleVT().SimpleTy) {
1082 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
1085 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
1088 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
1091 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
1098 SDValue Ops[] = { Base, Offset, Chain };
1100 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
1101 ArrayRef<SDValue>(Ops, 3));
// Addressing mode 3 (fallback): plain register (areg32 / areg64 variants).
1103 if (Subtarget.is64Bit()) {
1104 switch (N->getOpcode()) {
1107 case NVPTXISD::LDGV2:
1108 switch (EltVT.getSimpleVT().SimpleTy) {
1112 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
1115 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
1118 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
1121 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
1124 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
1127 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
1131 case NVPTXISD::LDUV2:
1132 switch (EltVT.getSimpleVT().SimpleTy) {
1136 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
1139 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
1142 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
1145 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
1148 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
1151 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
1155 case NVPTXISD::LDGV4:
1156 switch (EltVT.getSimpleVT().SimpleTy) {
1160 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
1163 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
1166 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
1169 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
1173 case NVPTXISD::LDUV4:
1174 switch (EltVT.getSimpleVT().SimpleTy) {
1178 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
1181 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
1184 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
1187 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
1193 switch (N->getOpcode()) {
1196 case NVPTXISD::LDGV2:
1197 switch (EltVT.getSimpleVT().SimpleTy) {
1201 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
1204 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
1207 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
1210 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
1213 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
1216 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
1220 case NVPTXISD::LDUV2:
1221 switch (EltVT.getSimpleVT().SimpleTy) {
1225 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
1228 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
1231 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
1234 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
1237 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
1240 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
1244 case NVPTXISD::LDGV4:
1245 switch (EltVT.getSimpleVT().SimpleTy) {
1249 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
1252 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
1255 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
1258 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
1262 case NVPTXISD::LDUV4:
1263 switch (EltVT.getSimpleVT().SimpleTy) {
1267 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
1270 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
1273 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
1276 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
1283 SDValue Ops[] = { Op1, Chain };
1284 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
1285 ArrayRef<SDValue>(Ops, 2));
// Attach the memory operand to the new machine node.
1288 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1289 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1290 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
// Selects a scalar StoreSDNode into an NVPTX ST_* machine node. Mirrors
// SelectLoad: computes the st.* flags (address space, volatility, vector
// arity, destination type/width — integers always stored as unsigned),
// then picks the opcode by source value type and addressing mode
// (avar / asi / ari / areg, with _64 variants on 64-bit subtargets).
1295 SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
1297 StoreSDNode *ST = cast<StoreSDNode>(N);
1298 EVT StoreVT = ST->getMemoryVT();
1299 SDNode *NVPTXST = NULL;
1301 // do not support pre/post inc/dec
1302 if (ST->isIndexed())
1305 if (!StoreVT.isSimple())
1308 // Address Space Setting
1309 unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
1312 // - .volatile is only available for .global and .shared
1313 bool isVolatile = ST->isVolatile();
1314 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1315 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1316 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
// Vector arity flag: Scalar, V2, or V4 depending on element count.
1320 MVT SimpleVT = StoreVT.getSimpleVT();
1321 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
1322 if (SimpleVT.isVector()) {
1323 unsigned num = SimpleVT.getVectorNumElements();
1325 vecType = NVPTX::PTXLdStInstCode::V2;
1327 vecType = NVPTX::PTXLdStInstCode::V4;
1332 // Type Setting: toType + toTypeWidth
1333 // - for integer type, always use 'u'
1335 MVT ScalarVT = SimpleVT.getScalarType();
1336 unsigned toTypeWidth = ScalarVT.getSizeInBits();
1337 unsigned int toType;
1338 if (ScalarVT.isFloatingPoint())
1339 toType = NVPTX::PTXLdStInstCode::Float;
1341 toType = NVPTX::PTXLdStInstCode::Unsigned;
1343 // Create the machine instruction DAG
1344 SDValue Chain = N->getOperand(0);
1345 SDValue N1 = N->getOperand(1);
1346 SDValue N2 = N->getOperand(2);
1348 SDValue Offset, Base;
1350 MVT::SimpleValueType SourceVT =
1351 N1.getNode()->getValueType(0).getSimpleVT().SimpleTy;
// Addressing mode 1: direct address (avar variants).
1353 if (SelectDirectAddr(N2, Addr)) {
1356 Opcode = NVPTX::ST_i8_avar;
1359 Opcode = NVPTX::ST_i16_avar;
1362 Opcode = NVPTX::ST_i32_avar;
1365 Opcode = NVPTX::ST_i64_avar;
1368 Opcode = NVPTX::ST_f32_avar;
1371 Opcode = NVPTX::ST_f64_avar;
1376 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1377 getI32Imm(vecType), getI32Imm(toType),
1378 getI32Imm(toTypeWidth), Addr, Chain };
1379 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Addressing mode 2: symbol + immediate (asi variants).
1380 } else if (Subtarget.is64Bit()
1381 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1382 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1385 Opcode = NVPTX::ST_i8_asi;
1388 Opcode = NVPTX::ST_i16_asi;
1391 Opcode = NVPTX::ST_i32_asi;
1394 Opcode = NVPTX::ST_i64_asi;
1397 Opcode = NVPTX::ST_f32_asi;
1400 Opcode = NVPTX::ST_f64_asi;
1405 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1406 getI32Imm(vecType), getI32Imm(toType),
1407 getI32Imm(toTypeWidth), Base, Offset, Chain };
1408 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Addressing mode 3: register + immediate (ari / ari_64 variants).
1409 } else if (Subtarget.is64Bit()
1410 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1411 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1412 if (Subtarget.is64Bit()) {
1415 Opcode = NVPTX::ST_i8_ari_64;
1418 Opcode = NVPTX::ST_i16_ari_64;
1421 Opcode = NVPTX::ST_i32_ari_64;
1424 Opcode = NVPTX::ST_i64_ari_64;
1427 Opcode = NVPTX::ST_f32_ari_64;
1430 Opcode = NVPTX::ST_f64_ari_64;
1438 Opcode = NVPTX::ST_i8_ari;
1441 Opcode = NVPTX::ST_i16_ari;
1444 Opcode = NVPTX::ST_i32_ari;
1447 Opcode = NVPTX::ST_i64_ari;
1450 Opcode = NVPTX::ST_f32_ari;
1453 Opcode = NVPTX::ST_f64_ari;
1459 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1460 getI32Imm(vecType), getI32Imm(toType),
1461 getI32Imm(toTypeWidth), Base, Offset, Chain };
1462 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Addressing mode 4 (fallback): plain register (areg / areg_64 variants).
1464 if (Subtarget.is64Bit()) {
1467 Opcode = NVPTX::ST_i8_areg_64;
1470 Opcode = NVPTX::ST_i16_areg_64;
1473 Opcode = NVPTX::ST_i32_areg_64;
1476 Opcode = NVPTX::ST_i64_areg_64;
1479 Opcode = NVPTX::ST_f32_areg_64;
1482 Opcode = NVPTX::ST_f64_areg_64;
1490 Opcode = NVPTX::ST_i8_areg;
1493 Opcode = NVPTX::ST_i16_areg;
1496 Opcode = NVPTX::ST_i32_areg;
1499 Opcode = NVPTX::ST_i64_areg;
1502 Opcode = NVPTX::ST_f32_areg;
1505 Opcode = NVPTX::ST_f64_areg;
1511 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1512 getI32Imm(vecType), getI32Imm(toType),
1513 getI32Imm(toTypeWidth), N2, Chain };
1514 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Attach the memory operand so later passes know what this store touches.
1517 if (NVPTXST != NULL) {
1518 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1519 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1520 cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
1526 SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
1527 SDValue Chain = N->getOperand(0);
1528 SDValue Op1 = N->getOperand(1);
1529 SDValue Addr, Offset, Base;
1533 EVT EltVT = Op1.getValueType();
1534 MemSDNode *MemSD = cast<MemSDNode>(N);
1535 EVT StoreVT = MemSD->getMemoryVT();
1537 // Address Space Setting
1538 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
1540 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
1541 report_fatal_error("Cannot store to pointer that points to constant "
1546 // - .volatile is only availalble for .global and .shared
1547 bool IsVolatile = MemSD->isVolatile();
1548 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1549 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1550 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1553 // Type Setting: toType + toTypeWidth
1554 // - for integer type, always use 'u'
1555 assert(StoreVT.isSimple() && "Store value is not simple");
1556 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
1557 unsigned ToTypeWidth = ScalarVT.getSizeInBits();
1559 if (ScalarVT.isFloatingPoint())
1560 ToType = NVPTX::PTXLdStInstCode::Float;
1562 ToType = NVPTX::PTXLdStInstCode::Unsigned;
1564 SmallVector<SDValue, 12> StOps;
1568 switch (N->getOpcode()) {
1569 case NVPTXISD::StoreV2:
1570 VecType = NVPTX::PTXLdStInstCode::V2;
1571 StOps.push_back(N->getOperand(1));
1572 StOps.push_back(N->getOperand(2));
1573 N2 = N->getOperand(3);
1575 case NVPTXISD::StoreV4:
1576 VecType = NVPTX::PTXLdStInstCode::V4;
1577 StOps.push_back(N->getOperand(1));
1578 StOps.push_back(N->getOperand(2));
1579 StOps.push_back(N->getOperand(3));
1580 StOps.push_back(N->getOperand(4));
1581 N2 = N->getOperand(5);
1587 StOps.push_back(getI32Imm(IsVolatile));
1588 StOps.push_back(getI32Imm(CodeAddrSpace));
1589 StOps.push_back(getI32Imm(VecType));
1590 StOps.push_back(getI32Imm(ToType));
1591 StOps.push_back(getI32Imm(ToTypeWidth));
1593 if (SelectDirectAddr(N2, Addr)) {
1594 switch (N->getOpcode()) {
1597 case NVPTXISD::StoreV2:
1598 switch (EltVT.getSimpleVT().SimpleTy) {
1602 Opcode = NVPTX::STV_i8_v2_avar;
1605 Opcode = NVPTX::STV_i16_v2_avar;
1608 Opcode = NVPTX::STV_i32_v2_avar;
1611 Opcode = NVPTX::STV_i64_v2_avar;
1614 Opcode = NVPTX::STV_f32_v2_avar;
1617 Opcode = NVPTX::STV_f64_v2_avar;
1621 case NVPTXISD::StoreV4:
1622 switch (EltVT.getSimpleVT().SimpleTy) {
1626 Opcode = NVPTX::STV_i8_v4_avar;
1629 Opcode = NVPTX::STV_i16_v4_avar;
1632 Opcode = NVPTX::STV_i32_v4_avar;
1635 Opcode = NVPTX::STV_f32_v4_avar;
1640 StOps.push_back(Addr);
1641 } else if (Subtarget.is64Bit()
1642 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1643 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1644 switch (N->getOpcode()) {
1647 case NVPTXISD::StoreV2:
1648 switch (EltVT.getSimpleVT().SimpleTy) {
1652 Opcode = NVPTX::STV_i8_v2_asi;
1655 Opcode = NVPTX::STV_i16_v2_asi;
1658 Opcode = NVPTX::STV_i32_v2_asi;
1661 Opcode = NVPTX::STV_i64_v2_asi;
1664 Opcode = NVPTX::STV_f32_v2_asi;
1667 Opcode = NVPTX::STV_f64_v2_asi;
1671 case NVPTXISD::StoreV4:
1672 switch (EltVT.getSimpleVT().SimpleTy) {
1676 Opcode = NVPTX::STV_i8_v4_asi;
1679 Opcode = NVPTX::STV_i16_v4_asi;
1682 Opcode = NVPTX::STV_i32_v4_asi;
1685 Opcode = NVPTX::STV_f32_v4_asi;
1690 StOps.push_back(Base);
1691 StOps.push_back(Offset);
1692 } else if (Subtarget.is64Bit()
1693 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1694 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1695 if (Subtarget.is64Bit()) {
1696 switch (N->getOpcode()) {
1699 case NVPTXISD::StoreV2:
1700 switch (EltVT.getSimpleVT().SimpleTy) {
1704 Opcode = NVPTX::STV_i8_v2_ari_64;
1707 Opcode = NVPTX::STV_i16_v2_ari_64;
1710 Opcode = NVPTX::STV_i32_v2_ari_64;
1713 Opcode = NVPTX::STV_i64_v2_ari_64;
1716 Opcode = NVPTX::STV_f32_v2_ari_64;
1719 Opcode = NVPTX::STV_f64_v2_ari_64;
1723 case NVPTXISD::StoreV4:
1724 switch (EltVT.getSimpleVT().SimpleTy) {
1728 Opcode = NVPTX::STV_i8_v4_ari_64;
1731 Opcode = NVPTX::STV_i16_v4_ari_64;
1734 Opcode = NVPTX::STV_i32_v4_ari_64;
1737 Opcode = NVPTX::STV_f32_v4_ari_64;
1743 switch (N->getOpcode()) {
1746 case NVPTXISD::StoreV2:
1747 switch (EltVT.getSimpleVT().SimpleTy) {
1751 Opcode = NVPTX::STV_i8_v2_ari;
1754 Opcode = NVPTX::STV_i16_v2_ari;
1757 Opcode = NVPTX::STV_i32_v2_ari;
1760 Opcode = NVPTX::STV_i64_v2_ari;
1763 Opcode = NVPTX::STV_f32_v2_ari;
1766 Opcode = NVPTX::STV_f64_v2_ari;
1770 case NVPTXISD::StoreV4:
1771 switch (EltVT.getSimpleVT().SimpleTy) {
1775 Opcode = NVPTX::STV_i8_v4_ari;
1778 Opcode = NVPTX::STV_i16_v4_ari;
1781 Opcode = NVPTX::STV_i32_v4_ari;
1784 Opcode = NVPTX::STV_f32_v4_ari;
1790 StOps.push_back(Base);
1791 StOps.push_back(Offset);
1793 if (Subtarget.is64Bit()) {
1794 switch (N->getOpcode()) {
1797 case NVPTXISD::StoreV2:
1798 switch (EltVT.getSimpleVT().SimpleTy) {
1802 Opcode = NVPTX::STV_i8_v2_areg_64;
1805 Opcode = NVPTX::STV_i16_v2_areg_64;
1808 Opcode = NVPTX::STV_i32_v2_areg_64;
1811 Opcode = NVPTX::STV_i64_v2_areg_64;
1814 Opcode = NVPTX::STV_f32_v2_areg_64;
1817 Opcode = NVPTX::STV_f64_v2_areg_64;
1821 case NVPTXISD::StoreV4:
1822 switch (EltVT.getSimpleVT().SimpleTy) {
1826 Opcode = NVPTX::STV_i8_v4_areg_64;
1829 Opcode = NVPTX::STV_i16_v4_areg_64;
1832 Opcode = NVPTX::STV_i32_v4_areg_64;
1835 Opcode = NVPTX::STV_f32_v4_areg_64;
1841 switch (N->getOpcode()) {
1844 case NVPTXISD::StoreV2:
1845 switch (EltVT.getSimpleVT().SimpleTy) {
1849 Opcode = NVPTX::STV_i8_v2_areg;
1852 Opcode = NVPTX::STV_i16_v2_areg;
1855 Opcode = NVPTX::STV_i32_v2_areg;
1858 Opcode = NVPTX::STV_i64_v2_areg;
1861 Opcode = NVPTX::STV_f32_v2_areg;
1864 Opcode = NVPTX::STV_f64_v2_areg;
1868 case NVPTXISD::StoreV4:
1869 switch (EltVT.getSimpleVT().SimpleTy) {
1873 Opcode = NVPTX::STV_i8_v4_areg;
1876 Opcode = NVPTX::STV_i16_v4_areg;
1879 Opcode = NVPTX::STV_i32_v4_areg;
1882 Opcode = NVPTX::STV_f32_v4_areg;
1888 StOps.push_back(N2);
1891 StOps.push_back(Chain);
1893 ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
1895 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1896 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1897 cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
// Select a machine node for NVPTXISD::LoadParam / LoadParamV2 / LoadParamV4:
// a load of one, two, or four elements of an incoming formal parameter.
// The scalar/V2/V4 opcode tables below pick a LoadParamMem* instruction from
// the memory VT; i1 is widened to an 8-bit load (I8 opcode appears twice).
// NOTE(review): interior lines of this chunk appear elided (case labels and
// breaks are missing), so comments describe only what is visible.
1902 SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
1903 SDValue Chain = Node->getOperand(0);
// Operand 2 is the constant byte offset into the parameter; operand 3 is the
// incoming glue value threaded through the call sequence.
1904 SDValue Offset = Node->getOperand(2);
1905 SDValue Flag = Node->getOperand(3);
1907 MemSDNode *Mem = cast<MemSDNode>(Node);
// Determine the vector width (1, 2, or 4 results) from the opcode.
1910 switch (Node->getOpcode()) {
1913 case NVPTXISD::LoadParam:
1916 case NVPTXISD::LoadParamV2:
1919 case NVPTXISD::LoadParamV4:
// EltVT is the per-element result type; MemVT is the type actually read from
// the .param space and drives opcode selection below.
1924 EVT EltVT = Node->getValueType(0);
1925 EVT MemVT = Mem->getMemoryVT();
// Scalar opcode table.
1933 switch (MemVT.getSimpleVT().SimpleTy) {
1937 Opc = NVPTX::LoadParamMemI8;
1940 Opc = NVPTX::LoadParamMemI8;
1943 Opc = NVPTX::LoadParamMemI16;
1946 Opc = NVPTX::LoadParamMemI32;
1949 Opc = NVPTX::LoadParamMemI64;
1952 Opc = NVPTX::LoadParamMemF32;
1955 Opc = NVPTX::LoadParamMemF64;
// V2 opcode table.
1960 switch (MemVT.getSimpleVT().SimpleTy) {
1964 Opc = NVPTX::LoadParamMemV2I8;
1967 Opc = NVPTX::LoadParamMemV2I8;
1970 Opc = NVPTX::LoadParamMemV2I16;
1973 Opc = NVPTX::LoadParamMemV2I32;
1976 Opc = NVPTX::LoadParamMemV2I64;
1979 Opc = NVPTX::LoadParamMemV2F32;
1982 Opc = NVPTX::LoadParamMemV2F64;
// V4 opcode table (no 64-bit element variants are listed here).
1987 switch (MemVT.getSimpleVT().SimpleTy) {
1991 Opc = NVPTX::LoadParamMemV4I8;
1994 Opc = NVPTX::LoadParamMemV4I8;
1997 Opc = NVPTX::LoadParamMemV4I16;
2000 Opc = NVPTX::LoadParamMemV4I32;
2003 Opc = NVPTX::LoadParamMemV4F32;
// Result VT list: one element value per lane, plus chain and glue.
2011 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2012 } else if (VecSize == 2) {
2013 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2015 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
// NOTE(review): EVTs has 6 entries but only 5 are passed to getVTList —
// the trailing MVT::Glue appears to be dropped for the V4 case; verify
// against callers that expect a glue result.
2016 VTs = CurDAG->getVTList(&EVTs[0], 5);
2019 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
// Machine-node operands: offset immediate, chain, glue.
2021 SmallVector<SDValue, 2> Ops;
2022 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2023 Ops.push_back(Chain);
2024 Ops.push_back(Flag);
2027 CurDAG->getMachineNode(Opc, DL, VTs, Ops);
// Select a machine node for NVPTXISD::StoreRetval / StoreRetvalV2 /
// StoreRetvalV4: stores one, two, or four return values into the callee's
// return-value space at a constant byte offset.
// NOTE(review): interior lines of this chunk appear elided (case labels and
// breaks are missing), so comments describe only what is visible.
2031 SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
2033 SDValue Chain = N->getOperand(0);
// Operand 1 is the constant byte offset of the return slot.
2034 SDValue Offset = N->getOperand(1);
2035 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2036 MemSDNode *Mem = cast<MemSDNode>(N);
2038 // How many elements do we have?
2039 unsigned NumElts = 1;
2040 switch (N->getOpcode()) {
2043 case NVPTXISD::StoreRetval:
2046 case NVPTXISD::StoreRetvalV2:
2049 case NVPTXISD::StoreRetvalV4:
2054 // Build vector of operands
// Value operands start at operand index 2; then offset immediate and chain.
2055 SmallVector<SDValue, 6> Ops;
2056 for (unsigned i = 0; i < NumElts; ++i)
2057 Ops.push_back(N->getOperand(i + 2));
2058 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2059 Ops.push_back(Chain);
2061 // Determine target opcode
2062 // If we have an i1, use an 8-bit store. The lowering code in
2063 // NVPTXISelLowering will have already emitted an upcast.
2064 unsigned Opcode = 0;
// Scalar opcode table, keyed on the stored memory VT.
2069 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2073 Opcode = NVPTX::StoreRetvalI8;
2076 Opcode = NVPTX::StoreRetvalI8;
2079 Opcode = NVPTX::StoreRetvalI16;
2082 Opcode = NVPTX::StoreRetvalI32;
2085 Opcode = NVPTX::StoreRetvalI64;
2088 Opcode = NVPTX::StoreRetvalF32;
2091 Opcode = NVPTX::StoreRetvalF64;
// V2 opcode table.
2096 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2100 Opcode = NVPTX::StoreRetvalV2I8;
2103 Opcode = NVPTX::StoreRetvalV2I8;
2106 Opcode = NVPTX::StoreRetvalV2I16;
2109 Opcode = NVPTX::StoreRetvalV2I32;
2112 Opcode = NVPTX::StoreRetvalV2I64;
2115 Opcode = NVPTX::StoreRetvalV2F32;
2118 Opcode = NVPTX::StoreRetvalV2F64;
// V4 opcode table (no 64-bit element variants are listed here).
2123 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2127 Opcode = NVPTX::StoreRetvalV4I8;
2130 Opcode = NVPTX::StoreRetvalV4I8;
2133 Opcode = NVPTX::StoreRetvalV4I16;
2136 Opcode = NVPTX::StoreRetvalV4I32;
2139 Opcode = NVPTX::StoreRetvalV4F32;
2146 CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
// Attach the original memory operand so later passes see the memory access.
2147 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2148 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2149 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
// Select a machine node for the StoreParam family: stores one, two, or four
// values into an outgoing call parameter (param number + byte offset).
// StoreParamU32/S32 first widen a 16-bit value to 32 bits with an explicit
// CVT machine node, then emit a plain 32-bit StoreParam.
// NOTE(review): interior lines of this chunk appear elided (case labels and
// breaks are missing), so comments describe only what is visible.
2154 SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
2156 SDValue Chain = N->getOperand(0);
// Operand 1 selects which outgoing parameter; operand 2 is the byte offset
// within that parameter. Both are compile-time constants.
2157 SDValue Param = N->getOperand(1);
2158 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2159 SDValue Offset = N->getOperand(2);
2160 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2161 MemSDNode *Mem = cast<MemSDNode>(N);
// The glue input is always the last operand, after the variable-length
// list of value operands.
2162 SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2164 // How many elements do we have?
2165 unsigned NumElts = 1;
2166 switch (N->getOpcode()) {
2169 case NVPTXISD::StoreParamU32:
2170 case NVPTXISD::StoreParamS32:
2171 case NVPTXISD::StoreParam:
2174 case NVPTXISD::StoreParamV2:
2177 case NVPTXISD::StoreParamV4:
2182 // Build vector of operands
// Value operands start at operand index 3; then param number, offset,
// chain, and glue.
2183 SmallVector<SDValue, 8> Ops;
2184 for (unsigned i = 0; i < NumElts; ++i)
2185 Ops.push_back(N->getOperand(i + 3));
2186 Ops.push_back(CurDAG->getTargetConstant(ParamVal, MVT::i32));
2187 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2188 Ops.push_back(Chain);
2189 Ops.push_back(Flag);
2191 // Determine target opcode
2192 // If we have an i1, use an 8-bit store. The lowering code in
2193 // NVPTXISelLowering will have already emitted an upcast.
2194 unsigned Opcode = 0;
2195 switch (N->getOpcode()) {
// Scalar opcode table, keyed on the stored memory VT.
2201 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2205 Opcode = NVPTX::StoreParamI8;
2208 Opcode = NVPTX::StoreParamI8;
2211 Opcode = NVPTX::StoreParamI16;
2214 Opcode = NVPTX::StoreParamI32;
2217 Opcode = NVPTX::StoreParamI64;
2220 Opcode = NVPTX::StoreParamF32;
2223 Opcode = NVPTX::StoreParamF64;
// V2 opcode table.
2228 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2232 Opcode = NVPTX::StoreParamV2I8;
2235 Opcode = NVPTX::StoreParamV2I8;
2238 Opcode = NVPTX::StoreParamV2I16;
2241 Opcode = NVPTX::StoreParamV2I32;
2244 Opcode = NVPTX::StoreParamV2I64;
2247 Opcode = NVPTX::StoreParamV2F32;
2250 Opcode = NVPTX::StoreParamV2F64;
// V4 opcode table (no 64-bit element variants are listed here).
2255 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2259 Opcode = NVPTX::StoreParamV4I8;
2262 Opcode = NVPTX::StoreParamV4I8;
2265 Opcode = NVPTX::StoreParamV4I16;
2268 Opcode = NVPTX::StoreParamV4I32;
2271 Opcode = NVPTX::StoreParamV4F32;
2277 // Special case: if we have a sign-extend/zero-extend node, insert the
2278 // conversion instruction first, and use that as the value operand to
2279 // the selected StoreParam node.
2280 case NVPTXISD::StoreParamU32: {
2281 Opcode = NVPTX::StoreParamI32;
// Zero-extend u16 -> u32 via CVT_u32_u16 with no rounding/saturation mode.
2282 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2284 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2285 MVT::i32, Ops[0], CvtNone);
2286 Ops[0] = SDValue(Cvt, 0);
2289 case NVPTXISD::StoreParamS32: {
2290 Opcode = NVPTX::StoreParamI32;
// Sign-extend s16 -> s32 via CVT_s32_s16 with no rounding/saturation mode.
2291 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2293 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2294 MVT::i32, Ops[0], CvtNone);
2295 Ops[0] = SDValue(Cvt, 0);
// The store produces a chain and a glue result for the call sequence.
2300 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
2302 CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
// Attach the original memory operand so later passes see the memory access.
2303 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2304 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2305 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2310 // SelectDirectAddr - Match a direct address for DAG.
2311 // A direct address could be a globaladdress or externalsymbol.
// On success, Address is set to the matched symbol node and true is
// returned; otherwise Address is left untouched.
2312 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
2313 // Return true if TGA or ES.
2314 if (N.getOpcode() == ISD::TargetGlobalAddress ||
2315 N.getOpcode() == ISD::TargetExternalSymbol) {
// Look through the NVPTX wrapper node to the wrapped symbol.
2319 if (N.getOpcode() == NVPTXISD::Wrapper) {
2320 Address = N.getOperand(0);
// Special case: nvvm.ptr.gen.to.param applied to a MoveParam'd value is
// still a direct address; recurse on the underlying operand.
2323 if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2324 unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
2325 if (IID == Intrinsic::nvvm_ptr_gen_to_param)
2326 if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
2327 return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
// symbol+offset addressing: match (add direct-symbol, constant) and split it
// into Base (the symbol) and Offset (a target constant of type mvt).
2333 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
2334 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
2335 if (Addr.getOpcode() == ISD::ADD) {
// The second add operand must be a compile-time constant offset.
2336 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2337 SDValue base = Addr.getOperand(0);
// The first add operand must be a direct (symbolic) address.
2338 if (SelectDirectAddr(base, Base)) {
2339 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
// 32-bit symbol+offset addressing: delegates to SelectADDRsi_imp with an
// i32 offset type (used for 32-bit pointer subtargets).
2348 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
2349 SDValue &Base, SDValue &Offset) {
2350 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
// 64-bit symbol+offset addressing: delegates to SelectADDRsi_imp with an
// i64 offset type (used for 64-bit pointer subtargets).
2354 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
2355 SDValue &Base, SDValue &Offset) {
2356 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
// register+offset addressing: match a frame index, or (add base, constant),
// producing Base plus a constant Offset of type mvt. Rejects bare symbol
// nodes (those are handled as direct addresses) and add-of-direct-address
// (that is the symbol+offset form handled by SelectADDRsi_imp).
2360 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
2361 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
// A plain frame index becomes FI base + zero offset.
2362 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2363 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
2364 Offset = CurDAG->getTargetConstant(0, mvt);
2367 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
2368 Addr.getOpcode() == ISD::TargetGlobalAddress)
2369 return false; // direct calls.
2371 if (Addr.getOpcode() == ISD::ADD) {
// add-of-direct-address is not a register+immediate candidate here.
2372 if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
2375 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2376 if (FrameIndexSDNode *FIN =
2377 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
2378 // Constant offset from frame ref.
2379 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
// Otherwise the add's first operand is used directly as the base register.
2381 Base = Addr.getOperand(0);
2382 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
// 32-bit register+offset addressing: delegates to SelectADDRri_imp with an
// i32 offset type (used for 32-bit pointer subtargets).
2390 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
2391 SDValue &Base, SDValue &Offset) {
2392 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
// 64-bit register+offset addressing: delegates to SelectADDRri_imp with an
// i64 offset type (used for 64-bit pointer subtargets).
2396 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
2397 SDValue &Base, SDValue &Offset) {
2398 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
// Return true if memory node N accesses a pointer whose IR-level address
// space equals spN. Works for both plain MemSDNodes and memory intrinsics.
2401 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
2402 unsigned int spN) const {
2403 const Value *Src = NULL;
2404 // Even though MemIntrinsicSDNode is a subclass of MemSDNode,
2405 // the classof() for MemSDNode does not include MemIntrinsicSDNode
2406 // (See SelectionDAGNodes.h). So we need to check for both.
2407 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
2408 Src = mN->getSrcValue();
2409 } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
2410 Src = mN->getSrcValue();
// NOTE(review): as visible here, Src is dereferenced below without a null
// check; presumably an elided line bails out when Src is null — confirm.
// Compare the pointer's IR address space against the requested one.
2414 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
2415 return (PT->getAddressSpace() == spN);
2419 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
2420 /// inline asm expressions.
/// Tries direct (symbolic) addressing first, then falls back to
/// register+immediate; on a match the resulting base/offset pair is
/// appended to OutOps. Returning false signals success to the caller.
2421 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
2422 const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
2424 switch (ConstraintCode) {
// Direct symbol: emit the symbol plus an explicit zero offset.
2428 if (SelectDirectAddr(Op, Op0)) {
2429 OutOps.push_back(Op0);
2430 OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
// Register + immediate offset form.
2433 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
2434 OutOps.push_back(Op0);
2435 OutOps.push_back(Op1);
2443 // Return true if N is an undef or a constant.
2444 // If N was undef, return a (i8imm 0) in Retval
2445 // If N was imm, convert it to i8imm and return in Retval
2446 // Note: The convert to i8imm is required, otherwise the
2447 // pattern matcher inserts a bunch of IMOVi8rr to convert
2448 // the imm to i8imm, and this causes instruction selection
2450 bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N, SDValue &Retval) {
2451 if (!(N.getOpcode() == ISD::UNDEF) && !(N.getOpcode() == ISD::Constant))
2454 if (N.getOpcode() == ISD::UNDEF)
2455 Retval = CurDAG->getTargetConstant(0, MVT::i8);
2457 ConstantSDNode *cn = cast<ConstantSDNode>(N.getNode());
2458 unsigned retval = cn->getZExtValue();
2459 Retval = CurDAG->getTargetConstant(retval, MVT::i8);