1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines an instruction selector for the NVPTX target.
12 //===----------------------------------------------------------------------===//
14 #include "NVPTXISelDAGToDAG.h"
15 #include "llvm/IR/GlobalValue.h"
16 #include "llvm/IR/Instructions.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/ErrorHandling.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include "llvm/Target/TargetIntrinsicInfo.h"
// Debug tag used by DEBUG()/-debug-only output from this pass.
#define DEBUG_TYPE "nvptx-isel"

// Command-line knobs controlling NVPTX instruction selection.
// NOTE(review): several of these option definitions appear truncated in this
// view (missing 'static cl::opt<...>' headers and 'cl::init(...)' tails) --
// verify against the full file.

// FMA contraction level: 0 = off, 1 = on, 2 = aggressive (consumed by the
// NVPTXDAGToDAGISel constructor below).
FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
                 cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
                          " 1: do it 2: do it aggressively"),

// f32 division lowering: 0 = div.approx, 1 = div.full, 2 = IEEE div.rnd
// (read by getDivF32Level()).
static cl::opt<int> UsePrecDivF32(
    "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
    cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
             " IEEE Compliant F32 div.rnd if avaiable."),

// f32 square-root lowering: choose sqrt.approx vs sqrt.rn
// (read by usePrecSqrtF32()).
UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
               cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),

// When set, force flush-to-zero of f32 subnormals (read by useF32FTZ()).
FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
           cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
/// createNVPTXISelDag - This pass converts a legalized DAG into a
/// NVPTX-specific DAG, ready for instruction scheduling.
/// \param TM the NVPTX target machine to select for.
/// \param OptLevel codegen optimization level; gates FMA/mul.wide formation
///        in the selector's constructor.
/// \returns a newly allocated pass; ownership transfers to the caller
///          (normally the pass manager).
FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
                                       llvm::CodeGenOpt::Level OptLevel) {
  return new NVPTXDAGToDAGISel(TM, OptLevel);
// Construct the selector and precompute, from the opt level, the subtarget
// capabilities, and the nvptx-fma-level flag, which selection features
// (FMA contraction, wide multiplies) are permitted.
NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
                                     CodeGenOpt::Level OptLevel)
    : SelectionDAGISel(tm, OptLevel),
      Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
  // FMA formation requires optimization enabled, hardware FMA support, and
  // contraction level >= 1.
  doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1);
  doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1);
  // NOTE(review): the two expressions below have no visible assignment
  // targets in this view (presumably the aggressive-FMA flags) -- confirm
  // against the full file.
  (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel == 2);
  (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2);
  // Contraction is allowed at any level >= 1 regardless of opt level.
  allowFMA = (FMAContractLevel >= 1);
  // mul.wide formation only when optimizing.
  doMulWide = (OptLevel > 0);
// Decide how f32 division is lowered:
//   0 = div.approx, 1 = div.full, 2 = IEEE-compliant div.rnd.
// The command-line flag, when present, always wins; otherwise the choice
// depends on fast-math (UnsafeFPMath).
int NVPTXDAGToDAGISel::getDivF32Level() const {
  if (UsePrecDivF32.getNumOccurrences() > 0) {
    // If nvptx-prec-divf32=N is used on the command-line, always honor it
  // Otherwise, use div.approx if fast math is enabled
  if (TM.Options.UnsafeFPMath)
// Decide whether f32 square root uses the precise sqrt.rn (true) or the
// fast sqrt.approx (false).  The command-line flag, when present, always
// wins; otherwise the choice depends on fast-math (UnsafeFPMath).
bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
  if (UsePrecSqrtF32.getNumOccurrences() > 0) {
    // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
    return UsePrecSqrtF32;
  // Otherwise, use sqrt.approx if fast math is enabled
  if (TM.Options.UnsafeFPMath)
// Decide whether f32 subnormals are flushed to zero (.ftz modifier).
// Priority: the nvptx-f32ftz command-line flag, then the current function's
// string attribute "nvptx-f32ftz" (true when its value is "true").
bool NVPTXDAGToDAGISel::useF32FTZ() const {
  if (FtzEnabled.getNumOccurrences() > 0) {
    // If nvptx-f32ftz is used on the command-line, always honor it
  const Function *F = MF->getFunction();
  // Otherwise, check for an nvptx-f32ftz attribute on the function
  if (F->hasFnAttribute("nvptx-f32ftz"))
    return (F->getAttributes().getAttribute(AttributeSet::FunctionIndex,
            .getValueAsString() == "true");
/// Select - Select instructions not customized! Used for
/// expanded, promoted and normal instructions.
/// Dispatches NVPTX-specific pseudo opcodes to their dedicated Select*
/// helpers; everything else falls through to the TableGen-generated
/// matcher via SelectCode(N).
SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
  if (N->isMachineOpcode()) {
    return NULL; // Already selected.
  SDNode *ResNode = NULL;
  // NOTE(review): some case labels (e.g. for the plain load/store opcodes)
  // and break statements are not visible in this view -- verify against
  // the full file.
  switch (N->getOpcode()) {
    ResNode = SelectLoad(N);
    ResNode = SelectStore(N);
  case NVPTXISD::LoadV2:
  case NVPTXISD::LoadV4:
    ResNode = SelectLoadVector(N);
  case NVPTXISD::LDGV2:
  case NVPTXISD::LDGV4:
  case NVPTXISD::LDUV2:
  case NVPTXISD::LDUV4:
    ResNode = SelectLDGLDUVector(N);
  case NVPTXISD::StoreV2:
  case NVPTXISD::StoreV4:
    ResNode = SelectStoreVector(N);
  case NVPTXISD::LoadParam:
  case NVPTXISD::LoadParamV2:
  case NVPTXISD::LoadParamV4:
    ResNode = SelectLoadParam(N);
  case NVPTXISD::StoreRetval:
  case NVPTXISD::StoreRetvalV2:
  case NVPTXISD::StoreRetvalV4:
    ResNode = SelectStoreRetval(N);
  case NVPTXISD::StoreParam:
  case NVPTXISD::StoreParamV2:
  case NVPTXISD::StoreParamV4:
  case NVPTXISD::StoreParamS32:
  case NVPTXISD::StoreParamU32:
    ResNode = SelectStoreParam(N);
  case ISD::ADDRSPACECAST:
    ResNode = SelectAddrSpaceCast(N);
  // Not custom-handled: use the auto-generated matcher.
  return SelectCode(N);
// Map a memory node's source pointer type to the PTX ld/st address-space
// code (GENERIC/GLOBAL/SHARED/LOCAL/PARAM/CONSTANT).  Falls back to
// GENERIC when the source value or its pointer type cannot be determined.
static unsigned int getCodeAddrSpace(MemSDNode *N,
                                     const NVPTXSubtarget &Subtarget) {
  const Value *Src = N->getSrcValue();
    return NVPTX::PTXLdStInstCode::GENERIC;
  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
    // Translate the IR address space number into the PTX instruction code.
    switch (PT->getAddressSpace()) {
    case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
    case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
    case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
    case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
    case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
    case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
  // Unknown pointer provenance: be conservative and use the generic space.
  return NVPTX::PTXLdStInstCode::GENERIC;
// Lower an ISD::ADDRSPACECAST to the matching cvta (specific -> generic)
// or cvta.to (generic -> specific) machine instruction, choosing the
// 32-/64-bit variant from the subtarget.  Exactly one side of the cast
// must be the generic space; anything else is a fatal error.
SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
  SDValue Src = N->getOperand(0);
  AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
  unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
  unsigned DstAddrSpace = CastN->getDestAddressSpace();
  // The DAG combiner/legalizer should never produce a no-op cast.
  assert(SrcAddrSpace != DstAddrSpace &&
         "addrspacecast must be between different address spaces");
  if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
    // Specific to generic
    switch (SrcAddrSpace) {
    default: report_fatal_error("Bad address space in addrspacecast");
    case ADDRESS_SPACE_GLOBAL:
      Opc = Subtarget.is64Bit() ? NVPTX::cvta_global_yes_64
                                : NVPTX::cvta_global_yes;
    case ADDRESS_SPACE_SHARED:
      Opc = Subtarget.is64Bit() ? NVPTX::cvta_shared_yes_64
                                : NVPTX::cvta_shared_yes;
    case ADDRESS_SPACE_CONST:
      Opc = Subtarget.is64Bit() ? NVPTX::cvta_const_yes_64
                                : NVPTX::cvta_const_yes;
    case ADDRESS_SPACE_LOCAL:
      Opc = Subtarget.is64Bit() ? NVPTX::cvta_local_yes_64
                                : NVPTX::cvta_local_yes;
    // cvta produces the generic-space pointer in the node's result type.
    return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
    // Generic to specific
    if (SrcAddrSpace != 0)
      report_fatal_error("Cannot cast between two non-generic address spaces");
    switch (DstAddrSpace) {
    default: report_fatal_error("Bad address space in addrspacecast");
    case ADDRESS_SPACE_GLOBAL:
      Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_global_yes_64
                                : NVPTX::cvta_to_global_yes;
    case ADDRESS_SPACE_SHARED:
      Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_shared_yes_64
                                : NVPTX::cvta_to_shared_yes;
    case ADDRESS_SPACE_CONST:
      Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_const_yes_64
                                : NVPTX::cvta_to_const_yes;
    case ADDRESS_SPACE_LOCAL:
      Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_local_yes_64
                                : NVPTX::cvta_to_local_yes;
    return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
// Select a scalar ISD::LOAD into the matching NVPTX LD_* machine
// instruction.  The addressing form is tried in order: direct address
// (avar), symbol+immediate (asi), register+immediate (ari/ari_64), and
// finally plain register (areg/areg_64).  Returns NULL when the node
// cannot be handled here (indexed load, non-simple VT, unsupported type).
SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  SDNode *NVPTXLD = NULL;
  // do not support pre/post inc/dec
  if (!LoadedVT.isSimple())
  // Address Space Setting
  unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
  // - .volatile is only available for .global and .shared
  bool isVolatile = LD->isVolatile();
  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
  // Vector Setting: scalar vs. packed v2/v4 load.
  MVT SimpleVT = LoadedVT.getSimpleVT();
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
  if (SimpleVT.isVector()) {
    unsigned num = SimpleVT.getVectorNumElements();
      vecType = NVPTX::PTXLdStInstCode::V2;
      vecType = NVPTX::PTXLdStInstCode::V4;
  // Type Setting: fromType + fromTypeWidth
  // Sign   : ISD::SEXTLOAD
  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
  MVT ScalarVT = SimpleVT.getScalarType();
  // Read at least 8 bits (predicates are stored as 8-bit values)
  unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
  unsigned int fromType;
  if ((LD->getExtensionType() == ISD::SEXTLOAD))
    fromType = NVPTX::PTXLdStInstCode::Signed;
  else if (ScalarVT.isFloatingPoint())
    fromType = NVPTX::PTXLdStInstCode::Float;
    fromType = NVPTX::PTXLdStInstCode::Unsigned;
  // Create the machine instruction DAG
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue Offset, Base;
  MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
  // Addressing form 1: direct address (avar variants).
  if (SelectDirectAddr(N1, Addr)) {
      Opcode = NVPTX::LD_i8_avar;
      Opcode = NVPTX::LD_i16_avar;
      Opcode = NVPTX::LD_i32_avar;
      Opcode = NVPTX::LD_i64_avar;
      Opcode = NVPTX::LD_f32_avar;
      Opcode = NVPTX::LD_f64_avar;
    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(fromType),
                      getI32Imm(fromTypeWidth), Addr, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
  // Addressing form 2: symbol + immediate (asi variants).
  } else if (Subtarget.is64Bit()
                 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
                 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
      Opcode = NVPTX::LD_i8_asi;
      Opcode = NVPTX::LD_i16_asi;
      Opcode = NVPTX::LD_i32_asi;
      Opcode = NVPTX::LD_i64_asi;
      Opcode = NVPTX::LD_f32_asi;
      Opcode = NVPTX::LD_f64_asi;
    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(fromType),
                      getI32Imm(fromTypeWidth), Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
  // Addressing form 3: register + immediate (ari/ari_64 variants).
  } else if (Subtarget.is64Bit()
                 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
                 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
    if (Subtarget.is64Bit()) {
        Opcode = NVPTX::LD_i8_ari_64;
        Opcode = NVPTX::LD_i16_ari_64;
        Opcode = NVPTX::LD_i32_ari_64;
        Opcode = NVPTX::LD_i64_ari_64;
        Opcode = NVPTX::LD_f32_ari_64;
        Opcode = NVPTX::LD_f64_ari_64;
        Opcode = NVPTX::LD_i8_ari;
        Opcode = NVPTX::LD_i16_ari;
        Opcode = NVPTX::LD_i32_ari;
        Opcode = NVPTX::LD_i64_ari;
        Opcode = NVPTX::LD_f32_ari;
        Opcode = NVPTX::LD_f64_ari;
    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(fromType),
                      getI32Imm(fromTypeWidth), Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
    // Addressing form 4 (fallback): plain register (areg/areg_64 variants).
    if (Subtarget.is64Bit()) {
        Opcode = NVPTX::LD_i8_areg_64;
        Opcode = NVPTX::LD_i16_areg_64;
        Opcode = NVPTX::LD_i32_areg_64;
        Opcode = NVPTX::LD_i64_areg_64;
        Opcode = NVPTX::LD_f32_areg_64;
        Opcode = NVPTX::LD_f64_areg_64;
        Opcode = NVPTX::LD_i8_areg;
        Opcode = NVPTX::LD_i16_areg;
        Opcode = NVPTX::LD_i32_areg;
        Opcode = NVPTX::LD_i64_areg;
        Opcode = NVPTX::LD_f32_areg;
        Opcode = NVPTX::LD_f64_areg;
    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(fromType),
                      getI32Imm(fromTypeWidth), N1, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
  // Attach the original memory operand so later passes keep alias info.
  if (NVPTXLD != NULL) {
    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
    cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
// Select an NVPTXISD::LoadV2/LoadV4 node into the matching LDV_* machine
// instruction.  Mirrors SelectLoad: the same four addressing forms are
// tried in order (avar, asi, ari/ari_64, areg/areg_64), then the opcode is
// picked by vector arity and element type.  Note: v4 variants exist only
// for i8/i16/i32/f32 (no 64-bit element v4 loads).
SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Addr, Offset, Base;
  MemSDNode *MemSD = cast<MemSDNode>(N);
  EVT LoadedVT = MemSD->getMemoryVT();
  if (!LoadedVT.isSimple())
  // Address Space Setting
  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
  // - .volatile is only available for .global and .shared
  bool IsVolatile = MemSD->isVolatile();
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
  MVT SimpleVT = LoadedVT.getSimpleVT();
  // Type Setting: fromType + fromTypeWidth
  // Sign   : ISD::SEXTLOAD
  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
  MVT ScalarVT = SimpleVT.getScalarType();
  // Read at least 8 bits (predicates are stored as 8-bit values)
  unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
  unsigned int FromType;
  // The last operand holds the original LoadSDNode::getExtensionType() value
  unsigned ExtensionType = cast<ConstantSDNode>(
      N->getOperand(N->getNumOperands() - 1))->getZExtValue();
  if (ExtensionType == ISD::SEXTLOAD)
    FromType = NVPTX::PTXLdStInstCode::Signed;
  else if (ScalarVT.isFloatingPoint())
    FromType = NVPTX::PTXLdStInstCode::Float;
    FromType = NVPTX::PTXLdStInstCode::Unsigned;
  // Vector arity from the pseudo opcode.
  switch (N->getOpcode()) {
  case NVPTXISD::LoadV2:
    VecType = NVPTX::PTXLdStInstCode::V2;
  case NVPTXISD::LoadV4:
    VecType = NVPTX::PTXLdStInstCode::V4;
  EVT EltVT = N->getValueType(0);
  // Addressing form 1: direct address (avar variants).
  if (SelectDirectAddr(Op1, Addr)) {
    switch (N->getOpcode()) {
    case NVPTXISD::LoadV2:
      switch (EltVT.getSimpleVT().SimpleTy) {
        Opcode = NVPTX::LDV_i8_v2_avar;
        Opcode = NVPTX::LDV_i16_v2_avar;
        Opcode = NVPTX::LDV_i32_v2_avar;
        Opcode = NVPTX::LDV_i64_v2_avar;
        Opcode = NVPTX::LDV_f32_v2_avar;
        Opcode = NVPTX::LDV_f64_v2_avar;
    case NVPTXISD::LoadV4:
      switch (EltVT.getSimpleVT().SimpleTy) {
        Opcode = NVPTX::LDV_i8_v4_avar;
        Opcode = NVPTX::LDV_i16_v4_avar;
        Opcode = NVPTX::LDV_i32_v4_avar;
        Opcode = NVPTX::LDV_f32_v4_avar;
    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
                      getI32Imm(VecType), getI32Imm(FromType),
                      getI32Imm(FromTypeWidth), Addr, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
  // Addressing form 2: symbol + immediate (asi variants).
  } else if (Subtarget.is64Bit()
                 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
                 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
    switch (N->getOpcode()) {
    case NVPTXISD::LoadV2:
      switch (EltVT.getSimpleVT().SimpleTy) {
        Opcode = NVPTX::LDV_i8_v2_asi;
        Opcode = NVPTX::LDV_i16_v2_asi;
        Opcode = NVPTX::LDV_i32_v2_asi;
        Opcode = NVPTX::LDV_i64_v2_asi;
        Opcode = NVPTX::LDV_f32_v2_asi;
        Opcode = NVPTX::LDV_f64_v2_asi;
    case NVPTXISD::LoadV4:
      switch (EltVT.getSimpleVT().SimpleTy) {
        Opcode = NVPTX::LDV_i8_v4_asi;
        Opcode = NVPTX::LDV_i16_v4_asi;
        Opcode = NVPTX::LDV_i32_v4_asi;
        Opcode = NVPTX::LDV_f32_v4_asi;
    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
                      getI32Imm(VecType), getI32Imm(FromType),
                      getI32Imm(FromTypeWidth), Base, Offset, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
  // Addressing form 3: register + immediate (ari/ari_64 variants).
  } else if (Subtarget.is64Bit()
                 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
                 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
    if (Subtarget.is64Bit()) {
      switch (N->getOpcode()) {
      case NVPTXISD::LoadV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::LDV_i8_v2_ari_64;
          Opcode = NVPTX::LDV_i16_v2_ari_64;
          Opcode = NVPTX::LDV_i32_v2_ari_64;
          Opcode = NVPTX::LDV_i64_v2_ari_64;
          Opcode = NVPTX::LDV_f32_v2_ari_64;
          Opcode = NVPTX::LDV_f64_v2_ari_64;
      case NVPTXISD::LoadV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::LDV_i8_v4_ari_64;
          Opcode = NVPTX::LDV_i16_v4_ari_64;
          Opcode = NVPTX::LDV_i32_v4_ari_64;
          Opcode = NVPTX::LDV_f32_v4_ari_64;
      // 32-bit address path.
      switch (N->getOpcode()) {
      case NVPTXISD::LoadV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::LDV_i8_v2_ari;
          Opcode = NVPTX::LDV_i16_v2_ari;
          Opcode = NVPTX::LDV_i32_v2_ari;
          Opcode = NVPTX::LDV_i64_v2_ari;
          Opcode = NVPTX::LDV_f32_v2_ari;
          Opcode = NVPTX::LDV_f64_v2_ari;
      case NVPTXISD::LoadV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::LDV_i8_v4_ari;
          Opcode = NVPTX::LDV_i16_v4_ari;
          Opcode = NVPTX::LDV_i32_v4_ari;
          Opcode = NVPTX::LDV_f32_v4_ari;
    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
                      getI32Imm(VecType), getI32Imm(FromType),
                      getI32Imm(FromTypeWidth), Base, Offset, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
    // Addressing form 4 (fallback): plain register (areg/areg_64 variants).
    if (Subtarget.is64Bit()) {
      switch (N->getOpcode()) {
      case NVPTXISD::LoadV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::LDV_i8_v2_areg_64;
          Opcode = NVPTX::LDV_i16_v2_areg_64;
          Opcode = NVPTX::LDV_i32_v2_areg_64;
          Opcode = NVPTX::LDV_i64_v2_areg_64;
          Opcode = NVPTX::LDV_f32_v2_areg_64;
          Opcode = NVPTX::LDV_f64_v2_areg_64;
      case NVPTXISD::LoadV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::LDV_i8_v4_areg_64;
          Opcode = NVPTX::LDV_i16_v4_areg_64;
          Opcode = NVPTX::LDV_i32_v4_areg_64;
          Opcode = NVPTX::LDV_f32_v4_areg_64;
      switch (N->getOpcode()) {
      case NVPTXISD::LoadV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::LDV_i8_v2_areg;
          Opcode = NVPTX::LDV_i16_v2_areg;
          Opcode = NVPTX::LDV_i32_v2_areg;
          Opcode = NVPTX::LDV_i64_v2_areg;
          Opcode = NVPTX::LDV_f32_v2_areg;
          Opcode = NVPTX::LDV_f64_v2_areg;
      case NVPTXISD::LoadV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::LDV_i8_v4_areg;
          Opcode = NVPTX::LDV_i16_v4_areg;
          Opcode = NVPTX::LDV_i32_v4_areg;
          Opcode = NVPTX::LDV_f32_v4_areg;
    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
                      getI32Imm(VecType), getI32Imm(FromType),
                      getI32Imm(FromTypeWidth), Op1, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
  // Preserve the memory operand for alias analysis / scheduling.
  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
// Select an NVPTXISD::LDGV2/LDGV4/LDUV2/LDUV4 node into the matching
// INT_PTX_LDG_G_* / INT_PTX_LDU_G_* per-element machine instruction
// (ld.global.nc / ldu).  Addressing forms tried in order: direct address
// (avar), register + immediate (ari32/ari64), plain register
// (areg32/areg64); note there is no symbol+immediate form here, unlike
// SelectLoadVector.  v4 variants exist only for i8/i16/i32/f32.
SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  MemSDNode *Mem = cast<MemSDNode>(N);
  SDValue Base, Offset, Addr;
  // Opcode choice is keyed on the vector *element* type.
  EVT EltVT = Mem->getMemoryVT().getVectorElementType();
  // Addressing form 1: direct address (avar variants).
  if (SelectDirectAddr(Op1, Addr)) {
    switch (N->getOpcode()) {
    case NVPTXISD::LDGV2:
      switch (EltVT.getSimpleVT().SimpleTy) {
        Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
    case NVPTXISD::LDUV2:
      switch (EltVT.getSimpleVT().SimpleTy) {
        Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
    case NVPTXISD::LDGV4:
      switch (EltVT.getSimpleVT().SimpleTy) {
        Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
    case NVPTXISD::LDUV4:
      switch (EltVT.getSimpleVT().SimpleTy) {
        Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
        Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
    SDValue Ops[] = { Addr, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
                                ArrayRef<SDValue>(Ops, 2));
  // Addressing form 2: register + immediate (ari32/ari64 variants).
  } else if (Subtarget.is64Bit()
                 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
                 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
    if (Subtarget.is64Bit()) {
      switch (N->getOpcode()) {
      case NVPTXISD::LDGV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
      case NVPTXISD::LDUV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
      case NVPTXISD::LDGV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
      case NVPTXISD::LDUV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
      // 32-bit address path.
      switch (N->getOpcode()) {
      case NVPTXISD::LDGV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
      case NVPTXISD::LDUV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
      case NVPTXISD::LDGV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
      case NVPTXISD::LDUV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
    SDValue Ops[] = { Base, Offset, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
                                ArrayRef<SDValue>(Ops, 3));
    // Addressing form 3 (fallback): plain register (areg32/areg64).
    if (Subtarget.is64Bit()) {
      switch (N->getOpcode()) {
      case NVPTXISD::LDGV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
      case NVPTXISD::LDUV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
      case NVPTXISD::LDGV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
      case NVPTXISD::LDUV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
      switch (N->getOpcode()) {
      case NVPTXISD::LDGV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
      case NVPTXISD::LDUV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
      case NVPTXISD::LDGV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
      case NVPTXISD::LDUV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
    SDValue Ops[] = { Op1, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
                                ArrayRef<SDValue>(Ops, 2));
  // Preserve the memory operand for alias analysis / scheduling.
  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
// Select a scalar ISD::STORE into the matching NVPTX ST_* machine
// instruction.  Mirror image of SelectLoad: the same four addressing
// forms are tried in order (avar, asi, ari/ari_64, areg/areg_64), and the
// opcode is keyed on the *source value's* simple type.  Returns NULL for
// indexed stores, non-simple VTs, or unsupported types.
SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
  StoreSDNode *ST = cast<StoreSDNode>(N);
  EVT StoreVT = ST->getMemoryVT();
  SDNode *NVPTXST = NULL;
  // do not support pre/post inc/dec
  if (ST->isIndexed())
  if (!StoreVT.isSimple())
  // Address Space Setting
  unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
  // - .volatile is only available for .global and .shared
  bool isVolatile = ST->isVolatile();
  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
  // Vector Setting: scalar vs. packed v2/v4 store.
  MVT SimpleVT = StoreVT.getSimpleVT();
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
  if (SimpleVT.isVector()) {
    unsigned num = SimpleVT.getVectorNumElements();
      vecType = NVPTX::PTXLdStInstCode::V2;
      vecType = NVPTX::PTXLdStInstCode::V4;
  // Type Setting: toType + toTypeWidth
  // - for integer type, always use 'u'
  MVT ScalarVT = SimpleVT.getScalarType();
  unsigned toTypeWidth = ScalarVT.getSizeInBits();
  unsigned int toType;
  if (ScalarVT.isFloatingPoint())
    toType = NVPTX::PTXLdStInstCode::Float;
    toType = NVPTX::PTXLdStInstCode::Unsigned;
  // Create the machine instruction DAG
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDValue Offset, Base;
  MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
  // Addressing form 1: direct address (avar variants).
  if (SelectDirectAddr(N2, Addr)) {
      Opcode = NVPTX::ST_i8_avar;
      Opcode = NVPTX::ST_i16_avar;
      Opcode = NVPTX::ST_i32_avar;
      Opcode = NVPTX::ST_i64_avar;
      Opcode = NVPTX::ST_f32_avar;
      Opcode = NVPTX::ST_f64_avar;
    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(toType),
                      getI32Imm(toTypeWidth), Addr, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
  // Addressing form 2: symbol + immediate (asi variants).
  } else if (Subtarget.is64Bit()
                 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
                 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
      Opcode = NVPTX::ST_i8_asi;
      Opcode = NVPTX::ST_i16_asi;
      Opcode = NVPTX::ST_i32_asi;
      Opcode = NVPTX::ST_i64_asi;
      Opcode = NVPTX::ST_f32_asi;
      Opcode = NVPTX::ST_f64_asi;
    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(toType),
                      getI32Imm(toTypeWidth), Base, Offset, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
  // Addressing form 3: register + immediate (ari/ari_64 variants).
  } else if (Subtarget.is64Bit()
                 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
                 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
    if (Subtarget.is64Bit()) {
        Opcode = NVPTX::ST_i8_ari_64;
        Opcode = NVPTX::ST_i16_ari_64;
        Opcode = NVPTX::ST_i32_ari_64;
        Opcode = NVPTX::ST_i64_ari_64;
        Opcode = NVPTX::ST_f32_ari_64;
        Opcode = NVPTX::ST_f64_ari_64;
        Opcode = NVPTX::ST_i8_ari;
        Opcode = NVPTX::ST_i16_ari;
        Opcode = NVPTX::ST_i32_ari;
        Opcode = NVPTX::ST_i64_ari;
        Opcode = NVPTX::ST_f32_ari;
        Opcode = NVPTX::ST_f64_ari;
    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(toType),
                      getI32Imm(toTypeWidth), Base, Offset, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
    // Addressing form 4 (fallback): plain register (areg/areg_64 variants).
    if (Subtarget.is64Bit()) {
        Opcode = NVPTX::ST_i8_areg_64;
        Opcode = NVPTX::ST_i16_areg_64;
        Opcode = NVPTX::ST_i32_areg_64;
        Opcode = NVPTX::ST_i64_areg_64;
        Opcode = NVPTX::ST_f32_areg_64;
        Opcode = NVPTX::ST_f64_areg_64;
        Opcode = NVPTX::ST_i8_areg;
        Opcode = NVPTX::ST_i16_areg;
        Opcode = NVPTX::ST_i32_areg;
        Opcode = NVPTX::ST_i64_areg;
        Opcode = NVPTX::ST_f32_areg;
        Opcode = NVPTX::ST_f64_areg;
    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(toType),
                      getI32Imm(toTypeWidth), N2, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
  // Attach the original memory operand so later passes keep alias info.
  if (NVPTXST != NULL) {
    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
    cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
// SelectStoreVector - Select a single STV_* machine node for the custom
// NVPTXISD::StoreV2/StoreV4 vector-store DAG nodes. The machine node's
// operand list is: <value operands...>, volatile flag, code address space,
// vector arity, value kind (float vs. unsigned), element width in bits,
// then the selected address operands and the chain.
// NOTE(review): this listing elides lines (gaps in the original numbering),
// including the inner switches' case labels, breaks and closing braces;
// comments below describe only what the visible code shows.
1590 SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
1591 SDValue Chain = N->getOperand(0);
1592 SDValue Op1 = N->getOperand(1);
1593 SDValue Addr, Offset, Base;
// Element type comes from the first stored value; the memory VT comes from
// the MemSDNode and drives the opcode choice below.
1597 EVT EltVT = Op1.getValueType();
1598 MemSDNode *MemSD = cast<MemSDNode>(N);
1599 EVT StoreVT = MemSD->getMemoryVT();
1601 // Address Space Setting
1602 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
// PTX forbids stores through a pointer into the constant space.
1604 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
1605 report_fatal_error("Cannot store to pointer that points to constant "
1610 // - .volatile is only available for .global and .shared
1611 bool IsVolatile = MemSD->isVolatile();
// Drop the volatile qualifier for address spaces that cannot carry it
// (everything except global/shared/generic).
1612 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1613 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1614 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1617 // Type Setting: toType + toTypeWidth
1618 // - for integer type, always use 'u'
1619 assert(StoreVT.isSimple() && "Store value is not simple");
1620 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
1621 unsigned ToTypeWidth = ScalarVT.getSizeInBits();
1623 if (ScalarVT.isFloatingPoint())
1624 ToType = NVPTX::PTXLdStInstCode::Float;
1626 ToType = NVPTX::PTXLdStInstCode::Unsigned;
1628 SmallVector<SDValue, 12> StOps;
// Gather the value operands (2 for StoreV2, 4 for StoreV4) and pick up the
// address operand N2, which follows the values in the node's operand list.
1632 switch (N->getOpcode()) {
1633 case NVPTXISD::StoreV2:
1634 VecType = NVPTX::PTXLdStInstCode::V2;
1635 StOps.push_back(N->getOperand(1));
1636 StOps.push_back(N->getOperand(2));
1637 N2 = N->getOperand(3);
1639 case NVPTXISD::StoreV4:
1640 VecType = NVPTX::PTXLdStInstCode::V4;
1641 StOps.push_back(N->getOperand(1));
1642 StOps.push_back(N->getOperand(2));
1643 StOps.push_back(N->getOperand(3));
1644 StOps.push_back(N->getOperand(4));
1645 N2 = N->getOperand(5);
// Immediate flag operands encoding how the PTX st instruction is printed.
1651 StOps.push_back(getI32Imm(IsVolatile));
1652 StOps.push_back(getI32Imm(CodeAddrSpace));
1653 StOps.push_back(getI32Imm(VecType));
1654 StOps.push_back(getI32Imm(ToType));
1655 StOps.push_back(getI32Imm(ToTypeWidth));
// Addressing mode 1: direct address (global/extern symbol) -> *_avar forms.
// Opcode is chosen by vector arity then element type (case labels elided
// in this listing).
1657 if (SelectDirectAddr(N2, Addr)) {
1658 switch (N->getOpcode()) {
1661 case NVPTXISD::StoreV2:
1662 switch (EltVT.getSimpleVT().SimpleTy) {
1666 Opcode = NVPTX::STV_i8_v2_avar;
1669 Opcode = NVPTX::STV_i16_v2_avar;
1672 Opcode = NVPTX::STV_i32_v2_avar;
1675 Opcode = NVPTX::STV_i64_v2_avar;
1678 Opcode = NVPTX::STV_f32_v2_avar;
1681 Opcode = NVPTX::STV_f64_v2_avar;
1685 case NVPTXISD::StoreV4:
1686 switch (EltVT.getSimpleVT().SimpleTy) {
1690 Opcode = NVPTX::STV_i8_v4_avar;
1693 Opcode = NVPTX::STV_i16_v4_avar;
1696 Opcode = NVPTX::STV_i32_v4_avar;
1699 Opcode = NVPTX::STV_f32_v4_avar;
1704 StOps.push_back(Addr);
// Addressing mode 2: symbol + immediate offset -> *_asi forms. The 64-bit
// subtarget uses the i64 form of the address matcher.
1705 } else if (Subtarget.is64Bit()
1706 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1707 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1708 switch (N->getOpcode()) {
1711 case NVPTXISD::StoreV2:
1712 switch (EltVT.getSimpleVT().SimpleTy) {
1716 Opcode = NVPTX::STV_i8_v2_asi;
1719 Opcode = NVPTX::STV_i16_v2_asi;
1722 Opcode = NVPTX::STV_i32_v2_asi;
1725 Opcode = NVPTX::STV_i64_v2_asi;
1728 Opcode = NVPTX::STV_f32_v2_asi;
1731 Opcode = NVPTX::STV_f64_v2_asi;
1735 case NVPTXISD::StoreV4:
1736 switch (EltVT.getSimpleVT().SimpleTy) {
1740 Opcode = NVPTX::STV_i8_v4_asi;
1743 Opcode = NVPTX::STV_i16_v4_asi;
1746 Opcode = NVPTX::STV_i32_v4_asi;
1749 Opcode = NVPTX::STV_f32_v4_asi;
1754 StOps.push_back(Base);
1755 StOps.push_back(Offset);
// Addressing mode 3: register + immediate offset -> *_ari / *_ari_64 forms,
// split by pointer width.
1756 } else if (Subtarget.is64Bit()
1757 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1758 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1759 if (Subtarget.is64Bit()) {
1760 switch (N->getOpcode()) {
1763 case NVPTXISD::StoreV2:
1764 switch (EltVT.getSimpleVT().SimpleTy) {
1768 Opcode = NVPTX::STV_i8_v2_ari_64;
1771 Opcode = NVPTX::STV_i16_v2_ari_64;
1774 Opcode = NVPTX::STV_i32_v2_ari_64;
1777 Opcode = NVPTX::STV_i64_v2_ari_64;
1780 Opcode = NVPTX::STV_f32_v2_ari_64;
1783 Opcode = NVPTX::STV_f64_v2_ari_64;
1787 case NVPTXISD::StoreV4:
1788 switch (EltVT.getSimpleVT().SimpleTy) {
1792 Opcode = NVPTX::STV_i8_v4_ari_64;
1795 Opcode = NVPTX::STV_i16_v4_ari_64;
1798 Opcode = NVPTX::STV_i32_v4_ari_64;
1801 Opcode = NVPTX::STV_f32_v4_ari_64;
// 32-bit pointer variant of the reg+imm forms.
1807 switch (N->getOpcode()) {
1810 case NVPTXISD::StoreV2:
1811 switch (EltVT.getSimpleVT().SimpleTy) {
1815 Opcode = NVPTX::STV_i8_v2_ari;
1818 Opcode = NVPTX::STV_i16_v2_ari;
1821 Opcode = NVPTX::STV_i32_v2_ari;
1824 Opcode = NVPTX::STV_i64_v2_ari;
1827 Opcode = NVPTX::STV_f32_v2_ari;
1830 Opcode = NVPTX::STV_f64_v2_ari;
1834 case NVPTXISD::StoreV4:
1835 switch (EltVT.getSimpleVT().SimpleTy) {
1839 Opcode = NVPTX::STV_i8_v4_ari;
1842 Opcode = NVPTX::STV_i16_v4_ari;
1845 Opcode = NVPTX::STV_i32_v4_ari;
1848 Opcode = NVPTX::STV_f32_v4_ari;
1854 StOps.push_back(Base);
1855 StOps.push_back(Offset);
// Addressing mode 4 (fallback): plain register -> *_areg / *_areg_64 forms;
// the unmatched address N2 itself is pushed as the operand.
1857 if (Subtarget.is64Bit()) {
1858 switch (N->getOpcode()) {
1861 case NVPTXISD::StoreV2:
1862 switch (EltVT.getSimpleVT().SimpleTy) {
1866 Opcode = NVPTX::STV_i8_v2_areg_64;
1869 Opcode = NVPTX::STV_i16_v2_areg_64;
1872 Opcode = NVPTX::STV_i32_v2_areg_64;
1875 Opcode = NVPTX::STV_i64_v2_areg_64;
1878 Opcode = NVPTX::STV_f32_v2_areg_64;
1881 Opcode = NVPTX::STV_f64_v2_areg_64;
1885 case NVPTXISD::StoreV4:
1886 switch (EltVT.getSimpleVT().SimpleTy) {
1890 Opcode = NVPTX::STV_i8_v4_areg_64;
1893 Opcode = NVPTX::STV_i16_v4_areg_64;
1896 Opcode = NVPTX::STV_i32_v4_areg_64;
1899 Opcode = NVPTX::STV_f32_v4_areg_64;
1905 switch (N->getOpcode()) {
1908 case NVPTXISD::StoreV2:
1909 switch (EltVT.getSimpleVT().SimpleTy) {
1913 Opcode = NVPTX::STV_i8_v2_areg;
1916 Opcode = NVPTX::STV_i16_v2_areg;
1919 Opcode = NVPTX::STV_i32_v2_areg;
1922 Opcode = NVPTX::STV_i64_v2_areg;
1925 Opcode = NVPTX::STV_f32_v2_areg;
1928 Opcode = NVPTX::STV_f64_v2_areg;
1932 case NVPTXISD::StoreV4:
1933 switch (EltVT.getSimpleVT().SimpleTy) {
1937 Opcode = NVPTX::STV_i8_v4_areg;
1940 Opcode = NVPTX::STV_i16_v4_areg;
1943 Opcode = NVPTX::STV_i32_v4_areg;
1946 Opcode = NVPTX::STV_f32_v4_areg;
1952 StOps.push_back(N2);
1955 StOps.push_back(Chain);
// Build the machine node and attach the original memory operand so later
// passes (e.g. scheduling, AA) keep the memory reference information.
1957 ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
1959 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1960 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1961 cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
// SelectLoadParam - Select a LoadParamMem* machine node for the custom
// NVPTXISD::LoadParam/LoadParamV2/LoadParamV4 nodes, which read an incoming
// kernel/function parameter at a constant offset. The result VT list carries
// one/two/four element values plus chain and glue.
// NOTE(review): this listing elides lines (gaps in the original numbering),
// including case labels and the vector-size setup; comments describe only
// the visible code.
1966 SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
1967 SDValue Chain = Node->getOperand(0);
1968 SDValue Offset = Node->getOperand(2);
1969 SDValue Flag = Node->getOperand(3);
1971 MemSDNode *Mem = cast<MemSDNode>(Node);
// Determine how many elements this param load produces (1, 2 or 4).
1974 switch (Node->getOpcode()) {
1977 case NVPTXISD::LoadParam:
1980 case NVPTXISD::LoadParamV2:
1983 case NVPTXISD::LoadParamV4:
1988 EVT EltVT = Node->getValueType(0);
1989 EVT MemVT = Mem->getMemoryVT();
// Scalar case: pick the opcode from the memory type. Note i1 maps to the
// I8 form as well (two visible assignments to LoadParamMemI8).
1997 switch (MemVT.getSimpleVT().SimpleTy) {
2001 Opc = NVPTX::LoadParamMemI8;
2004 Opc = NVPTX::LoadParamMemI8;
2007 Opc = NVPTX::LoadParamMemI16;
2010 Opc = NVPTX::LoadParamMemI32;
2013 Opc = NVPTX::LoadParamMemI64;
2016 Opc = NVPTX::LoadParamMemF32;
2019 Opc = NVPTX::LoadParamMemF64;
// Two-element case.
2024 switch (MemVT.getSimpleVT().SimpleTy) {
2028 Opc = NVPTX::LoadParamMemV2I8;
2031 Opc = NVPTX::LoadParamMemV2I8;
2034 Opc = NVPTX::LoadParamMemV2I16;
2037 Opc = NVPTX::LoadParamMemV2I32;
2040 Opc = NVPTX::LoadParamMemV2I64;
2043 Opc = NVPTX::LoadParamMemV2F32;
2046 Opc = NVPTX::LoadParamMemV2F64;
// Four-element case (no 64-bit element forms visible here).
2051 switch (MemVT.getSimpleVT().SimpleTy) {
2055 Opc = NVPTX::LoadParamMemV4I8;
2058 Opc = NVPTX::LoadParamMemV4I8;
2061 Opc = NVPTX::LoadParamMemV4I16;
2064 Opc = NVPTX::LoadParamMemV4I32;
2067 Opc = NVPTX::LoadParamMemV4F32;
// Result type list: N element values, then Other (chain) and Glue.
2075 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2076 } else if (VecSize == 2) {
2077 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2079 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2080 VTs = CurDAG->getVTList(&EVTs[0], array_lengthof(EVTs));
// The offset operand must be a constant; it becomes an i32 immediate.
2083 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2085 SmallVector<SDValue, 2> Ops;
2086 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2087 Ops.push_back(Chain);
2088 Ops.push_back(Flag);
2091 CurDAG->getMachineNode(Opc, DL, VTs, Ops);
// SelectStoreRetval - Select a StoreRetval* machine node for the custom
// NVPTXISD::StoreRetval/V2/V4 nodes, which store a function's return value
// at a constant offset. Operand list: element values, i32 offset, chain.
// NOTE(review): case labels and NumElts assignments are elided in this
// listing (gaps in the original numbering).
2095 SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
2097 SDValue Chain = N->getOperand(0);
2098 SDValue Offset = N->getOperand(1);
2099 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2100 MemSDNode *Mem = cast<MemSDNode>(N);
2102 // How many elements do we have?
2103 unsigned NumElts = 1;
2104 switch (N->getOpcode()) {
2107 case NVPTXISD::StoreRetval:
2110 case NVPTXISD::StoreRetvalV2:
2113 case NVPTXISD::StoreRetvalV4:
2118 // Build vector of operands
// Element values start at operand index 2 (after chain and offset).
2119 SmallVector<SDValue, 6> Ops;
2120 for (unsigned i = 0; i < NumElts; ++i)
2121 Ops.push_back(N->getOperand(i + 2));
2122 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2123 Ops.push_back(Chain);
2125 // Determine target opcode
2126 // If we have an i1, use an 8-bit store. The lowering code in
2127 // NVPTXISelLowering will have already emitted an upcast.
2128 unsigned Opcode = 0;
// Scalar retval store: opcode by memory type (i1 shares the I8 form).
2133 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2137 Opcode = NVPTX::StoreRetvalI8;
2140 Opcode = NVPTX::StoreRetvalI8;
2143 Opcode = NVPTX::StoreRetvalI16;
2146 Opcode = NVPTX::StoreRetvalI32;
2149 Opcode = NVPTX::StoreRetvalI64;
2152 Opcode = NVPTX::StoreRetvalF32;
2155 Opcode = NVPTX::StoreRetvalF64;
// Two-element retval store.
2160 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2164 Opcode = NVPTX::StoreRetvalV2I8;
2167 Opcode = NVPTX::StoreRetvalV2I8;
2170 Opcode = NVPTX::StoreRetvalV2I16;
2173 Opcode = NVPTX::StoreRetvalV2I32;
2176 Opcode = NVPTX::StoreRetvalV2I64;
2179 Opcode = NVPTX::StoreRetvalV2F32;
2182 Opcode = NVPTX::StoreRetvalV2F64;
// Four-element retval store (no 64-bit element forms visible here).
2187 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2191 Opcode = NVPTX::StoreRetvalV4I8;
2194 Opcode = NVPTX::StoreRetvalV4I8;
2197 Opcode = NVPTX::StoreRetvalV4I16;
2200 Opcode = NVPTX::StoreRetvalV4I32;
2203 Opcode = NVPTX::StoreRetvalV4F32;
// Emit the node and attach the original memory operand for later passes.
2210 CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
2211 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2212 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2213 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
// SelectStoreParam - Select a StoreParam* machine node for the custom
// NVPTXISD::StoreParam/V2/V4/U32/S32 nodes, which write an outgoing call
// argument into a param slot. Operand list: element values, i32 param
// index, i32 offset, chain, glue. The U32/S32 variants first emit an
// explicit CVT from i16 so the stored value is a proper 32-bit integer.
// NOTE(review): case labels, NumElts assignments and some braces are
// elided in this listing (gaps in the original numbering).
2218 SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
2220 SDValue Chain = N->getOperand(0);
2221 SDValue Param = N->getOperand(1);
2222 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2223 SDValue Offset = N->getOperand(2);
2224 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2225 MemSDNode *Mem = cast<MemSDNode>(N);
2226 SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2228 // How many elements do we have?
// U32/S32 are single-element variants grouped with plain StoreParam.
2229 unsigned NumElts = 1;
2230 switch (N->getOpcode()) {
2233 case NVPTXISD::StoreParamU32:
2234 case NVPTXISD::StoreParamS32:
2235 case NVPTXISD::StoreParam:
2238 case NVPTXISD::StoreParamV2:
2241 case NVPTXISD::StoreParamV4:
2246 // Build vector of operands
// Element values start at operand index 3 (after chain, param, offset).
2247 SmallVector<SDValue, 8> Ops;
2248 for (unsigned i = 0; i < NumElts; ++i)
2249 Ops.push_back(N->getOperand(i + 3));
2250 Ops.push_back(CurDAG->getTargetConstant(ParamVal, MVT::i32));
2251 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2252 Ops.push_back(Chain);
2253 Ops.push_back(Flag);
2255 // Determine target opcode
2256 // If we have an i1, use an 8-bit store. The lowering code in
2257 // NVPTXISelLowering will have already emitted an upcast.
2258 unsigned Opcode = 0;
2259 switch (N->getOpcode()) {
// Scalar param store: opcode by memory type (i1 shares the I8 form).
2265 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2269 Opcode = NVPTX::StoreParamI8;
2272 Opcode = NVPTX::StoreParamI8;
2275 Opcode = NVPTX::StoreParamI16;
2278 Opcode = NVPTX::StoreParamI32;
2281 Opcode = NVPTX::StoreParamI64;
2284 Opcode = NVPTX::StoreParamF32;
2287 Opcode = NVPTX::StoreParamF64;
// Two-element param store.
2292 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2296 Opcode = NVPTX::StoreParamV2I8;
2299 Opcode = NVPTX::StoreParamV2I8;
2302 Opcode = NVPTX::StoreParamV2I16;
2305 Opcode = NVPTX::StoreParamV2I32;
2308 Opcode = NVPTX::StoreParamV2I64;
2311 Opcode = NVPTX::StoreParamV2F32;
2314 Opcode = NVPTX::StoreParamV2F64;
// Four-element param store (no 64-bit element forms visible here).
2319 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2323 Opcode = NVPTX::StoreParamV4I8;
2326 Opcode = NVPTX::StoreParamV4I8;
2329 Opcode = NVPTX::StoreParamV4I16;
2332 Opcode = NVPTX::StoreParamV4I32;
2335 Opcode = NVPTX::StoreParamV4F32;
2341 // Special case: if we have a sign-extend/zero-extend node, insert the
2342 // conversion instruction first, and use that as the value operand to
2343 // the selected StoreParam node.
2344 case NVPTXISD::StoreParamU32: {
// Zero-extend i16 -> u32 via CVT_u32_u16, then store as a plain I32.
2345 Opcode = NVPTX::StoreParamI32;
2346 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2348 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2349 MVT::i32, Ops[0], CvtNone);
2350 Ops[0] = SDValue(Cvt, 0);
2353 case NVPTXISD::StoreParamS32: {
// Sign-extend i16 -> s32 via CVT_s32_s16, then store as a plain I32.
2354 Opcode = NVPTX::StoreParamI32;
2355 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2357 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2358 MVT::i32, Ops[0], CvtNone);
2359 Ops[0] = SDValue(Cvt, 0);
// Emit the node (chain + glue results) and attach the memory operand.
2364 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
2366 CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
2367 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2368 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2369 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2374 // SelectDirectAddr - Match a direct address for DAG.
2375 // A direct address could be a globaladdress or externalsymbol.
// Returns true and sets Address when N is directly addressable:
// a TargetGlobalAddress/TargetExternalSymbol, an NVPTXISD::Wrapper around
// one, or the nvvm.ptr.gen.to.param intrinsic applied to a MoveParam
// (which recurses on the underlying address).
// NOTE(review): the "return true" lines inside the first two branches are
// elided in this listing (gaps in the original numbering).
2376 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
2377 // Return true if TGA or ES.
2378 if (N.getOpcode() == ISD::TargetGlobalAddress ||
2379 N.getOpcode() == ISD::TargetExternalSymbol) {
// Peel off the NVPTX address wrapper and use its operand directly.
2383 if (N.getOpcode() == NVPTXISD::Wrapper) {
2384 Address = N.getOperand(0);
// intrinsic op 0 is the intrinsic ID; op 1 is the pointer argument.
2387 if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2388 unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
2389 if (IID == Intrinsic::nvvm_ptr_gen_to_param)
2390 if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
2391 return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
// SelectADDRsi_imp - Match "symbol + constant" addressing: an ISD::ADD of a
// direct address (see SelectDirectAddr) and a ConstantSDNode. On success
// Base is the matched symbol and Offset a target constant of type mvt.
// NOTE(review): the success "return true" and closing braces are elided in
// this listing (gaps in the original numbering).
2397 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
2398 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
2399 if (Addr.getOpcode() == ISD::ADD) {
2400 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2401 SDValue base = Addr.getOperand(0);
2402 if (SelectDirectAddr(base, Base)) {
2403 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
// SelectADDRsi - symbol+imm addressing for 32-bit pointers; delegates to
// SelectADDRsi_imp with an i32 offset type.
2412 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
2413 SDValue &Base, SDValue &Offset) {
2414 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
// SelectADDRsi64 - symbol+imm addressing for 64-bit pointers; delegates to
// SelectADDRsi_imp with an i64 offset type.
2418 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
2419 SDValue &Base, SDValue &Offset) {
2420 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
// SelectADDRri_imp - Match "register + constant" addressing. Matches a bare
// FrameIndex (offset 0), or an ISD::ADD whose RHS is constant; rejects
// direct symbols (those are handled by the direct/si matchers and by call
// selection). Base is a frame index or the LHS register; Offset is a
// target constant of type mvt.
// NOTE(review): several return statements and closing braces are elided in
// this listing (gaps in the original numbering).
2424 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
2425 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
// A plain frame index is addressable with a zero offset.
2426 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2427 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
2428 Offset = CurDAG->getTargetConstant(0, mvt);
2431 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
2432 Addr.getOpcode() == ISD::TargetGlobalAddress)
2433 return false; // direct calls.
2435 if (Addr.getOpcode() == ISD::ADD) {
// symbol+imm must not be matched here; let the *_si matchers take it.
2436 if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
2439 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2440 if (FrameIndexSDNode *FIN =
2441 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
2442 // Constant offset from frame ref.
2443 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
2445 Base = Addr.getOperand(0);
2446 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
// SelectADDRri - reg+imm addressing for 32-bit pointers; delegates to
// SelectADDRri_imp with an i32 offset type.
2454 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
2455 SDValue &Base, SDValue &Offset) {
2456 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
// SelectADDRri64 - reg+imm addressing for 64-bit pointers; delegates to
// SelectADDRri_imp with an i64 offset type.
2460 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
2461 SDValue &Base, SDValue &Offset) {
2462 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
// ChkMemSDNodeAddressSpace - Return true if the memory node N accesses a
// pointer whose IR-level pointer type has address space spN. Reads the
// underlying IR Value from the node's memory operand.
// NOTE(review): the null-check / failure paths between the visible lines
// are elided in this listing (gaps in the original numbering).
2465 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
2466 unsigned int spN) const {
2467 const Value *Src = NULL;
2468 // Even though MemIntrinsicSDNode is a subclass of MemSDNode,
2469 // the classof() for MemSDNode does not include MemIntrinsicSDNode
2470 // (See SelectionDAGNodes.h). So we need to check for both.
2471 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
2472 Src = mN->getSrcValue();
2473 } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
2474 Src = mN->getSrcValue();
// Compare the address space of the source pointer's IR type.
2478 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
2479 return (PT->getAddressSpace() == spN);
2483 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
2484 /// inline asm expressions.
2485 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
2486 const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
2488 switch (ConstraintCode) {
2492 if (SelectDirectAddr(Op, Op0)) {
2493 OutOps.push_back(Op0);
2494 OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
2497 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
2498 OutOps.push_back(Op0);
2499 OutOps.push_back(Op1);