From ef0ccc93203e99077632cec7a0a15b8e1b704aee Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Fri, 28 Jun 2013 17:58:04 +0000 Subject: [PATCH] [NVPTX] Clean up comparison/select/convert patterns and factor out PTX instructions from their patterns Test case is no breakage git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185175 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTX.h | 47 + lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 133 ++- lib/Target/NVPTX/NVPTXAsmPrinter.h | 4 + lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 23 +- lib/Target/NVPTX/NVPTXISelLowering.cpp | 6 - lib/Target/NVPTX/NVPTXISelLowering.h | 3 - lib/Target/NVPTX/NVPTXInstrInfo.td | 1332 ++++++++++++------------ lib/Target/NVPTX/NVPTXIntrinsics.td | 530 +++++----- test/CodeGen/NVPTX/pr13291-i1-store.ll | 8 +- 9 files changed, 1133 insertions(+), 953 deletions(-) diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h index 85cdb8b642d..7be3f9aa2f7 100644 --- a/lib/Target/NVPTX/NVPTX.h +++ b/lib/Target/NVPTX/NVPTX.h @@ -131,6 +131,53 @@ enum VecType { V4 = 4 }; } + +/// PTXCvtMode - Conversion code enumeration +namespace PTXCvtMode { +enum CvtMode { + NONE = 0, + RNI, + RZI, + RMI, + RPI, + RN, + RZ, + RM, + RP, + + BASE_MASK = 0x0F, + FTZ_FLAG = 0x10, + SAT_FLAG = 0x20 +}; +} + +/// PTXCmpMode - Comparison mode enumeration +namespace PTXCmpMode { +enum CmpMode { + EQ = 0, + NE, + LT, + LE, + GT, + GE, + LO, + LS, + HI, + HS, + EQU, + NEU, + LTU, + LEU, + GTU, + GEU, + NUM, + // NAN is a MACRO + NotANumber, + + BASE_MASK = 0xFF, + FTZ_FLAG = 0x100 +}; +} } } // end namespace llvm; diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 9188262ca92..d7eecede8a4 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -693,6 +693,130 @@ void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum, llvm_unreachable("Empty Modifier"); } +void NVPTXAsmPrinter::printCvtMode(const MachineInstr 
*MI, int OpNum,
+                                   raw_ostream &O, const char *Modifier) {
+  const MachineOperand &MO = MI->getOperand(OpNum);
+  int64_t Imm = MO.getImm();
+  // Modifier is required: "base", "ftz" or "sat" selects the part to print.
+  if (Modifier && strcmp(Modifier, "ftz") == 0) {
+    // FTZ flag
+    if (Imm & NVPTX::PTXCvtMode::FTZ_FLAG)
+      O << ".ftz";
+  } else if (Modifier && strcmp(Modifier, "sat") == 0) {
+    // SAT flag
+    if (Imm & NVPTX::PTXCvtMode::SAT_FLAG)
+      O << ".sat";
+  } else if (Modifier && strcmp(Modifier, "base") == 0) {
+    // Rounding mode; NONE and unknown values print nothing
+    switch (Imm & NVPTX::PTXCvtMode::BASE_MASK) {
+    default:
+      return;
+    case NVPTX::PTXCvtMode::NONE:
+      break;
+    case NVPTX::PTXCvtMode::RNI:
+      O << ".rni";
+      break;
+    case NVPTX::PTXCvtMode::RZI:
+      O << ".rzi";
+      break;
+    case NVPTX::PTXCvtMode::RMI:
+      O << ".rmi";
+      break;
+    case NVPTX::PTXCvtMode::RPI:
+      O << ".rpi";
+      break;
+    case NVPTX::PTXCvtMode::RN:
+      O << ".rn";
+      break;
+    case NVPTX::PTXCvtMode::RZ:
+      O << ".rz";
+      break;
+    case NVPTX::PTXCvtMode::RM:
+      O << ".rm";
+      break;
+    case NVPTX::PTXCvtMode::RP:
+      O << ".rp";
+      break;
+    }
+  } else {
+    llvm_unreachable("Missing or invalid conversion modifier");
+  }
+}
+
+void NVPTXAsmPrinter::printCmpMode(const MachineInstr *MI, int OpNum,
+                                   raw_ostream &O, const char *Modifier) {
+  const MachineOperand &MO = MI->getOperand(OpNum);
+  int64_t Imm = MO.getImm();
+  // Modifier is required: "base" or "ftz" selects the part to print.
+  if (Modifier && strcmp(Modifier, "ftz") == 0) {
+    // FTZ flag
+    if (Imm & NVPTX::PTXCmpMode::FTZ_FLAG)
+      O << ".ftz";
+  } else if (Modifier && strcmp(Modifier, "base") == 0) {
+    switch (Imm & NVPTX::PTXCmpMode::BASE_MASK) {
+    default:
+      return;
+    case NVPTX::PTXCmpMode::EQ:
+      O << ".eq";
+      break;
+    case NVPTX::PTXCmpMode::NE:
+      O << ".ne";
+      break;
+    case NVPTX::PTXCmpMode::LT:
+      O << ".lt";
+      break;
+    case NVPTX::PTXCmpMode::LE:
+      O << ".le";
+      break;
+    case NVPTX::PTXCmpMode::GT:
+      O << ".gt";
+      break;
+    case NVPTX::PTXCmpMode::GE:
+      O << ".ge";
+      break;
+    case NVPTX::PTXCmpMode::LO:
+      O << ".lo";
+      break;
+    case NVPTX::PTXCmpMode::LS:
+      O << ".ls";
+      break;
+    case NVPTX::PTXCmpMode::HI:
+      O << ".hi";
+      break;
+    case NVPTX::PTXCmpMode::HS:
+      O << ".hs";
+      break;
+    case NVPTX::PTXCmpMode::EQU:
+      O << ".equ";
+      break;
+    case NVPTX::PTXCmpMode::NEU:
+      O << ".neu";
+      break;
+    case NVPTX::PTXCmpMode::LTU:
+      O << ".ltu";
+      break;
+    case NVPTX::PTXCmpMode::LEU:
+      O << ".leu";
+      break;
+    case NVPTX::PTXCmpMode::GTU:
+      O << ".gtu";
+      break;
+    case NVPTX::PTXCmpMode::GEU:
+      O << ".geu";
+      break;
+    case NVPTX::PTXCmpMode::NUM:
+      O << ".num";
+      break;
+    case NVPTX::PTXCmpMode::NotANumber:
+      O << ".nan";
+      break;
+    }
+  } else {
+    llvm_unreachable("Missing or invalid comparison modifier");
+  }
+}
+
+
 void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
 
   emitLinkageDirective(F, O);
@@ -2033,10 +2157,6 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
   case NVPTX::StoreParamI32:
   case NVPTX::StoreParamI64:
   case NVPTX::StoreParamI8:
-  case NVPTX::StoreParamS32I8:
-  case NVPTX::StoreParamU32I8:
-  case NVPTX::StoreParamS32I16:
-  case NVPTX::StoreParamU32I16:
   case NVPTX::StoreRetvalF32:
   case NVPTX::StoreRetvalF64:
   case NVPTX::StoreRetvalI16:
@@ -2056,11 +2176,6 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
   case NVPTX::LoadParamMemI32:
   case NVPTX::LoadParamMemI64:
   case NVPTX::LoadParamMemI8:
-  case NVPTX::LoadParamRegF32:
-  case NVPTX::LoadParamRegF64:
-  case NVPTX::LoadParamRegI16:
-  case NVPTX::LoadParamRegI32:
-  case NVPTX::LoadParamRegI64:
   case NVPTX::PrototypeInst:
   case NVPTX::DBG_VALUE:
     return true;
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 55f29436672..c7b7fb0ae0d 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -198,6 +198,10 @@ private:
                     const char *Modifier = 0);
   void printLdStCode(const MachineInstr *MI, int opNum, raw_ostream &O,
                      const char *Modifier = 0);
+  void printCvtMode(const MachineInstr *MI, int OpNum, raw_ostream &O,
+                    const char *Modifier = 0);
+  void printCmpMode(const MachineInstr *MI, int OpNum, raw_ostream &O,
+                    const char *Modifier = 0);
   void printVecModifiedImmediate(const MachineOperand &MO, const char
*Modifier, raw_ostream &O); void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 7a0a59f1ce4..4457ec349cb 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -1965,13 +1965,28 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) { break; } break; - case NVPTXISD::StoreParamU32: - Opcode = NVPTX::StoreParamU32I16; + // Special case: if we have a sign-extend/zero-extend node, insert the + // conversion instruction first, and use that as the value operand to + // the selected StoreParam node. + case NVPTXISD::StoreParamU32: { + Opcode = NVPTX::StoreParamI32; + SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, + MVT::i32); + SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL, + MVT::i32, Ops[0], CvtNone); + Ops[0] = SDValue(Cvt, 0); break; - case NVPTXISD::StoreParamS32: - Opcode = NVPTX::StoreParamS32I16; + } + case NVPTXISD::StoreParamS32: { + Opcode = NVPTX::StoreParamI32; + SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, + MVT::i32); + SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL, + MVT::i32, Ops[0], CvtNone); + Ops[0] = SDValue(Cvt, 0); break; } + } SDNode *Ret = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 0396a6421a6..338fe7c155f 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -259,8 +259,6 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { return "NVPTXISD::StoreParamS32"; case NVPTXISD::StoreParamU32: return "NVPTXISD::StoreParamU32"; - case NVPTXISD::MoveToParam: - return "NVPTXISD::MoveToParam"; case NVPTXISD::CallArgBegin: return "NVPTXISD::CallArgBegin"; case NVPTXISD::CallArg: @@ -279,10 +277,6 @@ const char 
*NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { return "NVPTXISD::Prototype"; case NVPTXISD::MoveParam: return "NVPTXISD::MoveParam"; - case NVPTXISD::MoveRetval: - return "NVPTXISD::MoveRetval"; - case NVPTXISD::MoveToRetval: - return "NVPTXISD::MoveToRetval"; case NVPTXISD::StoreRetval: return "NVPTXISD::StoreRetval"; case NVPTXISD::StoreRetvalV2: diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index 43c63ae7396..5e26b1c012f 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -35,7 +35,6 @@ enum NodeType { DeclareRetParam, DeclareRet, DeclareScalarRet, - MoveToParam, PrintCall, PrintCallUni, CallArgBegin, @@ -47,8 +46,6 @@ enum NodeType { CallSymbol, Prototype, MoveParam, - MoveRetval, - MoveToRetval, PseudoUseParam, RETURN, CallSeqBegin, diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td index 965af511e1f..32193641f21 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -32,6 +32,86 @@ def isVecOther : VecInstTypeEnum<15>; def brtarget : Operand; +// CVT conversion modes +// These must match the enum in NVPTX.h +def CvtNONE : PatLeaf<(i32 0x0)>; +def CvtRNI : PatLeaf<(i32 0x1)>; +def CvtRZI : PatLeaf<(i32 0x2)>; +def CvtRMI : PatLeaf<(i32 0x3)>; +def CvtRPI : PatLeaf<(i32 0x4)>; +def CvtRN : PatLeaf<(i32 0x5)>; +def CvtRZ : PatLeaf<(i32 0x6)>; +def CvtRM : PatLeaf<(i32 0x7)>; +def CvtRP : PatLeaf<(i32 0x8)>; + +def CvtNONE_FTZ : PatLeaf<(i32 0x10)>; +def CvtRNI_FTZ : PatLeaf<(i32 0x11)>; +def CvtRZI_FTZ : PatLeaf<(i32 0x12)>; +def CvtRMI_FTZ : PatLeaf<(i32 0x13)>; +def CvtRPI_FTZ : PatLeaf<(i32 0x14)>; +def CvtRN_FTZ : PatLeaf<(i32 0x15)>; +def CvtRZ_FTZ : PatLeaf<(i32 0x16)>; +def CvtRM_FTZ : PatLeaf<(i32 0x17)>; +def CvtRP_FTZ : PatLeaf<(i32 0x18)>; + +def CvtSAT : PatLeaf<(i32 0x20)>; +def CvtSAT_FTZ : PatLeaf<(i32 0x30)>; + +def CvtMode : Operand { + let PrintMethod = "printCvtMode"; +} 
+
+// Compare modes: base codes 0-17, OR'ed with 0x100 for the .ftz variants
+// These must match the PTXCmpMode enum in NVPTX.h
+def CmpEQ   : PatLeaf<(i32 0)>;
+def CmpNE   : PatLeaf<(i32 1)>;
+def CmpLT   : PatLeaf<(i32 2)>;
+def CmpLE   : PatLeaf<(i32 3)>;
+def CmpGT   : PatLeaf<(i32 4)>;
+def CmpGE   : PatLeaf<(i32 5)>;
+def CmpLO   : PatLeaf<(i32 6)>;
+def CmpLS   : PatLeaf<(i32 7)>;
+def CmpHI   : PatLeaf<(i32 8)>;
+def CmpHS   : PatLeaf<(i32 9)>;
+def CmpEQU  : PatLeaf<(i32 10)>;
+def CmpNEU  : PatLeaf<(i32 11)>;
+def CmpLTU  : PatLeaf<(i32 12)>;
+def CmpLEU  : PatLeaf<(i32 13)>;
+def CmpGTU  : PatLeaf<(i32 14)>;
+def CmpGEU  : PatLeaf<(i32 15)>;
+def CmpNUM  : PatLeaf<(i32 16)>;
+def CmpNAN  : PatLeaf<(i32 17)>;
+
+def CmpEQ_FTZ   : PatLeaf<(i32 0x100)>;
+def CmpNE_FTZ   : PatLeaf<(i32 0x101)>;
+def CmpLT_FTZ   : PatLeaf<(i32 0x102)>;
+def CmpLE_FTZ   : PatLeaf<(i32 0x103)>;
+def CmpGT_FTZ   : PatLeaf<(i32 0x104)>;
+def CmpGE_FTZ   : PatLeaf<(i32 0x105)>;
+def CmpLO_FTZ   : PatLeaf<(i32 0x106)>;
+def CmpLS_FTZ   : PatLeaf<(i32 0x107)>;
+def CmpHI_FTZ   : PatLeaf<(i32 0x108)>;
+def CmpHS_FTZ   : PatLeaf<(i32 0x109)>;
+def CmpEQU_FTZ  : PatLeaf<(i32 0x10A)>;
+def CmpNEU_FTZ  : PatLeaf<(i32 0x10B)>;
+def CmpLTU_FTZ  : PatLeaf<(i32 0x10C)>;
+def CmpLEU_FTZ  : PatLeaf<(i32 0x10D)>;
+def CmpGTU_FTZ  : PatLeaf<(i32 0x10E)>;
+def CmpGEU_FTZ  : PatLeaf<(i32 0x10F)>;
+def CmpNUM_FTZ  : PatLeaf<(i32 0x110)>;
+def CmpNAN_FTZ  : PatLeaf<(i32 0x111)>;
+
+def CmpMode : Operand<i32> {
+  let PrintMethod = "printCmpMode";
+}
+
+def F32ConstZero : Operand<f32>, PatLeaf<(f32 fpimm)>, SDNodeXForm<fpimm, [{
+    return CurDAG->getTargetConstantFP(0.0, MVT::f32);
+  }]>;  // xform always yields the f32 constant 0.0
+def F32ConstOne : Operand<f32>, PatLeaf<(f32 fpimm)>, SDNodeXForm<fpimm, [{
+    return CurDAG->getTargetConstantFP(1.0, MVT::f32);
+  }]>;  // xform always yields the f32 constant 1.0
+
 //===----------------------------------------------------------------------===//
 // NVPTX Instruction Predicate Definitions
 //===----------------------------------------------------------------------===//
@@ -213,6 +293,72 @@ multiclass F2<string OpcStr, SDNode OpNode> {
 // NVPTX Instructions.
//===----------------------------------------------------------------------===// +//----------------------------------- +// General Type Conversion +//----------------------------------- + +// Generate a cvt to the given type from all possible types. +// Each instance takes a CvtMode immediate that defines the conversion mode to +// use. It can be CvtNONE to omit a conversion mode. +multiclass CVT_FROM_ALL { + def _s16 : NVPTXInst<(outs RC:$dst), + (ins Int16Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".s16\t$dst, $src;"), + []>; + def _u16 : NVPTXInst<(outs RC:$dst), + (ins Int16Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".u16\t$dst, $src;"), + []>; + def _f16 : NVPTXInst<(outs RC:$dst), + (ins Int16Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".f16\t$dst, $src;"), + []>; + def _s32 : NVPTXInst<(outs RC:$dst), + (ins Int32Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".s32\t$dst, $src;"), + []>; + def _u32 : NVPTXInst<(outs RC:$dst), + (ins Int32Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".u32\t$dst, $src;"), + []>; + def _s64 : NVPTXInst<(outs RC:$dst), + (ins Int64Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".s64\t$dst, $src;"), + []>; + def _u64 : NVPTXInst<(outs RC:$dst), + (ins Int64Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".u64\t$dst, $src;"), + []>; + def _f32 : NVPTXInst<(outs RC:$dst), + (ins Float32Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".f32\t$dst, $src;"), + []>; + def _f64 : NVPTXInst<(outs RC:$dst), + (ins Float64Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".f64\t$dst, $src;"), + []>; +} + +// Generate 
a cvt to all possible types. +defm CVT_s16 : CVT_FROM_ALL<"s16", Int16Regs>; +defm CVT_u16 : CVT_FROM_ALL<"u16", Int16Regs>; +defm CVT_f16 : CVT_FROM_ALL<"f16", Int16Regs>; +defm CVT_s32 : CVT_FROM_ALL<"s32", Int32Regs>; +defm CVT_u32 : CVT_FROM_ALL<"u32", Int32Regs>; +defm CVT_s64 : CVT_FROM_ALL<"s64", Int64Regs>; +defm CVT_u64 : CVT_FROM_ALL<"u64", Int64Regs>; +defm CVT_f32 : CVT_FROM_ALL<"f32", Float32Regs>; +defm CVT_f64 : CVT_FROM_ALL<"f64", Float64Regs>; + //----------------------------------- // Integer Arithmetic //----------------------------------- @@ -740,6 +886,41 @@ def COSF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), "cos.approx.f32 \t$dst, $src;", [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>; +// Lower (frem x, y) into (sub x, (mul (floor (div x, y)) y)) +// e.g. "poor man's fmod()" + +// frem - f32 FTZ +def : Pat<(frem Float32Regs:$x, Float32Regs:$y), + (FSUBf32rr_ftz Float32Regs:$x, (FMULf32rr_ftz (CVT_f32_f32 + (FDIV32rr_prec_ftz Float32Regs:$x, Float32Regs:$y), CvtRMI_FTZ), + Float32Regs:$y))>, + Requires<[doF32FTZ]>; +def : Pat<(frem Float32Regs:$x, fpimm:$y), + (FSUBf32rr_ftz Float32Regs:$x, (FMULf32ri_ftz (CVT_f32_f32 + (FDIV32ri_prec_ftz Float32Regs:$x, fpimm:$y), CvtRMI_FTZ), + fpimm:$y))>, + Requires<[doF32FTZ]>; + +// frem - f32 +def : Pat<(frem Float32Regs:$x, Float32Regs:$y), + (FSUBf32rr Float32Regs:$x, (FMULf32rr (CVT_f32_f32 + (FDIV32rr_prec Float32Regs:$x, Float32Regs:$y), CvtRMI), + Float32Regs:$y))>; +def : Pat<(frem Float32Regs:$x, fpimm:$y), + (FSUBf32rr Float32Regs:$x, (FMULf32ri (CVT_f32_f32 + (FDIV32ri_prec Float32Regs:$x, fpimm:$y), CvtRMI), + fpimm:$y))>; + +// frem - f64 +def : Pat<(frem Float64Regs:$x, Float64Regs:$y), + (FSUBf64rr Float64Regs:$x, (FMULf64rr (CVT_f64_f64 + (FDIV64rr Float64Regs:$x, Float64Regs:$y), CvtRMI), + Float64Regs:$y))>; +def : Pat<(frem Float64Regs:$x, fpimm:$y), + (FSUBf64rr Float64Regs:$x, (FMULf64ri (CVT_f64_f64 + (FDIV64ri Float64Regs:$x, fpimm:$y), CvtRMI), + fpimm:$y))>; 
+ //----------------------------------- // Logical Arithmetic //----------------------------------- @@ -830,7 +1011,7 @@ defm SHL : LSHIFT_FORMAT<"shl.b", shl>; // For shifts, the second src operand must be 32-bit value // Need to add cvt for the 8-bits. -multiclass RSHIFT_FORMAT { +multiclass RSHIFT_FORMAT { def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int32Regs:$b), !strconcat(OpcStr, "64 \t$dst, $a, $b;"), @@ -864,8 +1045,8 @@ multiclass RSHIFT_FORMAT { (i32 imm:$b)))]>; } -defm SRA : RSHIFT_FORMAT<"shr.s", sra, "cvt.s16.s8">; -defm SRL : RSHIFT_FORMAT<"shr.u", srl, "cvt.u16.u8">; +defm SRA : RSHIFT_FORMAT<"shr.s", sra>; +defm SRL : RSHIFT_FORMAT<"shr.u", srl>; // 32bit def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst), @@ -962,6 +1143,120 @@ def ROTR64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>; +//----------------------------------- +// General Comparison +//----------------------------------- + +// General setp instructions +multiclass SETP { + def rr : NVPTXInst<(outs Int1Regs:$dst), + (ins RC:$a, RC:$b, CmpMode:$cmp), + !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, "\t$dst, $a, $b;"), + []>; + def ri : NVPTXInst<(outs Int1Regs:$dst), + (ins RC:$a, ImmCls:$b, CmpMode:$cmp), + !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, "\t$dst, $a, $b;"), + []>; + def ir : NVPTXInst<(outs Int1Regs:$dst), + (ins ImmCls:$a, RC:$b, CmpMode:$cmp), + !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, "\t$dst, $a, $b;"), + []>; +} + +defm SETP_b16 : SETP<"b16", Int16Regs, i16imm>; +defm SETP_s16 : SETP<"s16", Int16Regs, i16imm>; +defm SETP_u16 : SETP<"u16", Int16Regs, i16imm>; +defm SETP_b32 : SETP<"b32", Int32Regs, i32imm>; +defm SETP_s32 : SETP<"s32", Int32Regs, i32imm>; +defm SETP_u32 : SETP<"u32", Int32Regs, i32imm>; +defm SETP_b64 : SETP<"b64", Int64Regs, i64imm>; +defm SETP_s64 : SETP<"s64", Int64Regs, i64imm>; +defm SETP_u64 : SETP<"u64", Int64Regs, i64imm>; +defm 
SETP_f32 : SETP<"f32", Float32Regs, f32imm>;
+defm SETP_f64 : SETP<"f64", Float64Regs, f64imm>;
+
+// General set instructions: print "set<cmp>[.ftz].u32.<type>" into Int32Regs
+multiclass SET<string TypeStr, RegisterClass RC, Operand ImmCls> {
+  def rr : NVPTXInst<(outs Int32Regs:$dst),
+                     (ins RC:$a, RC:$b, CmpMode:$cmp),
+                     !strconcat("set${cmp:base}${cmp:ftz}.u32.", TypeStr, "\t$dst, $a, $b;"), []>;
+  def ri : NVPTXInst<(outs Int32Regs:$dst),
+                     (ins RC:$a, ImmCls:$b, CmpMode:$cmp),
+                     !strconcat("set${cmp:base}${cmp:ftz}.u32.", TypeStr, "\t$dst, $a, $b;"), []>;
+  def ir : NVPTXInst<(outs Int32Regs:$dst),
+                     (ins ImmCls:$a, RC:$b, CmpMode:$cmp),
+                     !strconcat("set${cmp:base}${cmp:ftz}.u32.", TypeStr, "\t$dst, $a, $b;"), []>;
+}
+
+defm SET_b16 : SET<"b16", Int16Regs, i16imm>;
+defm SET_s16 : SET<"s16", Int16Regs, i16imm>;
+defm SET_u16 : SET<"u16", Int16Regs, i16imm>;
+defm SET_b32 : SET<"b32", Int32Regs, i32imm>;
+defm SET_s32 : SET<"s32", Int32Regs, i32imm>;
+defm SET_u32 : SET<"u32", Int32Regs, i32imm>;
+defm SET_b64 : SET<"b64", Int64Regs, i64imm>;
+defm SET_s64 : SET<"s64", Int64Regs, i64imm>;
+defm SET_u64 : SET<"u64", Int64Regs, i64imm>;
+defm SET_f32 : SET<"f32", Float32Regs, f32imm>;
+defm SET_f64 : SET<"f64", Float64Regs, f64imm>;
+
+//-----------------------------------
+// General Selection
+//-----------------------------------
+
+// General selp instructions (no selection patterns attached)
+multiclass SELP<string TypeStr, RegisterClass RC, Operand ImmCls> {
+  def rr : NVPTXInst<(outs RC:$dst),
+                     (ins RC:$a, RC:$b, Int1Regs:$p),
+                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
+  def ri : NVPTXInst<(outs RC:$dst),
+                     (ins RC:$a, ImmCls:$b, Int1Regs:$p),
+                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
+  def ir : NVPTXInst<(outs RC:$dst),
+                     (ins ImmCls:$a, RC:$b, Int1Regs:$p),
+                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
+  def ii : NVPTXInst<(outs RC:$dst),
+                     (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
+                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
+}
+
+multiclass SELP_PATTERN<string TypeStr, RegisterClass RC, Operand ImmCls,
+                        SDNode ImmNode> {
+  def rr : NVPTXInst<(outs RC:$dst),
+                     (ins RC:$a, RC:$b, Int1Regs:$p),
+                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
+                     [(set RC:$dst, (select Int1Regs:$p, RC:$a,
RC:$b))]>; + def ri : NVPTXInst<(outs RC:$dst), + (ins RC:$a, ImmCls:$b, Int1Regs:$p), + !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), + [(set RC:$dst, (select Int1Regs:$p, RC:$a, ImmNode:$b))]>; + def ir : NVPTXInst<(outs RC:$dst), + (ins ImmCls:$a, RC:$b, Int1Regs:$p), + !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), + [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, RC:$b))]>; + def ii : NVPTXInst<(outs RC:$dst), + (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p), + !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), + [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, ImmNode:$b))]>; +} + +defm SELP_b16 : SELP_PATTERN<"b16", Int16Regs, i16imm, imm>; +defm SELP_s16 : SELP<"s16", Int16Regs, i16imm>; +defm SELP_u16 : SELP<"u16", Int16Regs, i16imm>; +defm SELP_b32 : SELP_PATTERN<"b32", Int32Regs, i32imm, imm>; +defm SELP_s32 : SELP<"s32", Int32Regs, i32imm>; +defm SELP_u32 : SELP<"u32", Int32Regs, i32imm>; +defm SELP_b64 : SELP_PATTERN<"b64", Int64Regs, i64imm, imm>; +defm SELP_s64 : SELP<"s64", Int64Regs, i64imm>; +defm SELP_u64 : SELP<"u64", Int64Regs, i64imm>; +defm SELP_f32 : SELP_PATTERN<"f32", Float32Regs, f32imm, fpimm>; +defm SELP_f64 : SELP_PATTERN<"f64", Float64Regs, f64imm, fpimm>; + +// Special select for predicate operands +def : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)), + (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a), + (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>; + //----------------------------------- // Data Movement (Load / Store, Move) //----------------------------------- @@ -1053,367 +1348,194 @@ def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr), // Comparison and Selection //----------------------------------- -// Generate string block like -// { -// .reg .pred p; -// setp.gt.s16 p, %a, %b; -// selp.s16 %dst, -1, 0, p; -// } -// when OpcStr=setp.gt.s sz1=16 sz2=16 d=%dst a=%a b=%b -class Set_Str { - string t1 = "{{\n\t.reg .pred p;\n\t"; - string t2 = !strconcat(t1 , OpcStr); - string t3 = !strconcat(t2 , 
sz1); - string t4 = !strconcat(t3 , " \tp, "); - string t5 = !strconcat(t4 , a); - string t6 = !strconcat(t5 , ", "); - string t7 = !strconcat(t6 , b); - string t8 = !strconcat(t7 , ";\n\tselp.s"); - string t9 = !strconcat(t8 , sz2); - string t10 = !strconcat(t9, " \t"); - string t11 = !strconcat(t10, d); - string s = !strconcat(t11, ", -1, 0, p;\n\t}}"); +multiclass ISET_FORMAT { + // i16 -> pred + def : Pat<(i1 (OpNode Int16Regs:$a, Int16Regs:$b)), + (setp_16rr Int16Regs:$a, Int16Regs:$b, Mode)>; + def : Pat<(i1 (OpNode Int16Regs:$a, imm:$b)), + (setp_16ri Int16Regs:$a, imm:$b, Mode)>; + def : Pat<(i1 (OpNode imm:$a, Int16Regs:$b)), + (setp_16ir imm:$a, Int16Regs:$b, Mode)>; + // i32 -> pred + def : Pat<(i1 (OpNode Int32Regs:$a, Int32Regs:$b)), + (setp_32rr Int32Regs:$a, Int32Regs:$b, Mode)>; + def : Pat<(i1 (OpNode Int32Regs:$a, imm:$b)), + (setp_32ri Int32Regs:$a, imm:$b, Mode)>; + def : Pat<(i1 (OpNode imm:$a, Int32Regs:$b)), + (setp_32ir imm:$a, Int32Regs:$b, Mode)>; + // i64 -> pred + def : Pat<(i1 (OpNode Int64Regs:$a, Int64Regs:$b)), + (setp_64rr Int64Regs:$a, Int64Regs:$b, Mode)>; + def : Pat<(i1 (OpNode Int64Regs:$a, imm:$b)), + (setp_64ri Int64Regs:$a, imm:$b, Mode)>; + def : Pat<(i1 (OpNode imm:$a, Int64Regs:$b)), + (setp_64ir imm:$a, Int64Regs:$b, Mode)>; + + // i16 -> i32 + def : Pat<(i32 (OpNode Int16Regs:$a, Int16Regs:$b)), + (set_16rr Int16Regs:$a, Int16Regs:$b, Mode)>; + def : Pat<(i32 (OpNode Int16Regs:$a, imm:$b)), + (set_16ri Int16Regs:$a, imm:$b, Mode)>; + def : Pat<(i32 (OpNode imm:$a, Int16Regs:$b)), + (set_16ir imm:$a, Int16Regs:$b, Mode)>; + // i32 -> i32 + def : Pat<(i32 (OpNode Int32Regs:$a, Int32Regs:$b)), + (set_32rr Int32Regs:$a, Int32Regs:$b, Mode)>; + def : Pat<(i32 (OpNode Int32Regs:$a, imm:$b)), + (set_32ri Int32Regs:$a, imm:$b, Mode)>; + def : Pat<(i32 (OpNode imm:$a, Int32Regs:$b)), + (set_32ir imm:$a, Int32Regs:$b, Mode)>; + // i64 -> i32 + def : Pat<(i32 (OpNode Int64Regs:$a, Int64Regs:$b)), + (set_64rr Int64Regs:$a, 
Int64Regs:$b, Mode)>; + def : Pat<(i32 (OpNode Int64Regs:$a, imm:$b)), + (set_64ri Int64Regs:$a, imm:$b, Mode)>; + def : Pat<(i32 (OpNode imm:$a, Int64Regs:$b)), + (set_64ir imm:$a, Int64Regs:$b, Mode)>; } -multiclass ISET_FORMAT { - def i16rr_toi16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, - Int16Regs:$b), - Set_Str.s, - []>; - def i32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, - Int32Regs:$b), - Set_Str.s, - []>; - def i64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, - Int64Regs:$b), - Set_Str.s, - []>; - - def i16rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), - !strconcat(OpcStr, "16 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>; - def i16ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, i16imm:$b), - !strconcat(OpcStr, "16 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>; - def i16ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i16imm:$a, Int16Regs:$b), - !strconcat(OpcStr, "16 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>; - def i32rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), - !strconcat(OpcStr, "32 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>; - def i32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, i32imm:$b), - !strconcat(OpcStr, "32 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; - def i32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i32imm:$a, Int32Regs:$b), - !strconcat(OpcStr, "32 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>; - def i64rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), - !strconcat(OpcStr, "64 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>; - def i64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, i64imm:$b), - !strconcat(OpcStr, "64 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode 
Int64Regs:$a, imm:$b))]>; - def i64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i64imm:$a, Int64Regs:$b), - !strconcat(OpcStr, "64 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>; - - def i16rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, - Int16Regs:$b), - !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>; - def i16ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b), - !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>; - def i16ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i16imm:$a, Int16Regs:$b), - !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>; - def i32rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, - Int32Regs:$b), - !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>; - def i32ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), - !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; - def i32ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, Int32Regs:$b), - !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>; - def i64rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a, - Int64Regs:$b), - !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>; - def i64ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a, i64imm:$b), - !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>; - def i64ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i64imm:$a, Int64Regs:$b), - !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>; +multiclass ISET_FORMAT_SIGNED + : ISET_FORMAT { + // TableGen doesn't like 
empty multiclasses + def : PatLeaf<(i32 0)>; } -multiclass FSET_FORMAT { - def f32rr_toi32_ftz: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a, - Float32Regs:$b), - Set_Str.s, - []>, Requires<[doF32FTZ]>; - def f32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a, - Float32Regs:$b), - Set_Str.s, - []>; - def f64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Float64Regs:$a, - Float64Regs:$b), - Set_Str.s, - []>; - def f64rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float64Regs:$a, - Float64Regs:$b), - Set_Str.s, - []>; - - def f32rr_p_ftz: NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a - , Float32Regs:$b), - !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]> - , Requires<[doF32FTZ]>; - def f32rr_p: NVPTXInst<(outs Int1Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), - !strconcat(OpcStr, "f32 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>; - def f32ri_p_ftz: NVPTXInst<(outs Int1Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), - !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, - Requires<[doF32FTZ]>; - def f32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a, f32imm:$b), - !strconcat(OpcStr, "f32 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>; - def f32ir_p_ftz: NVPTXInst<(outs Int1Regs:$dst), - (ins f32imm:$a, Float32Regs:$b), - !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>, - Requires<[doF32FTZ]>; - def f32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f32imm:$a, Float32Regs:$b), - !strconcat(OpcStr, "f32 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>; - def f64rr_p: NVPTXInst<(outs Int1Regs:$dst), - (ins Float64Regs:$a, Float64Regs:$b), - !strconcat(OpcStr, "f64 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>; - def 
f64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float64Regs:$a, f64imm:$b), - !strconcat(OpcStr, "f64 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>; - def f64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f64imm:$a, Float64Regs:$b), - !strconcat(OpcStr, "f64 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>; - - def f32rr_u32_ftz: NVPTXInst<(outs Int32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), - !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>; - def f32rr_u32: NVPTXInst<(outs Int32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), - !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>; - def f32ri_u32_ftz: NVPTXInst<(outs Int32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), - !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>; - def f32ri_u32: NVPTXInst<(outs Int32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), - !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>; - def f32ir_u32_ftz: NVPTXInst<(outs Int32Regs:$dst), - (ins f32imm:$a, Float32Regs:$b), - !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>; - def f32ir_u32: NVPTXInst<(outs Int32Regs:$dst), - (ins f32imm:$a, Float32Regs:$b), - !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>; - def f64rr_u32: NVPTXInst<(outs Int32Regs:$dst), - (ins Float64Regs:$a, Float64Regs:$b), - !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>; - def f64ri_u32: NVPTXInst<(outs Int32Regs:$dst), - (ins Float64Regs:$a, f64imm:$b), - !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>; - def 
f64ir_u32: NVPTXInst<(outs Int32Regs:$dst), - (ins f64imm:$a, Float64Regs:$b), - !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>; +multiclass ISET_FORMAT_UNSIGNED + : ISET_FORMAT { + // TableGen doesn't like empty multiclasses + def : PatLeaf<(i32 0)>; } -defm ISetSGT -: ISET_FORMAT<"setp.gt.s", "set.gt.u32.s", setgt, "s16", "cvt.s16.s8">; -defm ISetUGT -: ISET_FORMAT<"setp.gt.u", "set.gt.u32.u", setugt, "u16", "cvt.u16.u8">; -defm ISetSLT -: ISET_FORMAT<"setp.lt.s", "set.lt.u32.s", setlt, "s16", "cvt.s16.s8">; -defm ISetULT -: ISET_FORMAT<"setp.lt.u", "set.lt.u32.u", setult, "u16", "cvt.u16.u8">; -defm ISetSGE -: ISET_FORMAT<"setp.ge.s", "set.ge.u32.s", setge, "s16", "cvt.s16.s8">; -defm ISetUGE -: ISET_FORMAT<"setp.ge.u", "set.ge.u32.u", setuge, "u16", "cvt.u16.u8">; -defm ISetSLE -: ISET_FORMAT<"setp.le.s", "set.le.u32.s", setle, "s16", "cvt.s16.s8">; -defm ISetULE -: ISET_FORMAT<"setp.le.u", "set.le.u32.u", setule, "u16", "cvt.u16.u8">; -defm ISetSEQ -: ISET_FORMAT<"setp.eq.s", "set.eq.u32.s", seteq, "s16", "cvt.s16.s8">; -defm ISetUEQ -: ISET_FORMAT<"setp.eq.u", "set.eq.u32.u", setueq, "u16", "cvt.u16.u8">; -defm ISetSNE -: ISET_FORMAT<"setp.ne.s", "set.ne.u32.s", setne, "s16", "cvt.s16.s8">; -defm ISetUNE -: ISET_FORMAT<"setp.ne.u", "set.ne.u32.u", setune, "u16", "cvt.u16.u8">; - -def ISetSNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst), - (ins Int1Regs:$a, Int1Regs:$b), - "xor.pred \t$dst, $a, $b;", - [(set Int1Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>; -def ISetUNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst), - (ins Int1Regs:$a, Int1Regs:$b), - "xor.pred \t$dst, $a, $b;", - [(set Int1Regs:$dst, (setune Int1Regs:$a, Int1Regs:$b))]>; -def ISetSEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst), - (ins Int1Regs:$a, Int1Regs:$b), - !strconcat("{{\n\t", - !strconcat(".reg .pred temp;\n\t", - !strconcat("xor.pred \ttemp, $a, $b;\n\t", - !strconcat("not.pred \t$dst, temp;\n\t}}","")))), - [(set Int1Regs:$dst, (seteq 
Int1Regs:$a, Int1Regs:$b))]>; -def ISetUEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst), - (ins Int1Regs:$a, Int1Regs:$b), - !strconcat("{{\n\t", - !strconcat(".reg .pred temp;\n\t", - !strconcat("xor.pred \ttemp, $a, $b;\n\t", - !strconcat("not.pred \t$dst, temp;\n\t}}","")))), - [(set Int1Regs:$dst, (setueq Int1Regs:$a, Int1Regs:$b))]>; - -// Compare 2 i1's and produce a u32 -def ISETSNEi1rr_u32 : NVPTXInst<(outs Int32Regs:$dst), - (ins Int1Regs:$a, Int1Regs:$b), - !strconcat("{{\n\t", - !strconcat(".reg .pred temp;\n\t", - !strconcat("xor.pred \ttemp, $a, $b;\n\t", - !strconcat("selp.u32 \t$dst, -1, 0, temp;", "\n\t}}")))), - [(set Int32Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>; -def ISETSEQi1rr_u32 : NVPTXInst<(outs Int32Regs:$dst), - (ins Int1Regs:$a, Int1Regs:$b), - !strconcat("{{\n\t", - !strconcat(".reg .pred temp;\n\t", - !strconcat("xor.pred \ttemp, $a, $b;\n\t", - !strconcat("selp.u32 \t$dst, 0, -1, temp;", "\n\t}}")))), - [(set Int32Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>; - -defm FSetGT : FSET_FORMAT<"setp.gt.", "set.gt.u32.", setogt>; -defm FSetLT : FSET_FORMAT<"setp.lt.", "set.lt.u32.", setolt>; -defm FSetGE : FSET_FORMAT<"setp.ge.", "set.ge.u32.", setoge>; -defm FSetLE : FSET_FORMAT<"setp.le.", "set.le.u32.", setole>; -defm FSetEQ : FSET_FORMAT<"setp.eq.", "set.eq.u32.", setoeq>; -defm FSetNE : FSET_FORMAT<"setp.ne.", "set.ne.u32.", setone>; - -defm FSetUGT : FSET_FORMAT<"setp.gtu.", "set.gtu.u32.", setugt>; -defm FSetULT : FSET_FORMAT<"setp.ltu.", "set.ltu.u32.",setult>; -defm FSetUGE : FSET_FORMAT<"setp.geu.", "set.geu.u32.",setuge>; -defm FSetULE : FSET_FORMAT<"setp.leu.", "set.leu.u32.",setule>; -defm FSetUEQ : FSET_FORMAT<"setp.equ.", "set.equ.u32.",setueq>; -defm FSetUNE : FSET_FORMAT<"setp.neu.", "set.neu.u32.",setune>; - -defm FSetNUM : FSET_FORMAT<"setp.num.", "set.num.u32.",seto>; -defm FSetNAN : FSET_FORMAT<"setp.nan.", "set.nan.u32.",setuo>; - -def SELECTi1rr : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)), - (ORb1rr 
(ANDb1rr Int1Regs:$p, Int1Regs:$a), - (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>; - -def SELECTi16rr : NVPTXInst<(outs Int16Regs:$dst), - (ins Int16Regs:$a, Int16Regs:$b, Int1Regs:$p), - "selp.b16 \t$dst, $a, $b, $p;", - [(set Int16Regs:$dst, (select Int1Regs:$p, Int16Regs:$a, Int16Regs:$b))]>; -def SELECTi16ri : NVPTXInst<(outs Int16Regs:$dst), - (ins Int16Regs:$a, i16imm:$b, Int1Regs:$p), - "selp.b16 \t$dst, $a, $b, $p;", - [(set Int16Regs:$dst, (select Int1Regs:$p, Int16Regs:$a, imm:$b))]>; -def SELECTi16ir : NVPTXInst<(outs Int16Regs:$dst), - (ins i16imm:$a, Int16Regs:$b, Int1Regs:$p), - "selp.b16 \t$dst, $a, $b, $p;", - [(set Int16Regs:$dst, (select Int1Regs:$p, imm:$a, Int16Regs:$b))]>; -def SELECTi16ii : NVPTXInst<(outs Int16Regs:$dst), - (ins i16imm:$a, i16imm:$b, Int1Regs:$p), - "selp.b16 \t$dst, $a, $b, $p;", - [(set Int16Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>; - -def SELECTi32rr : NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$a, Int32Regs:$b, Int1Regs:$p), - "selp.b32 \t$dst, $a, $b, $p;", - [(set Int32Regs:$dst, (select Int1Regs:$p, Int32Regs:$a, Int32Regs:$b))]>; -def SELECTi32ri : NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$a, i32imm:$b, Int1Regs:$p), - "selp.b32 \t$dst, $a, $b, $p;", - [(set Int32Regs:$dst, (select Int1Regs:$p, Int32Regs:$a, imm:$b))]>; -def SELECTi32ir : NVPTXInst<(outs Int32Regs:$dst), - (ins i32imm:$a, Int32Regs:$b, Int1Regs:$p), - "selp.b32 \t$dst, $a, $b, $p;", - [(set Int32Regs:$dst, (select Int1Regs:$p, imm:$a, Int32Regs:$b))]>; -def SELECTi32ii : NVPTXInst<(outs Int32Regs:$dst), - (ins i32imm:$a, i32imm:$b, Int1Regs:$p), - "selp.b32 \t$dst, $a, $b, $p;", - [(set Int32Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>; - -def SELECTi64rr : NVPTXInst<(outs Int64Regs:$dst), - (ins Int64Regs:$a, Int64Regs:$b, Int1Regs:$p), - "selp.b64 \t$dst, $a, $b, $p;", - [(set Int64Regs:$dst, (select Int1Regs:$p, Int64Regs:$a, Int64Regs:$b))]>; -def SELECTi64ri : NVPTXInst<(outs Int64Regs:$dst), - (ins Int64Regs:$a, 
i64imm:$b, Int1Regs:$p), - "selp.b64 \t$dst, $a, $b, $p;", - [(set Int64Regs:$dst, (select Int1Regs:$p, Int64Regs:$a, imm:$b))]>; -def SELECTi64ir : NVPTXInst<(outs Int64Regs:$dst), - (ins i64imm:$a, Int64Regs:$b, Int1Regs:$p), - "selp.b64 \t$dst, $a, $b, $p;", - [(set Int64Regs:$dst, (select Int1Regs:$p, imm:$a, Int64Regs:$b))]>; -def SELECTi64ii : NVPTXInst<(outs Int64Regs:$dst), - (ins i64imm:$a, i64imm:$b, Int1Regs:$p), - "selp.b64 \t$dst, $a, $b, $p;", - [(set Int64Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>; - -def SELECTf32rr : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b, Int1Regs:$p), - "selp.f32 \t$dst, $a, $b, $p;", - [(set Float32Regs:$dst, - (select Int1Regs:$p, Float32Regs:$a, Float32Regs:$b))]>; -def SELECTf32ri : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b, Int1Regs:$p), - "selp.f32 \t$dst, $a, $b, $p;", - [(set Float32Regs:$dst, (select Int1Regs:$p, Float32Regs:$a, fpimm:$b))]>; -def SELECTf32ir : NVPTXInst<(outs Float32Regs:$dst), - (ins f32imm:$a, Float32Regs:$b, Int1Regs:$p), - "selp.f32 \t$dst, $a, $b, $p;", - [(set Float32Regs:$dst, (select Int1Regs:$p, fpimm:$a, Float32Regs:$b))]>; -def SELECTf32ii : NVPTXInst<(outs Float32Regs:$dst), - (ins f32imm:$a, f32imm:$b, Int1Regs:$p), - "selp.f32 \t$dst, $a, $b, $p;", - [(set Float32Regs:$dst, (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>; - -def SELECTf64rr : NVPTXInst<(outs Float64Regs:$dst), - (ins Float64Regs:$a, Float64Regs:$b, Int1Regs:$p), - "selp.f64 \t$dst, $a, $b, $p;", - [(set Float64Regs:$dst, - (select Int1Regs:$p, Float64Regs:$a, Float64Regs:$b))]>; -def SELECTf64ri : NVPTXInst<(outs Float64Regs:$dst), - (ins Float64Regs:$a, f64imm:$b, Int1Regs:$p), - "selp.f64 \t$dst, $a, $b, $p;", - [(set Float64Regs:$dst, (select Int1Regs:$p, Float64Regs:$a, fpimm:$b))]>; -def SELECTf64ir : NVPTXInst<(outs Float64Regs:$dst), - (ins f64imm:$a, Float64Regs:$b, Int1Regs:$p), - "selp.f64 \t$dst, $a, $b, $p;", - [(set Float64Regs:$dst, (select 
Int1Regs:$p, fpimm:$a, Float64Regs:$b))]>; -def SELECTf64ii : NVPTXInst<(outs Float64Regs:$dst), - (ins f64imm:$a, f64imm:$b, Int1Regs:$p), - "selp.f64 \t $dst, $a, $b, $p;", - [(set Float64Regs:$dst, (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>; +defm : ISET_FORMAT_SIGNED; +defm : ISET_FORMAT_UNSIGNED; +defm : ISET_FORMAT_SIGNED; +defm : ISET_FORMAT_UNSIGNED; +defm : ISET_FORMAT_SIGNED; +defm : ISET_FORMAT_UNSIGNED; +defm : ISET_FORMAT_SIGNED; +defm : ISET_FORMAT_UNSIGNED; +defm : ISET_FORMAT_SIGNED; +defm : ISET_FORMAT_UNSIGNED; +defm : ISET_FORMAT_SIGNED; +defm : ISET_FORMAT_UNSIGNED; + +// i1 compares +def : Pat<(setne Int1Regs:$a, Int1Regs:$b), + (XORb1rr Int1Regs:$a, Int1Regs:$b)>; +def : Pat<(setune Int1Regs:$a, Int1Regs:$b), + (XORb1rr Int1Regs:$a, Int1Regs:$b)>; + +def : Pat<(seteq Int1Regs:$a, Int1Regs:$b), + (NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>; +def : Pat<(setueq Int1Regs:$a, Int1Regs:$b), + (NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>; + +// i1 compare -> i32: selp yields its first immediate when $a != $b (xor set) +def : Pat<(i32 (setne Int1Regs:$a, Int1Regs:$b)), + (SELP_u32ii -1, 0, (XORb1rr Int1Regs:$a, Int1Regs:$b))>; +def : Pat<(i32 (seteq Int1Regs:$a, Int1Regs:$b)), + (SELP_u32ii 0, -1, (XORb1rr Int1Regs:$a, Int1Regs:$b))>; + + + +multiclass FSET_FORMAT { + // f32 -> pred + def : Pat<(i1 (OpNode Float32Regs:$a, Float32Regs:$b)), + (SETP_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>, + Requires<[doF32FTZ]>; + def : Pat<(i1 (OpNode Float32Regs:$a, Float32Regs:$b)), + (SETP_f32rr Float32Regs:$a, Float32Regs:$b, Mode)>; + def : Pat<(i1 (OpNode Float32Regs:$a, fpimm:$b)), + (SETP_f32ri Float32Regs:$a, fpimm:$b, ModeFTZ)>, + Requires<[doF32FTZ]>; + def : Pat<(i1 (OpNode Float32Regs:$a, fpimm:$b)), + (SETP_f32ri Float32Regs:$a, fpimm:$b, Mode)>; + def : Pat<(i1 (OpNode fpimm:$a, Float32Regs:$b)), + (SETP_f32ir fpimm:$a, Float32Regs:$b, ModeFTZ)>, + Requires<[doF32FTZ]>; + def : Pat<(i1 (OpNode fpimm:$a, Float32Regs:$b)), + (SETP_f32ir fpimm:$a, Float32Regs:$b, Mode)>; + + // f64 -> pred + def : Pat<(i1
(OpNode Float64Regs:$a, Float64Regs:$b)), + (SETP_f64rr Float64Regs:$a, Float64Regs:$b, Mode)>; + def : Pat<(i1 (OpNode Float64Regs:$a, fpimm:$b)), + (SETP_f64ri Float64Regs:$a, fpimm:$b, Mode)>; + def : Pat<(i1 (OpNode fpimm:$a, Float64Regs:$b)), + (SETP_f64ir fpimm:$a, Float64Regs:$b, Mode)>; + + // f32 -> i32 + def : Pat<(i32 (OpNode Float32Regs:$a, Float32Regs:$b)), + (SET_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>, + Requires<[doF32FTZ]>; + def : Pat<(i32 (OpNode Float32Regs:$a, Float32Regs:$b)), + (SET_f32rr Float32Regs:$a, Float32Regs:$b, Mode)>; + def : Pat<(i32 (OpNode Float32Regs:$a, fpimm:$b)), + (SET_f32ri Float32Regs:$a, fpimm:$b, ModeFTZ)>, + Requires<[doF32FTZ]>; + def : Pat<(i32 (OpNode Float32Regs:$a, fpimm:$b)), + (SET_f32ri Float32Regs:$a, fpimm:$b, Mode)>; + def : Pat<(i32 (OpNode fpimm:$a, Float32Regs:$b)), + (SET_f32ir fpimm:$a, Float32Regs:$b, ModeFTZ)>, + Requires<[doF32FTZ]>; + def : Pat<(i32 (OpNode fpimm:$a, Float32Regs:$b)), + (SET_f32ir fpimm:$a, Float32Regs:$b, Mode)>; + + // f64 -> i32 + def : Pat<(i32 (OpNode Float64Regs:$a, Float64Regs:$b)), + (SET_f64rr Float64Regs:$a, Float64Regs:$b, Mode)>; + def : Pat<(i32 (OpNode Float64Regs:$a, fpimm:$b)), + (SET_f64ri Float64Regs:$a, fpimm:$b, Mode)>; + def : Pat<(i32 (OpNode fpimm:$a, Float64Regs:$b)), + (SET_f64ir fpimm:$a, Float64Regs:$b, Mode)>; +} + +defm FSetGT : FSET_FORMAT; +defm FSetLT : FSET_FORMAT; +defm FSetGE : FSET_FORMAT; +defm FSetLE : FSET_FORMAT; +defm FSetEQ : FSET_FORMAT; +defm FSetNE : FSET_FORMAT; + +defm FSetUGT : FSET_FORMAT; +defm FSetULT : FSET_FORMAT; +defm FSetUGE : FSET_FORMAT; +defm FSetULE : FSET_FORMAT; +defm FSetUEQ : FSET_FORMAT; +defm FSetUNE : FSET_FORMAT; + +defm FSetNUM : FSET_FORMAT; +defm FSetNAN : FSET_FORMAT; //def ld_param : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad, // [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; @@ -1436,7 +1558,6 @@ def SDTCallArgMarkProfile : SDTypeProfile<0, 0, []>; def SDTCallVoidProfile : SDTypeProfile<0, 1, []>; def 
SDTCallValProfile : SDTypeProfile<1, 0, []>; def SDTMoveParamProfile : SDTypeProfile<1, 1, []>; -def SDTMoveRetvalProfile : SDTypeProfile<0, 1, []>; def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>; def SDTStoreRetvalV2Profile : SDTypeProfile<0, 3, [SDTCisInt<0>]>; def SDTStoreRetvalV4Profile : SDTypeProfile<0, 5, [SDTCisInt<0>]>; @@ -1472,8 +1593,6 @@ def StoreParamU32 : SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile, [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; def StoreParamS32 : SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile, [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def MoveToParam : SDNode<"NVPTXISD::MoveToParam", SDTStoreParamProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; def CallArgBegin : SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile, [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; def CallArg : SDNode<"NVPTXISD::CallArg", SDTCallArgProfile, @@ -1490,16 +1609,12 @@ def CallVal : SDNode<"NVPTXISD::CallVal", SDTCallValProfile, [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; def MoveParam : SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, []>; -def MoveRetval : SDNode<"NVPTXISD::MoveRetval", SDTMoveRetvalProfile, - [SDNPHasChain, SDNPSideEffect]>; def StoreRetval : SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile, [SDNPHasChain, SDNPSideEffect]>; def StoreRetvalV2 : SDNode<"NVPTXISD::StoreRetvalV2", SDTStoreRetvalV2Profile, [SDNPHasChain, SDNPSideEffect]>; def StoreRetvalV4 : SDNode<"NVPTXISD::StoreRetvalV4", SDTStoreRetvalV4Profile, [SDNPHasChain, SDNPSideEffect]>; -def MoveToRetval : SDNode<"NVPTXISD::MoveToRetval", SDTStoreRetvalProfile, - [SDNPHasChain, SDNPSideEffect]>; def PseudoUseParam : SDNode<"NVPTXISD::PseudoUseParam", SDTPseudoUseParamProfile, [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; @@ -1550,12 +1665,6 @@ class StoreParamV4Inst : "\t[param$a+$b], {{$val, $val2, $val3, $val4}};"), []>; -class MoveToParamInst : 
- NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b), - !strconcat(!strconcat("mov", opstr), - "\tparam$a, $val;"), - [(MoveToParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>; - class StoreRetvalInst : NVPTXInst<(outs), (ins regclass:$val, i32imm:$a), !strconcat(!strconcat("st.param", opstr), @@ -1576,18 +1685,6 @@ class StoreRetvalV4Inst : "\t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"), []>; -class MoveToRetvalInst : - NVPTXInst<(outs), (ins i32imm:$num, regclass:$val), - !strconcat(!strconcat("mov", opstr), - "\tfunc_retval$num, $val;"), - [(MoveToRetval (i32 imm:$num), regclass:$val)]>; - -class MoveRetvalInst : - NVPTXInst<(outs), (ins regclass:$val), - !strconcat(!strconcat("mov", opstr), - "\tfunc_retval0, $val;"), - [(MoveRetval regclass:$val)]>; - def PrintCallRetInst1 : NVPTXInst<(outs), (ins), "call (retval0), ", [(PrintCall (i32 1))]>; @@ -1663,16 +1760,6 @@ def LoadParamMemV2F32 : LoadParamV2MemInst; def LoadParamMemV2F64 : LoadParamV2MemInst; def LoadParamMemV4F32 : LoadParamV4MemInst; -def LoadParamRegI64 : LoadParamRegInst; -def LoadParamRegI32 : LoadParamRegInst; -def LoadParamRegI16 : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b), - "cvt.u16.u32\t$dst, retval$b;", - [(set Int16Regs:$dst, - (LoadParam (i32 0), (i32 imm:$b)))]>; - -def LoadParamRegF32 : LoadParamRegInst; -def LoadParamRegF64 : LoadParamRegInst; - def StoreParamI64 : StoreParamInst; def StoreParamI32 : StoreParamInst; @@ -1703,28 +1790,6 @@ def StoreParamV4I8 : NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2, "st.param.v4.b8\t[param$a+$b], {{$val, $val2, $val3, $val4}};", []>; -def StoreParamS32I16 : NVPTXInst<(outs), - (ins Int16Regs:$val, i32imm:$a, i32imm:$b), - !strconcat("cvt.s32.s16\ttemp_param_reg, $val;\n\t", - "st.param.b32\t[param$a+$b], temp_param_reg;"), - []>; -def StoreParamU32I16 : NVPTXInst<(outs), - (ins Int16Regs:$val, i32imm:$a, i32imm:$b), - !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t", - "st.param.b32\t[param$a+$b], 
temp_param_reg;"), - []>; - -def StoreParamU32I8 : NVPTXInst<(outs), - (ins Int16Regs:$val, i32imm:$a, i32imm:$b), - !strconcat("cvt.u32.u8\ttemp_param_reg, $val;\n\t", - "st.param.b32\t[param$a+$b], temp_param_reg;"), - []>; -def StoreParamS32I8 : NVPTXInst<(outs), - (ins Int16Regs:$val, i32imm:$a, i32imm:$b), - !strconcat("cvt.s32.s8\ttemp_param_reg, $val;\n\t", - "st.param.b32\t[param$a+$b], temp_param_reg;"), - []>; - def StoreParamF32 : StoreParamInst; def StoreParamF64 : StoreParamInst; def StoreParamV2F32 : StoreParamV2Inst; @@ -1738,15 +1803,6 @@ def StoreParamV4F32 : NVPTXInst<(outs), "st.param.v4.f32\t[param$a+$b], {{$val, $val2, $val3, $val4}};", []>; -def MoveToParamI64 : MoveToParamInst; -def MoveToParamI32 : MoveToParamInst; -def MoveToParamF64 : MoveToParamInst; -def MoveToParamF32 : MoveToParamInst; -def MoveToParamI16 : NVPTXInst<(outs), - (ins Int16Regs:$val, i32imm:$a, i32imm:$b), - !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t", - "mov.b32\tparam$a, temp_param_reg;"), - [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>; def StoreRetvalI64 : StoreRetvalInst; def StoreRetvalI32 : StoreRetvalInst; @@ -1766,21 +1822,6 @@ def StoreRetvalV2F64 : StoreRetvalV2Inst; def StoreRetvalV2F32 : StoreRetvalV2Inst; def StoreRetvalV4F32 : StoreRetvalV4Inst; -def MoveRetvalI64 : MoveRetvalInst; -def MoveRetvalI32 : MoveRetvalInst; -def MoveRetvalI16 : MoveRetvalInst; -def MoveRetvalI8 : MoveRetvalInst; -def MoveRetvalF64 : MoveRetvalInst; -def MoveRetvalF32 : MoveRetvalInst; - -def MoveToRetvalI64 : MoveToRetvalInst; -def MoveToRetvalI32 : MoveToRetvalInst; -def MoveToRetvalF64 : MoveToRetvalInst; -def MoveToRetvalF32 : MoveToRetvalInst; -def MoveToRetvalI16 : NVPTXInst<(outs), (ins i32imm:$num, Int16Regs:$val), - "cvt.u32.u16\tfunc_retval$num, $val;", - [(MoveToRetval (i32 imm:$num), Int16Regs:$val)]>; - def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>; def CallArgEndInst1 : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 
1))]>; def CallArgEndInst0 : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>; @@ -2135,221 +2176,186 @@ defm STV_f64 : ST_VEC; //---- Conversion ---- -multiclass CVT_INT_TO_FP { -// FIXME: need to add f16 support -// def CVTf16i16 : -// NVPTXInst<(outs Float16Regs:$d), (ins Int16Regs:$a), -// !strconcat(!strconcat("cvt.rn.f16.", OpStr), "16 \t$d, $a;"), -// [(set Float16Regs:$d, (OpNode Int16Regs:$a))]>; -// def CVTf16i32 : -// NVPTXInst<(outs Float16Regs:$d), (ins Int32Regs:$a), -// !strconcat(!strconcat("cvt.rn.f16.", OpStr), "32 \t$d, $a;"), -// [(set Float16Regs:$d, (OpNode Int32Regs:$a))]>; -// def CVTf16i64: -// NVPTXInst<(outs Float16Regs:$d), (ins Int64Regs:$a), -// !strconcat(!strconcat("cvt.rn.f32.", OpStr), "64 \t$d, $a;"), -// [(set Float32Regs:$d, (OpNode Int64Regs:$a))]>; - - def CVTf32i1 : - NVPTXInst<(outs Float32Regs:$d), (ins Int1Regs:$a), - "selp.f32 \t$d, 1.0, 0.0, $a;", - [(set Float32Regs:$d, (OpNode Int1Regs:$a))]>; - def CVTf32i16 : - NVPTXInst<(outs Float32Regs:$d), (ins Int16Regs:$a), - !strconcat(!strconcat("cvt.rn.f32.", OpStr), "16 \t$d, $a;"), - [(set Float32Regs:$d, (OpNode Int16Regs:$a))]>; - def CVTf32i32 : - NVPTXInst<(outs Float32Regs:$d), (ins Int32Regs:$a), - !strconcat(!strconcat("cvt.rn.f32.", OpStr), "32 \t$d, $a;"), - [(set Float32Regs:$d, (OpNode Int32Regs:$a))]>; - def CVTf32i64: - NVPTXInst<(outs Float32Regs:$d), (ins Int64Regs:$a), - !strconcat(!strconcat("cvt.rn.f32.", OpStr), "64 \t$d, $a;"), - [(set Float32Regs:$d, (OpNode Int64Regs:$a))]>; - - def CVTf64i1 : - NVPTXInst<(outs Float64Regs:$d), (ins Int1Regs:$a), - "selp.f64 \t$d, 1.0, 0.0, $a;", - [(set Float64Regs:$d, (OpNode Int1Regs:$a))]>; - def CVTf64i16 : - NVPTXInst<(outs Float64Regs:$d), (ins Int16Regs:$a), - !strconcat(!strconcat("cvt.rn.f64.", OpStr), "16 \t$d, $a;"), - [(set Float64Regs:$d, (OpNode Int16Regs:$a))]>; - def CVTf64i32 : - NVPTXInst<(outs Float64Regs:$d), (ins Int32Regs:$a), - !strconcat(!strconcat("cvt.rn.f64.", OpStr), "32 \t$d, $a;"), 
- [(set Float64Regs:$d, (OpNode Int32Regs:$a))]>; - def CVTf64i64: - NVPTXInst<(outs Float64Regs:$d), (ins Int64Regs:$a), - !strconcat(!strconcat("cvt.rn.f64.", OpStr), "64 \t$d, $a;"), - [(set Float64Regs:$d, (OpNode Int64Regs:$a))]>; -} - -defm Sint_to_fp : CVT_INT_TO_FP <"s", sint_to_fp>; -defm Uint_to_fp : CVT_INT_TO_FP <"u", uint_to_fp>; - -multiclass CVT_FP_TO_INT { -// FIXME: need to add f16 support -// def CVTi16f16: -// NVPTXInst<(outs Int16Regs:$d), (ins Float16Regs:$a), -// !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f16 \t$d, $a;"), -// [(set Int16Regs:$d, (OpNode Float16Regs:$a))]>; - def CVTi16f32_ftz: - NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a), - !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"), - [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>; - def CVTi16f32: - NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a), - !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"), - [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>; - def CVTi16f64: - NVPTXInst<(outs Int16Regs:$d), (ins Float64Regs:$a), - !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"), - [(set Int16Regs:$d, (OpNode Float64Regs:$a))]>; - -// FIXME: need to add f16 support -// def CVTi32f16: def CVTi32f16: -// NVPTXInst<(outs Int32Regs:$d), (ins Float16Regs:$a), -// !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f16 \t$d, $a;"), -// [(set Int32Regs:$d, (OpNode Float16Regs:$a))]>; - def CVTi32f32_ftz: - NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a), - !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "32.f32 \t$d, $a;"), - [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>; - def CVTi32f32: - NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a), - !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f32 \t$d, $a;"), - [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>; - def CVTi32f64: - NVPTXInst<(outs Int32Regs:$d), (ins Float64Regs:$a), - !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f64 \t$d, $a;"), - [(set 
Int32Regs:$d, (OpNode Float64Regs:$a))]>; - -// FIXME: need to add f16 support -// def CVTi64f16: -// NVPTXInst<(outs Int64Regs:$d), (ins Float16Regs:$a), -// !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f16 \t$d, $a;"), -// [(set Int64Regs:$d, (OpNode Float16Regs:$a))]>; - def CVTi64f32_ftz: - NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a), - !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "64.f32 \t$d, $a;"), - [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>; - def CVTi64f32: - NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a), - !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f32 \t$d, $a;"), - [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>; - def CVTi64f64: - NVPTXInst<(outs Int64Regs:$d), (ins Float64Regs:$a), - !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f64 \t$d, $a;"), - [(set Int64Regs:$d, (OpNode Float64Regs:$a))]>; -} - -defm Fp_to_sint : CVT_FP_TO_INT <"s", fp_to_sint>; -defm Fp_to_uint : CVT_FP_TO_INT <"u", fp_to_uint>; - -multiclass INT_EXTEND_UNSIGNED_1 { - def ext1to16: - NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a), - "selp.u16 \t$d, 1, 0, $a;", - [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>; - def ext1to32: - NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a), - "selp.u32 \t$d, 1, 0, $a;", - [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>; - def ext1to64: - NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a), - "selp.u64 \t$d, 1, 0, $a;", - [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>; -} - -multiclass INT_EXTEND_SIGNED_1 { - def ext1to16: - NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a), - "selp.s16 \t$d, -1, 0, $a;", - [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>; - def ext1to32: - NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a), - "selp.s32 \t$d, -1, 0, $a;", - [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>; - def ext1to64: - NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a), - "selp.s64 \t$d, -1, 0, $a;", - [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>; -} - -multiclass INT_EXTEND { - def ext16to32: - NVPTXInst<(outs 
Int32Regs:$d), (ins Int16Regs:$a), - !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.", - !strconcat(OpStr, "16 \t$d, $a;")))), - [(set Int32Regs:$d, (OpNode Int16Regs:$a))]>; - def ext16to64: - NVPTXInst<(outs Int64Regs:$d), (ins Int16Regs:$a), - !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.", - !strconcat(OpStr, "16 \t$d, $a;")))), - [(set Int64Regs:$d, (OpNode Int16Regs:$a))]>; - def ext32to64: - NVPTXInst<(outs Int64Regs:$d), (ins Int32Regs:$a), - !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.", - !strconcat(OpStr, "32 \t$d, $a;")))), - [(set Int64Regs:$d, (OpNode Int32Regs:$a))]>; -} - -defm Sint_extend_1 : INT_EXTEND_SIGNED_1; -defm Zint_extend_1 : INT_EXTEND_UNSIGNED_1; -defm Aint_extend_1 : INT_EXTEND_UNSIGNED_1; - -defm Sint_extend : INT_EXTEND <"s", sext>; -defm Zint_extend : INT_EXTEND <"u", zext>; -defm Aint_extend : INT_EXTEND <"u", anyext>; - -class TRUNC_to1_asm { - string s = !strconcat("{{\n\t", - !strconcat(".reg ", - !strconcat(sz, - !strconcat(" temp;\n\t", - !strconcat("and", - !strconcat(sz, - !strconcat("\t temp, $a, 1;\n\t", - !strconcat("setp", - !strconcat(sz, ".eq \t $d, temp, 1;\n\t}}"))))))))); -} - -def TRUNC_64to32 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), - "cvt.u32.u64 \t$d, $a;", - [(set Int32Regs:$d, (trunc Int64Regs:$a))]>; -def TRUNC_64to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int64Regs:$a), - "cvt.u16.u64 \t$d, $a;", - [(set Int16Regs:$d, (trunc Int64Regs:$a))]>; -def TRUNC_32to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int32Regs:$a), - "cvt.u16.u32 \t$d, $a;", - [(set Int16Regs:$d, (trunc Int32Regs:$a))]>; -def TRUNC_64to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), - TRUNC_to1_asm<".b64">.s, - [(set Int1Regs:$d, (trunc Int64Regs:$a))]>; -def TRUNC_32to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), - TRUNC_to1_asm<".b32">.s, - [(set Int1Regs:$d, (trunc Int32Regs:$a))]>; -def TRUNC_16to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int16Regs:$a), - TRUNC_to1_asm<".b16">.s, - [(set Int1Regs:$d, 
(trunc Int16Regs:$a))]>; - -// Select instructions +// NOTE: pred->fp are currently sub-optimal due to an issue in TableGen where +// we cannot specify floating-point literals in isel patterns. Therefore, we +// use an integer selp to pick 1/0 (-1/0 if signed) and cvt to floating-point. + +// sint -> f32 +def : Pat<(f32 (sint_to_fp Int1Regs:$a)), + (CVT_f32_s32 (SELP_s32ii -1, 0, Int1Regs:$a), CvtRN)>; +def : Pat<(f32 (sint_to_fp Int16Regs:$a)), + (CVT_f32_s16 Int16Regs:$a, CvtRN)>; +def : Pat<(f32 (sint_to_fp Int32Regs:$a)), + (CVT_f32_s32 Int32Regs:$a, CvtRN)>; +def : Pat<(f32 (sint_to_fp Int64Regs:$a)), + (CVT_f32_s64 Int64Regs:$a, CvtRN)>; + +// uint -> f32 +def : Pat<(f32 (uint_to_fp Int1Regs:$a)), + (CVT_f32_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>; +def : Pat<(f32 (uint_to_fp Int16Regs:$a)), + (CVT_f32_u16 Int16Regs:$a, CvtRN)>; +def : Pat<(f32 (uint_to_fp Int32Regs:$a)), + (CVT_f32_u32 Int32Regs:$a, CvtRN)>; +def : Pat<(f32 (uint_to_fp Int64Regs:$a)), + (CVT_f32_u64 Int64Regs:$a, CvtRN)>; + +// sint -> f64 +def : Pat<(f64 (sint_to_fp Int1Regs:$a)), + (CVT_f64_s32 (SELP_s32ii -1, 0, Int1Regs:$a), CvtRN)>; +def : Pat<(f64 (sint_to_fp Int16Regs:$a)), + (CVT_f64_s16 Int16Regs:$a, CvtRN)>; +def : Pat<(f64 (sint_to_fp Int32Regs:$a)), + (CVT_f64_s32 Int32Regs:$a, CvtRN)>; +def : Pat<(f64 (sint_to_fp Int64Regs:$a)), + (CVT_f64_s64 Int64Regs:$a, CvtRN)>; + +// uint -> f64 +def : Pat<(f64 (uint_to_fp Int1Regs:$a)), + (CVT_f64_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>; +def : Pat<(f64 (uint_to_fp Int16Regs:$a)), + (CVT_f64_u16 Int16Regs:$a, CvtRN)>; +def : Pat<(f64 (uint_to_fp Int32Regs:$a)), + (CVT_f64_u32 Int32Regs:$a, CvtRN)>; +def : Pat<(f64 (uint_to_fp Int64Regs:$a)), + (CVT_f64_u64 Int64Regs:$a, CvtRN)>; + + +// f32 -> sint +def : Pat<(i16 (fp_to_sint Float32Regs:$a)), + (CVT_s16_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(i16 (fp_to_sint Float32Regs:$a)), + (CVT_s16_f32 Float32Regs:$a, CvtRZI)>; +def : Pat<(i32 (fp_to_sint
Float32Regs:$a)), + (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(i32 (fp_to_sint Float32Regs:$a)), + (CVT_s32_f32 Float32Regs:$a, CvtRZI)>; +def : Pat<(i64 (fp_to_sint Float32Regs:$a)), + (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(i64 (fp_to_sint Float32Regs:$a)), + (CVT_s64_f32 Float32Regs:$a, CvtRZI)>; + +// f32 -> uint +def : Pat<(i16 (fp_to_uint Float32Regs:$a)), + (CVT_u16_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(i16 (fp_to_uint Float32Regs:$a)), + (CVT_u16_f32 Float32Regs:$a, CvtRZI)>; +def : Pat<(i32 (fp_to_uint Float32Regs:$a)), + (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(i32 (fp_to_uint Float32Regs:$a)), + (CVT_u32_f32 Float32Regs:$a, CvtRZI)>; +def : Pat<(i64 (fp_to_uint Float32Regs:$a)), + (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(i64 (fp_to_uint Float32Regs:$a)), + (CVT_u64_f32 Float32Regs:$a, CvtRZI)>; + +// f64 -> sint +def : Pat<(i16 (fp_to_sint Float64Regs:$a)), + (CVT_s16_f64 Float64Regs:$a, CvtRZI)>; +def : Pat<(i32 (fp_to_sint Float64Regs:$a)), + (CVT_s32_f64 Float64Regs:$a, CvtRZI)>; +def : Pat<(i64 (fp_to_sint Float64Regs:$a)), + (CVT_s64_f64 Float64Regs:$a, CvtRZI)>; + +// f64 -> uint +def : Pat<(i16 (fp_to_uint Float64Regs:$a)), + (CVT_u16_f64 Float64Regs:$a, CvtRZI)>; +def : Pat<(i32 (fp_to_uint Float64Regs:$a)), + (CVT_u32_f64 Float64Regs:$a, CvtRZI)>; +def : Pat<(i64 (fp_to_uint Float64Regs:$a)), + (CVT_u64_f64 Float64Regs:$a, CvtRZI)>; + +// sext i1 +def : Pat<(i16 (sext Int1Regs:$a)), + (SELP_s16ii -1, 0, Int1Regs:$a)>; +def : Pat<(i32 (sext Int1Regs:$a)), + (SELP_s32ii -1, 0, Int1Regs:$a)>; +def : Pat<(i64 (sext Int1Regs:$a)), + (SELP_s64ii -1, 0, Int1Regs:$a)>; + +// zext i1 +def : Pat<(i16 (zext Int1Regs:$a)), + (SELP_u16ii 1, 0, Int1Regs:$a)>; +def : Pat<(i32 (zext Int1Regs:$a)), + (SELP_u32ii 1, 0, Int1Regs:$a)>; +def : Pat<(i64 (zext Int1Regs:$a)), + (SELP_u64ii 1, 0, 
Int1Regs:$a)>; + +// anyext i1 +def : Pat<(i16 (anyext Int1Regs:$a)), + (SELP_u16ii 1, 0, Int1Regs:$a)>; +def : Pat<(i32 (anyext Int1Regs:$a)), + (SELP_u32ii 1, 0, Int1Regs:$a)>; +def : Pat<(i64 (anyext Int1Regs:$a)), + (SELP_u64ii 1, 0, Int1Regs:$a)>; + +// sext i16 +def : Pat<(i32 (sext Int16Regs:$a)), + (CVT_s32_s16 Int16Regs:$a, CvtNONE)>; +def : Pat<(i64 (sext Int16Regs:$a)), + (CVT_s64_s16 Int16Regs:$a, CvtNONE)>; + +// zext i16 +def : Pat<(i32 (zext Int16Regs:$a)), + (CVT_u32_u16 Int16Regs:$a, CvtNONE)>; +def : Pat<(i64 (zext Int16Regs:$a)), + (CVT_u64_u16 Int16Regs:$a, CvtNONE)>; + +// anyext i16 +def : Pat<(i32 (anyext Int16Regs:$a)), + (CVT_u32_u16 Int16Regs:$a, CvtNONE)>; +def : Pat<(i64 (anyext Int16Regs:$a)), + (CVT_u64_u16 Int16Regs:$a, CvtNONE)>; + +// sext i32 +def : Pat<(i64 (sext Int32Regs:$a)), + (CVT_s64_s32 Int32Regs:$a, CvtNONE)>; + +// zext i32 +def : Pat<(i64 (zext Int32Regs:$a)), + (CVT_u64_u32 Int32Regs:$a, CvtNONE)>; + +// anyext i32 +def : Pat<(i64 (anyext Int32Regs:$a)), + (CVT_u64_u32 Int32Regs:$a, CvtNONE)>; + + +// truncate i64 +def : Pat<(i32 (trunc Int64Regs:$a)), + (CVT_u32_u64 Int64Regs:$a, CvtNONE)>; +def : Pat<(i16 (trunc Int64Regs:$a)), + (CVT_u16_u64 Int64Regs:$a, CvtNONE)>; +def : Pat<(i1 (trunc Int64Regs:$a)), + (SETP_b64ri (ANDb64ri Int64Regs:$a, 1), 1, CmpEQ)>; + +// truncate i32 +def : Pat<(i16 (trunc Int32Regs:$a)), + (CVT_u16_u32 Int32Regs:$a, CvtNONE)>; +def : Pat<(i1 (trunc Int32Regs:$a)), + (SETP_b32ri (ANDb32ri Int32Regs:$a, 1), 1, CmpEQ)>; + +// truncate i16 +def : Pat<(i1 (trunc Int16Regs:$a)), + (SETP_b16ri (ANDb16ri Int16Regs:$a, 1), 1, CmpEQ)>; + + +// Select instructions with 32-bit predicates def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b), - (SELECTi16rr Int16Regs:$a, Int16Regs:$b, - (TRUNC_32to1 Int32Regs:$pred))>; + (SELP_b16rr Int16Regs:$a, Int16Regs:$b, + (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b), - (SELECTi32rr 
Int32Regs:$a, Int32Regs:$b, - (TRUNC_32to1 Int32Regs:$pred))>; + (SELP_b32rr Int32Regs:$a, Int32Regs:$b, + (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b), - (SELECTi64rr Int64Regs:$a, Int64Regs:$b, - (TRUNC_32to1 Int32Regs:$pred))>; + (SELP_b64rr Int64Regs:$a, Int64Regs:$b, + (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b), - (SELECTf32rr Float32Regs:$a, Float32Regs:$b, - (TRUNC_32to1 Int32Regs:$pred))>; + (SELP_f32rr Float32Regs:$a, Float32Regs:$b, + (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b), - (SELECTf64rr Float64Regs:$a, Float64Regs:$b, - (TRUNC_32to1 Int32Regs:$pred))>; + (SELP_f64rr Float64Regs:$a, Float64Regs:$b, + (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; + class F_BITCONVERT : @@ -2400,21 +2406,17 @@ def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2), "mov.b64\t{{$d1, $d2}}, $s;", []>; -def FPRound_ftz : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a), - "cvt.rn.ftz.f32.f64 \t$d, $a;", - [(set Float32Regs:$d, (fround Float64Regs:$a))]>, Requires<[doF32FTZ]>; - -def FPRound : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a), - "cvt.rn.f32.f64 \t$d, $a;", - [(set Float32Regs:$d, (fround Float64Regs:$a))]>; - -def FPExtend_ftz : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a), - "cvt.ftz.f64.f32 \t$d, $a;", - [(set Float64Regs:$d, (fextend Float32Regs:$a))]>, Requires<[doF32FTZ]>; +// fround f64 -> f32 +def : Pat<(f32 (fround Float64Regs:$a)), + (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(f32 (fround Float64Regs:$a)), + (CVT_f32_f64 Float64Regs:$a, CvtRN)>; -def FPExtend : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a), - "cvt.f64.f32 \t$d, $a;", - [(set Float64Regs:$d, (fextend Float32Regs:$a))]>; +// fextend f32 -> f64 +def : Pat<(f64 (fextend 
Float32Regs:$a)), + (CVT_f64_f32 Float32Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(f64 (fextend Float32Regs:$a)), + (CVT_f64_f32 Float32Regs:$a, CvtNONE)>; def retflag : SDNode<"NVPTXISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInGlue]>; @@ -2442,8 +2444,8 @@ let isTerminator=1 in { [(br bb:$target)]>; } -def : Pat<(brcond Int32Regs:$a, bb:$target), (CBranch - (ISetUNEi32ri_p Int32Regs:$a, 0), bb:$target)>; +def : Pat<(brcond Int32Regs:$a, bb:$target), + (CBranch (SETP_u32ri Int32Regs:$a, 0, CmpNE), bb:$target)>; // SelectionDAGBuilder::visitSWitchCase() will invert the condition of a // conditional branch if diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td index caa7775a4c4..93cdfef1fe4 100644 --- a/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -82,49 +82,36 @@ def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>; //----------------------------------- // Map min(1.0, max(0.0, x)) to sat(x) -multiclass SAT { - - // fmin(1.0, fmax(0.0, x)) => sat(x) - def SAT11 : NVPTXInst<(outs regclass:$dst), - (ins fimm:$srcf0, fimm:$srcf1, regclass:$src), - OpStr, - [(set regclass:$dst, (IntMinOp f1:$srcf0 , - (IntMaxOp f0:$srcf1, regclass:$src)))]>; - - // fmin(1.0, fmax(x, 0.0)) => sat(x) - def SAT12 : NVPTXInst<(outs regclass:$dst), - (ins fimm:$srcf0, fimm:$srcf1, regclass:$src), - OpStr, - [(set regclass:$dst, (IntMinOp f1:$srcf0 , - (IntMaxOp regclass:$src, f0:$srcf1)))]>; - - // fmin(fmax(0.0, x), 1.0) => sat(x) - def SAT13 : NVPTXInst<(outs regclass:$dst), - (ins fimm:$srcf0, fimm:$srcf1, regclass:$src), - OpStr, - [(set regclass:$dst, (IntMinOp - (IntMaxOp f0:$srcf0, regclass:$src), f1:$srcf1))]>; - - // fmin(fmax(x, 0.0), 1.0) => sat(x) - def SAT14 : NVPTXInst<(outs regclass:$dst), - (ins fimm:$srcf0, fimm:$srcf1, regclass:$src), - OpStr, - [(set regclass:$dst, (IntMinOp - (IntMaxOp regclass:$src, f0:$srcf0), f1:$srcf1))]>; - -} -// Note that max(0.0, min(x, 1.0)) 
cannot be mapped to sat(x) because when x -// is NaN +// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x is +// NaN // max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0. // Same story for fmax, fmin. -defm SAT_fmin_fmax_f : SAT; -defm SAT_fmin_fmax_d : SAT; +def : Pat<(int_nvvm_fmin_f immFloat1, + (int_nvvm_fmax_f immFloat0, Float32Regs:$a)), + (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; +def : Pat<(int_nvvm_fmin_f immFloat1, + (int_nvvm_fmax_f Float32Regs:$a, immFloat0)), + (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; +def : Pat<(int_nvvm_fmin_f + (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1), + (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; +def : Pat<(int_nvvm_fmin_f + (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1), + (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; + +def : Pat<(int_nvvm_fmin_d immDouble1, + (int_nvvm_fmax_d immDouble0, Float64Regs:$a)), + (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; +def : Pat<(int_nvvm_fmin_d immDouble1, + (int_nvvm_fmax_d Float64Regs:$a, immDouble0)), + (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; +def : Pat<(int_nvvm_fmin_d + (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1), + (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; +def : Pat<(int_nvvm_fmin_d + (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1), + (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; // We need a full string for OpcStr here because we need to deal with case like @@ -312,19 +299,19 @@ def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;", // Floor Ceil // -def INT_NVVM_FLOOR_FTZ_F : F_MATH_1<"cvt.rmi.ftz.f32.f32 \t$dst, $src0;", - Float32Regs, Float32Regs, int_nvvm_floor_ftz_f>; -def INT_NVVM_FLOOR_F : F_MATH_1<"cvt.rmi.f32.f32 \t$dst, $src0;", - Float32Regs, Float32Regs, int_nvvm_floor_f>; -def INT_NVVM_FLOOR_D : F_MATH_1<"cvt.rmi.f64.f64 \t$dst, $src0;", - Float64Regs, Float64Regs, int_nvvm_floor_d>; +def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a), + (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>; +def : Pat<(int_nvvm_floor_f Float32Regs:$a), 
+ (CVT_f32_f32 Float32Regs:$a, CvtRMI)>; +def : Pat<(int_nvvm_floor_d Float64Regs:$a), + (CVT_f64_f64 Float64Regs:$a, CvtRMI)>; -def INT_NVVM_CEIL_FTZ_F : F_MATH_1<"cvt.rpi.ftz.f32.f32 \t$dst, $src0;", - Float32Regs, Float32Regs, int_nvvm_ceil_ftz_f>; -def INT_NVVM_CEIL_F : F_MATH_1<"cvt.rpi.f32.f32 \t$dst, $src0;", - Float32Regs, Float32Regs, int_nvvm_ceil_f>; -def INT_NVVM_CEIL_D : F_MATH_1<"cvt.rpi.f64.f64 \t$dst, $src0;", - Float64Regs, Float64Regs, int_nvvm_ceil_d>; +def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a), + (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>; +def : Pat<(int_nvvm_ceil_f Float32Regs:$a), + (CVT_f32_f32 Float32Regs:$a, CvtRPI)>; +def : Pat<(int_nvvm_ceil_d Float64Regs:$a), + (CVT_f64_f64 Float64Regs:$a, CvtRPI)>; // // Abs @@ -347,37 +334,34 @@ def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs, // Round // -def INT_NVVM_ROUND_FTZ_F : F_MATH_1<"cvt.rni.ftz.f32.f32 \t$dst, $src0;", - Float32Regs, Float32Regs, int_nvvm_round_ftz_f>; -def INT_NVVM_ROUND_F : F_MATH_1<"cvt.rni.f32.f32 \t$dst, $src0;", Float32Regs, - Float32Regs, int_nvvm_round_f>; - -def INT_NVVM_ROUND_D : F_MATH_1<"cvt.rni.f64.f64 \t$dst, $src0;", Float64Regs, - Float64Regs, int_nvvm_round_d>; +def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a), + (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>; +def : Pat<(int_nvvm_round_f Float32Regs:$a), + (CVT_f32_f32 Float32Regs:$a, CvtRNI)>; +def : Pat<(int_nvvm_round_d Float64Regs:$a), + (CVT_f64_f64 Float64Regs:$a, CvtRNI)>; // // Trunc // -def INT_NVVM_TRUNC_FTZ_F : F_MATH_1<"cvt.rzi.ftz.f32.f32 \t$dst, $src0;", - Float32Regs, Float32Regs, int_nvvm_trunc_ftz_f>; -def INT_NVVM_TRUNC_F : F_MATH_1<"cvt.rzi.f32.f32 \t$dst, $src0;", Float32Regs, - Float32Regs, int_nvvm_trunc_f>; - -def INT_NVVM_TRUNC_D : F_MATH_1<"cvt.rzi.f64.f64 \t$dst, $src0;", Float64Regs, - Float64Regs, int_nvvm_trunc_d>; +def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a), + (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>; +def : Pat<(int_nvvm_trunc_f Float32Regs:$a), + 
(CVT_f32_f32 Float32Regs:$a, CvtRZI)>; +def : Pat<(int_nvvm_trunc_d Float64Regs:$a), + (CVT_f64_f64 Float64Regs:$a, CvtRZI)>; // // Saturate // -def INT_NVVM_SATURATE_FTZ_F : F_MATH_1<"cvt.sat.ftz.f32.f32 \t$dst, $src0;", - Float32Regs, Float32Regs, int_nvvm_saturate_ftz_f>; -def INT_NVVM_SATURATE_F : F_MATH_1<"cvt.sat.f32.f32 \t$dst, $src0;", - Float32Regs, Float32Regs, int_nvvm_saturate_f>; - -def INT_NVVM_SATURATE_D : F_MATH_1<"cvt.sat.f64.f64 \t$dst, $src0;", - Float64Regs, Float64Regs, int_nvvm_saturate_d>; +def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a), + (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>; +def : Pat<(int_nvvm_saturate_f Float32Regs:$a), + (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; +def : Pat<(int_nvvm_saturate_d Float64Regs:$a), + (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; // // Exp2 Log2 @@ -568,110 +552,110 @@ def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;", // Convert // -def INT_NVVM_D2F_RN_FTZ : F_MATH_1<"cvt.rn.ftz.f32.f64 \t$dst, $src0;", - Float32Regs, Float64Regs, int_nvvm_d2f_rn_ftz>; -def INT_NVVM_D2F_RN : F_MATH_1<"cvt.rn.f32.f64 \t$dst, $src0;", - Float32Regs, Float64Regs, int_nvvm_d2f_rn>; -def INT_NVVM_D2F_RZ_FTZ : F_MATH_1<"cvt.rz.ftz.f32.f64 \t$dst, $src0;", - Float32Regs, Float64Regs, int_nvvm_d2f_rz_ftz>; -def INT_NVVM_D2F_RZ : F_MATH_1<"cvt.rz.f32.f64 \t$dst, $src0;", - Float32Regs, Float64Regs, int_nvvm_d2f_rz>; -def INT_NVVM_D2F_RM_FTZ : F_MATH_1<"cvt.rm.ftz.f32.f64 \t$dst, $src0;", - Float32Regs, Float64Regs, int_nvvm_d2f_rm_ftz>; -def INT_NVVM_D2F_RM : F_MATH_1<"cvt.rm.f32.f64 \t$dst, $src0;", - Float32Regs, Float64Regs, int_nvvm_d2f_rm>; -def INT_NVVM_D2F_RP_FTZ : F_MATH_1<"cvt.rp.ftz.f32.f64 \t$dst, $src0;", - Float32Regs, Float64Regs, int_nvvm_d2f_rp_ftz>; -def INT_NVVM_D2F_RP : F_MATH_1<"cvt.rp.f32.f64 \t$dst, $src0;", - Float32Regs, Float64Regs, int_nvvm_d2f_rp>; - -def INT_NVVM_D2I_RN : F_MATH_1<"cvt.rni.s32.f64 \t$dst, $src0;", - Int32Regs, Float64Regs, int_nvvm_d2i_rn>; -def INT_NVVM_D2I_RZ : 
F_MATH_1<"cvt.rzi.s32.f64 \t$dst, $src0;", - Int32Regs, Float64Regs, int_nvvm_d2i_rz>; -def INT_NVVM_D2I_RM : F_MATH_1<"cvt.rmi.s32.f64 \t$dst, $src0;", - Int32Regs, Float64Regs, int_nvvm_d2i_rm>; -def INT_NVVM_D2I_RP : F_MATH_1<"cvt.rpi.s32.f64 \t$dst, $src0;", - Int32Regs, Float64Regs, int_nvvm_d2i_rp>; - -def INT_NVVM_D2UI_RN : F_MATH_1<"cvt.rni.u32.f64 \t$dst, $src0;", - Int32Regs, Float64Regs, int_nvvm_d2ui_rn>; -def INT_NVVM_D2UI_RZ : F_MATH_1<"cvt.rzi.u32.f64 \t$dst, $src0;", - Int32Regs, Float64Regs, int_nvvm_d2ui_rz>; -def INT_NVVM_D2UI_RM : F_MATH_1<"cvt.rmi.u32.f64 \t$dst, $src0;", - Int32Regs, Float64Regs, int_nvvm_d2ui_rm>; -def INT_NVVM_D2UI_RP : F_MATH_1<"cvt.rpi.u32.f64 \t$dst, $src0;", - Int32Regs, Float64Regs, int_nvvm_d2ui_rp>; - -def INT_NVVM_I2D_RN : F_MATH_1<"cvt.rn.f64.s32 \t$dst, $src0;", - Float64Regs, Int32Regs, int_nvvm_i2d_rn>; -def INT_NVVM_I2D_RZ : F_MATH_1<"cvt.rz.f64.s32 \t$dst, $src0;", - Float64Regs, Int32Regs, int_nvvm_i2d_rz>; -def INT_NVVM_I2D_RM : F_MATH_1<"cvt.rm.f64.s32 \t$dst, $src0;", - Float64Regs, Int32Regs, int_nvvm_i2d_rm>; -def INT_NVVM_I2D_RP : F_MATH_1<"cvt.rp.f64.s32 \t$dst, $src0;", - Float64Regs, Int32Regs, int_nvvm_i2d_rp>; - -def INT_NVVM_UI2D_RN : F_MATH_1<"cvt.rn.f64.u32 \t$dst, $src0;", - Float64Regs, Int32Regs, int_nvvm_ui2d_rn>; -def INT_NVVM_UI2D_RZ : F_MATH_1<"cvt.rz.f64.u32 \t$dst, $src0;", - Float64Regs, Int32Regs, int_nvvm_ui2d_rz>; -def INT_NVVM_UI2D_RM : F_MATH_1<"cvt.rm.f64.u32 \t$dst, $src0;", - Float64Regs, Int32Regs, int_nvvm_ui2d_rm>; -def INT_NVVM_UI2D_RP : F_MATH_1<"cvt.rp.f64.u32 \t$dst, $src0;", - Float64Regs, Int32Regs, int_nvvm_ui2d_rp>; - -def INT_NVVM_F2I_RN_FTZ : F_MATH_1<"cvt.rni.ftz.s32.f32 \t$dst, $src0;", - Int32Regs, Float32Regs, int_nvvm_f2i_rn_ftz>; -def INT_NVVM_F2I_RN : F_MATH_1<"cvt.rni.s32.f32 \t$dst, $src0;", Int32Regs, - Float32Regs, int_nvvm_f2i_rn>; -def INT_NVVM_F2I_RZ_FTZ : F_MATH_1<"cvt.rzi.ftz.s32.f32 \t$dst, $src0;", - Int32Regs, Float32Regs, int_nvvm_f2i_rz_ftz>; 
-def INT_NVVM_F2I_RZ : F_MATH_1<"cvt.rzi.s32.f32 \t$dst, $src0;", Int32Regs, - Float32Regs, int_nvvm_f2i_rz>; -def INT_NVVM_F2I_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.s32.f32 \t$dst, $src0;", - Int32Regs, Float32Regs, int_nvvm_f2i_rm_ftz>; -def INT_NVVM_F2I_RM : F_MATH_1<"cvt.rmi.s32.f32 \t$dst, $src0;", Int32Regs, - Float32Regs, int_nvvm_f2i_rm>; -def INT_NVVM_F2I_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.s32.f32 \t$dst, $src0;", - Int32Regs, Float32Regs, int_nvvm_f2i_rp_ftz>; -def INT_NVVM_F2I_RP : F_MATH_1<"cvt.rpi.s32.f32 \t$dst, $src0;", Int32Regs, - Float32Regs, int_nvvm_f2i_rp>; - -def INT_NVVM_F2UI_RN_FTZ : F_MATH_1<"cvt.rni.ftz.u32.f32 \t$dst, $src0;", - Int32Regs, Float32Regs, int_nvvm_f2ui_rn_ftz>; -def INT_NVVM_F2UI_RN : F_MATH_1<"cvt.rni.u32.f32 \t$dst, $src0;", Int32Regs, - Float32Regs, int_nvvm_f2ui_rn>; -def INT_NVVM_F2UI_RZ_FTZ : F_MATH_1<"cvt.rzi.ftz.u32.f32 \t$dst, $src0;", - Int32Regs, Float32Regs, int_nvvm_f2ui_rz_ftz>; -def INT_NVVM_F2UI_RZ : F_MATH_1<"cvt.rzi.u32.f32 \t$dst, $src0;", Int32Regs, - Float32Regs, int_nvvm_f2ui_rz>; -def INT_NVVM_F2UI_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.u32.f32 \t$dst, $src0;", - Int32Regs, Float32Regs, int_nvvm_f2ui_rm_ftz>; -def INT_NVVM_F2UI_RM : F_MATH_1<"cvt.rmi.u32.f32 \t$dst, $src0;", Int32Regs, - Float32Regs, int_nvvm_f2ui_rm>; -def INT_NVVM_F2UI_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.u32.f32 \t$dst, $src0;", - Int32Regs, Float32Regs, int_nvvm_f2ui_rp_ftz>; -def INT_NVVM_F2UI_RP : F_MATH_1<"cvt.rpi.u32.f32 \t$dst, $src0;", Int32Regs, - Float32Regs, int_nvvm_f2ui_rp>; - -def INT_NVVM_I2F_RN : F_MATH_1<"cvt.rn.f32.s32 \t$dst, $src0;", Float32Regs, - Int32Regs, int_nvvm_i2f_rn>; -def INT_NVVM_I2F_RZ : F_MATH_1<"cvt.rz.f32.s32 \t$dst, $src0;", Float32Regs, - Int32Regs, int_nvvm_i2f_rz>; -def INT_NVVM_I2F_RM : F_MATH_1<"cvt.rm.f32.s32 \t$dst, $src0;", Float32Regs, - Int32Regs, int_nvvm_i2f_rm>; -def INT_NVVM_I2F_RP : F_MATH_1<"cvt.rp.f32.s32 \t$dst, $src0;", Float32Regs, - Int32Regs, int_nvvm_i2f_rp>; - -def INT_NVVM_UI2F_RN : 
F_MATH_1<"cvt.rn.f32.u32 \t$dst, $src0;", Float32Regs, - Int32Regs, int_nvvm_ui2f_rn>; -def INT_NVVM_UI2F_RZ : F_MATH_1<"cvt.rz.f32.u32 \t$dst, $src0;", Float32Regs, - Int32Regs, int_nvvm_ui2f_rz>; -def INT_NVVM_UI2F_RM : F_MATH_1<"cvt.rm.f32.u32 \t$dst, $src0;", Float32Regs, - Int32Regs, int_nvvm_ui2f_rm>; -def INT_NVVM_UI2F_RP : F_MATH_1<"cvt.rp.f32.u32 \t$dst, $src0;", Float32Regs, - Int32Regs, int_nvvm_ui2f_rp>; +def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a), + (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>; +def : Pat<(int_nvvm_d2f_rn Float64Regs:$a), + (CVT_f32_f64 Float64Regs:$a, CvtRN)>; +def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a), + (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>; +def : Pat<(int_nvvm_d2f_rz Float64Regs:$a), + (CVT_f32_f64 Float64Regs:$a, CvtRZ)>; +def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a), + (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>; +def : Pat<(int_nvvm_d2f_rm Float64Regs:$a), + (CVT_f32_f64 Float64Regs:$a, CvtRM)>; +def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a), + (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>; +def : Pat<(int_nvvm_d2f_rp Float64Regs:$a), + (CVT_f32_f64 Float64Regs:$a, CvtRP)>; + +def : Pat<(int_nvvm_d2i_rn Float64Regs:$a), + (CVT_s32_f64 Float64Regs:$a, CvtRNI)>; +def : Pat<(int_nvvm_d2i_rz Float64Regs:$a), + (CVT_s32_f64 Float64Regs:$a, CvtRZI)>; +def : Pat<(int_nvvm_d2i_rm Float64Regs:$a), + (CVT_s32_f64 Float64Regs:$a, CvtRMI)>; +def : Pat<(int_nvvm_d2i_rp Float64Regs:$a), + (CVT_s32_f64 Float64Regs:$a, CvtRPI)>; + +def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a), + (CVT_u32_f64 Float64Regs:$a, CvtRNI)>; +def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a), + (CVT_u32_f64 Float64Regs:$a, CvtRZI)>; +def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a), + (CVT_u32_f64 Float64Regs:$a, CvtRMI)>; +def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a), + (CVT_u32_f64 Float64Regs:$a, CvtRPI)>; + +def : Pat<(int_nvvm_i2d_rn Int32Regs:$a), + (CVT_f64_s32 Int32Regs:$a, CvtRN)>; +def : Pat<(int_nvvm_i2d_rz Int32Regs:$a), + (CVT_f64_s32 Int32Regs:$a, CvtRZ)>; +def : 
Pat<(int_nvvm_i2d_rm Int32Regs:$a), + (CVT_f64_s32 Int32Regs:$a, CvtRM)>; +def : Pat<(int_nvvm_i2d_rp Int32Regs:$a), + (CVT_f64_s32 Int32Regs:$a, CvtRP)>; + +def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a), + (CVT_f64_u32 Int32Regs:$a, CvtRN)>; +def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a), + (CVT_f64_u32 Int32Regs:$a, CvtRZ)>; +def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a), + (CVT_f64_u32 Int32Regs:$a, CvtRM)>; +def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a), + (CVT_f64_u32 Int32Regs:$a, CvtRP)>; + +def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a), + (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>; +def : Pat<(int_nvvm_f2i_rn Float32Regs:$a), + (CVT_s32_f32 Float32Regs:$a, CvtRNI)>; +def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a), + (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>; +def : Pat<(int_nvvm_f2i_rz Float32Regs:$a), + (CVT_s32_f32 Float32Regs:$a, CvtRZI)>; +def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a), + (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>; +def : Pat<(int_nvvm_f2i_rm Float32Regs:$a), + (CVT_s32_f32 Float32Regs:$a, CvtRMI)>; +def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a), + (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>; +def : Pat<(int_nvvm_f2i_rp Float32Regs:$a), + (CVT_s32_f32 Float32Regs:$a, CvtRPI)>; + +def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a), + (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>; +def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a), + (CVT_u32_f32 Float32Regs:$a, CvtRNI)>; +def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a), + (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>; +def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a), + (CVT_u32_f32 Float32Regs:$a, CvtRZI)>; +def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a), + (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>; +def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a), + (CVT_u32_f32 Float32Regs:$a, CvtRMI)>; +def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a), + (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>; +def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a), + (CVT_u32_f32 Float32Regs:$a, CvtRPI)>; + +def : Pat<(int_nvvm_i2f_rn Int32Regs:$a), + (CVT_f32_s32 
Int32Regs:$a, CvtRN)>; +def : Pat<(int_nvvm_i2f_rz Int32Regs:$a), + (CVT_f32_s32 Int32Regs:$a, CvtRZ)>; +def : Pat<(int_nvvm_i2f_rm Int32Regs:$a), + (CVT_f32_s32 Int32Regs:$a, CvtRM)>; +def : Pat<(int_nvvm_i2f_rp Int32Regs:$a), + (CVT_f32_s32 Int32Regs:$a, CvtRP)>; + +def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a), + (CVT_f32_u32 Int32Regs:$a, CvtRN)>; +def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a), + (CVT_f32_u32 Int32Regs:$a, CvtRZ)>; +def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a), + (CVT_f32_u32 Int32Regs:$a, CvtRM)>; +def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a), + (CVT_f32_u32 Int32Regs:$a, CvtRP)>; def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};", Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>; @@ -687,91 +671,106 @@ def INT_NVVM_D2I_HI : F_MATH_1; -def INT_NVVM_F2LL_RN_FTZ : F_MATH_1<"cvt.rni.ftz.s64.f32 \t$dst, $src0;", - Int64Regs, Float32Regs, int_nvvm_f2ll_rn_ftz>; -def INT_NVVM_F2LL_RN : F_MATH_1<"cvt.rni.s64.f32 \t$dst, $src0;", Int64Regs, - Float32Regs, int_nvvm_f2ll_rn>; -def INT_NVVM_F2LL_RZ_FTZ : F_MATH_1<"cvt.rzi.ftz.s64.f32 \t$dst, $src0;", - Int64Regs, Float32Regs, int_nvvm_f2ll_rz_ftz>; -def INT_NVVM_F2LL_RZ : F_MATH_1<"cvt.rzi.s64.f32 \t$dst, $src0;", Int64Regs, - Float32Regs, int_nvvm_f2ll_rz>; -def INT_NVVM_F2LL_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.s64.f32 \t$dst, $src0;", - Int64Regs, Float32Regs, int_nvvm_f2ll_rm_ftz>; -def INT_NVVM_F2LL_RM : F_MATH_1<"cvt.rmi.s64.f32 \t$dst, $src0;", Int64Regs, - Float32Regs, int_nvvm_f2ll_rm>; -def INT_NVVM_F2LL_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.s64.f32 \t$dst, $src0;", - Int64Regs, Float32Regs, int_nvvm_f2ll_rp_ftz>; -def INT_NVVM_F2LL_RP : F_MATH_1<"cvt.rpi.s64.f32 \t$dst, $src0;", Int64Regs, - Float32Regs, int_nvvm_f2ll_rp>; - -def INT_NVVM_F2ULL_RN_FTZ : F_MATH_1<"cvt.rni.ftz.u64.f32 \t$dst, $src0;", - Int64Regs, Float32Regs, int_nvvm_f2ull_rn_ftz>; -def INT_NVVM_F2ULL_RN : F_MATH_1<"cvt.rni.u64.f32 \t$dst, $src0;", Int64Regs, - Float32Regs, int_nvvm_f2ull_rn>; -def INT_NVVM_F2ULL_RZ_FTZ : 
F_MATH_1<"cvt.rzi.ftz.u64.f32 \t$dst, $src0;", - Int64Regs, Float32Regs, int_nvvm_f2ull_rz_ftz>; -def INT_NVVM_F2ULL_RZ : F_MATH_1<"cvt.rzi.u64.f32 \t$dst, $src0;", Int64Regs, - Float32Regs, int_nvvm_f2ull_rz>; -def INT_NVVM_F2ULL_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.u64.f32 \t$dst, $src0;", - Int64Regs, Float32Regs, int_nvvm_f2ull_rm_ftz>; -def INT_NVVM_F2ULL_RM : F_MATH_1<"cvt.rmi.u64.f32 \t$dst, $src0;", Int64Regs, - Float32Regs, int_nvvm_f2ull_rm>; -def INT_NVVM_F2ULL_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.u64.f32 \t$dst, $src0;", - Int64Regs, Float32Regs, int_nvvm_f2ull_rp_ftz>; -def INT_NVVM_F2ULL_RP : F_MATH_1<"cvt.rpi.u64.f32 \t$dst, $src0;", Int64Regs, - Float32Regs, int_nvvm_f2ull_rp>; - -def INT_NVVM_D2LL_RN : F_MATH_1<"cvt.rni.s64.f64 \t$dst, $src0;", Int64Regs, - Float64Regs, int_nvvm_d2ll_rn>; -def INT_NVVM_D2LL_RZ : F_MATH_1<"cvt.rzi.s64.f64 \t$dst, $src0;", Int64Regs, - Float64Regs, int_nvvm_d2ll_rz>; -def INT_NVVM_D2LL_RM : F_MATH_1<"cvt.rmi.s64.f64 \t$dst, $src0;", Int64Regs, - Float64Regs, int_nvvm_d2ll_rm>; -def INT_NVVM_D2LL_RP : F_MATH_1<"cvt.rpi.s64.f64 \t$dst, $src0;", Int64Regs, - Float64Regs, int_nvvm_d2ll_rp>; - -def INT_NVVM_D2ULL_RN : F_MATH_1<"cvt.rni.u64.f64 \t$dst, $src0;", Int64Regs, - Float64Regs, int_nvvm_d2ull_rn>; -def INT_NVVM_D2ULL_RZ : F_MATH_1<"cvt.rzi.u64.f64 \t$dst, $src0;", Int64Regs, - Float64Regs, int_nvvm_d2ull_rz>; -def INT_NVVM_D2ULL_RM : F_MATH_1<"cvt.rmi.u64.f64 \t$dst, $src0;", Int64Regs, - Float64Regs, int_nvvm_d2ull_rm>; -def INT_NVVM_D2ULL_RP : F_MATH_1<"cvt.rpi.u64.f64 \t$dst, $src0;", Int64Regs, - Float64Regs, int_nvvm_d2ull_rp>; - -def INT_NVVM_LL2F_RN : F_MATH_1<"cvt.rn.f32.s64 \t$dst, $src0;", Float32Regs, - Int64Regs, int_nvvm_ll2f_rn>; -def INT_NVVM_LL2F_RZ : F_MATH_1<"cvt.rz.f32.s64 \t$dst, $src0;", Float32Regs, - Int64Regs, int_nvvm_ll2f_rz>; -def INT_NVVM_LL2F_RM : F_MATH_1<"cvt.rm.f32.s64 \t$dst, $src0;", Float32Regs, - Int64Regs, int_nvvm_ll2f_rm>; -def INT_NVVM_LL2F_RP : F_MATH_1<"cvt.rp.f32.s64 \t$dst, $src0;", 
Float32Regs, - Int64Regs, int_nvvm_ll2f_rp>; -def INT_NVVM_ULL2F_RN : F_MATH_1<"cvt.rn.f32.u64 \t$dst, $src0;", Float32Regs, - Int64Regs, int_nvvm_ull2f_rn>; -def INT_NVVM_ULL2F_RZ : F_MATH_1<"cvt.rz.f32.u64 \t$dst, $src0;", Float32Regs, - Int64Regs, int_nvvm_ull2f_rz>; -def INT_NVVM_ULL2F_RM : F_MATH_1<"cvt.rm.f32.u64 \t$dst, $src0;", Float32Regs, - Int64Regs, int_nvvm_ull2f_rm>; -def INT_NVVM_ULL2F_RP : F_MATH_1<"cvt.rp.f32.u64 \t$dst, $src0;", Float32Regs, - Int64Regs, int_nvvm_ull2f_rp>; - -def INT_NVVM_LL2D_RN : F_MATH_1<"cvt.rn.f64.s64 \t$dst, $src0;", Float64Regs, - Int64Regs, int_nvvm_ll2d_rn>; -def INT_NVVM_LL2D_RZ : F_MATH_1<"cvt.rz.f64.s64 \t$dst, $src0;", Float64Regs, - Int64Regs, int_nvvm_ll2d_rz>; -def INT_NVVM_LL2D_RM : F_MATH_1<"cvt.rm.f64.s64 \t$dst, $src0;", Float64Regs, - Int64Regs, int_nvvm_ll2d_rm>; -def INT_NVVM_LL2D_RP : F_MATH_1<"cvt.rp.f64.s64 \t$dst, $src0;", Float64Regs, - Int64Regs, int_nvvm_ll2d_rp>; -def INT_NVVM_ULL2D_RN : F_MATH_1<"cvt.rn.f64.u64 \t$dst, $src0;", Float64Regs, - Int64Regs, int_nvvm_ull2d_rn>; -def INT_NVVM_ULL2D_RZ : F_MATH_1<"cvt.rz.f64.u64 \t$dst, $src0;", Float64Regs, - Int64Regs, int_nvvm_ull2d_rz>; -def INT_NVVM_ULL2D_RM : F_MATH_1<"cvt.rm.f64.u64 \t$dst, $src0;", Float64Regs, - Int64Regs, int_nvvm_ull2d_rm>; -def INT_NVVM_ULL2D_RP : F_MATH_1<"cvt.rp.f64.u64 \t$dst, $src0;", Float64Regs, - Int64Regs, int_nvvm_ull2d_rp>; +def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a), + (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>; +def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a), + (CVT_s64_f32 Float32Regs:$a, CvtRNI)>; +def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a), + (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>; +def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a), + (CVT_s64_f32 Float32Regs:$a, CvtRZI)>; +def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a), + (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>; +def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a), + (CVT_s64_f32 Float32Regs:$a, CvtRMI)>; +def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a), + 
(CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>; +def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a), + (CVT_s64_f32 Float32Regs:$a, CvtRPI)>; + +def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a), + (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>; +def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a), + (CVT_u64_f32 Float32Regs:$a, CvtRNI)>; +def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a), + (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>; +def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a), + (CVT_u64_f32 Float32Regs:$a, CvtRZI)>; +def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a), + (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>; +def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a), + (CVT_u64_f32 Float32Regs:$a, CvtRMI)>; +def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a), + (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>; +def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a), + (CVT_u64_f32 Float32Regs:$a, CvtRPI)>; + +def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a), + (CVT_s64_f64 Float64Regs:$a, CvtRNI)>; +def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a), + (CVT_s64_f64 Float64Regs:$a, CvtRZI)>; +def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a), + (CVT_s64_f64 Float64Regs:$a, CvtRMI)>; +def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a), + (CVT_s64_f64 Float64Regs:$a, CvtRPI)>; + +def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a), + (CVT_u64_f64 Float64Regs:$a, CvtRNI)>; +def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a), + (CVT_u64_f64 Float64Regs:$a, CvtRZI)>; +def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a), + (CVT_u64_f64 Float64Regs:$a, CvtRMI)>; +def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a), + (CVT_u64_f64 Float64Regs:$a, CvtRPI)>; + +def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a), + (CVT_f32_s64 Int64Regs:$a, CvtRN)>; +def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a), + (CVT_f32_s64 Int64Regs:$a, CvtRZ)>; +def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a), + (CVT_f32_s64 Int64Regs:$a, CvtRM)>; +def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a), + (CVT_f32_s64 Int64Regs:$a, CvtRP)>; + +def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a), + (CVT_f32_u64 Int64Regs:$a, CvtRN)>; +def : 
Pat<(int_nvvm_ull2f_rz Int64Regs:$a), + (CVT_f32_u64 Int64Regs:$a, CvtRZ)>; +def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a), + (CVT_f32_u64 Int64Regs:$a, CvtRM)>; +def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a), + (CVT_f32_u64 Int64Regs:$a, CvtRP)>; + +def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a), + (CVT_f64_s64 Int64Regs:$a, CvtRN)>; +def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a), + (CVT_f64_s64 Int64Regs:$a, CvtRZ)>; +def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a), + (CVT_f64_s64 Int64Regs:$a, CvtRM)>; +def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a), + (CVT_f64_s64 Int64Regs:$a, CvtRP)>; + +def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a), + (CVT_f64_u64 Int64Regs:$a, CvtRN)>; +def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a), + (CVT_f64_u64 Int64Regs:$a, CvtRZ)>; +def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a), + (CVT_f64_u64 Int64Regs:$a, CvtRM)>; +def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a), + (CVT_f64_u64 Int64Regs:$a, CvtRP)>; + + +// FIXME: Ideally, we could use these patterns instead of the scope-creating +// patterns, but ptxas does not like these since .s16 is not compatible with +// .f16. The solution is to use .bXX for all integer register types, but we +// are not there yet. 
+//def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a), +// (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>; +//def : Pat<(int_nvvm_f2h_rn Float32Regs:$a), +// (CVT_f16_f32 Float32Regs:$a, CvtRN)>; +// +//def : Pat<(int_nvvm_h2f Int16Regs:$a), +// (CVT_f32_f16 Int16Regs:$a, CvtNONE)>; def INT_NVVM_F2H_RN_FTZ : F_MATH_1; +def : Pat<(f32 (f16_to_f32 Int16Regs:$a)), + (CVT_f32_f16 Int16Regs:$a, CvtNONE)>; +def : Pat<(i16 (f32_to_f16 Float32Regs:$a)), + (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(i16 (f32_to_f16 Float32Regs:$a)), + (CVT_f16_f32 Float32Regs:$a, CvtRN)>; + // // Bitcast // diff --git a/test/CodeGen/NVPTX/pr13291-i1-store.ll b/test/CodeGen/NVPTX/pr13291-i1-store.ll index a5526f8ad76..e7a81be01b1 100644 --- a/test/CodeGen/NVPTX/pr13291-i1-store.ll +++ b/test/CodeGen/NVPTX/pr13291-i1-store.ll @@ -13,11 +13,11 @@ define ptx_kernel void @t1(i1* %a) { define ptx_kernel void @t2(i1* %a, i8* %b) { ; PTX32: ld.u8 %rs{{[0-9]+}}, [%r{{[0-9]+}}] -; PTX32: and.b16 temp, %rs{{[0-9]+}}, 1; -; PTX32: setp.b16.eq %p{{[0-9]+}}, temp, 1; +; PTX32: and.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, 1; +; PTX32: setp.eq.b16 %p{{[0-9]+}}, %rs{{[0-9]+}}, 1; ; PTX64: ld.u8 %rs{{[0-9]+}}, [%rl{{[0-9]+}}] -; PTX64: and.b16 temp, %rs{{[0-9]+}}, 1; -; PTX64: setp.b16.eq %p{{[0-9]+}}, temp, 1; +; PTX64: and.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, 1; +; PTX64: setp.eq.b16 %p{{[0-9]+}}, %rs{{[0-9]+}}, 1; %t1 = load i1* %a %t2 = select i1 %t1, i8 1, i8 2 -- 2.34.1