setOperationAction(ISD::FROUND, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
- // The hardware supports 32-bit ROTR, but not ROTL.
- setOperationAction(ISD::ROTL, MVT::i32, Expand);
- setOperationAction(ISD::ROTL, MVT::i64, Expand);
- setOperationAction(ISD::ROTR, MVT::i64, Expand);
-
// Lower floating point store/load to integer store/load to reduce the number
// of patterns in tablegen.
setOperationAction(ISD::STORE, MVT::f32, Promote);
setOperationAction(ISD::BR_CC, MVT::i1, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
-
- setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
-
- setOperationAction(ISD::MUL, MVT::i64, Expand);
- setOperationAction(ISD::SUB, MVT::i64, Expand);
-
- setOperationAction(ISD::UDIV, MVT::i32, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
- setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
- setOperationAction(ISD::UREM, MVT::i32, Expand);
+ if (Subtarget->getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
+ setOperationAction(ISD::FCEIL, MVT::f64, Custom);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Custom);
+ setOperationAction(ISD::FRINT, MVT::f64, Custom);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Custom);
+ }
if (!Subtarget->hasBFI()) {
// fcopysign can be done in a single instruction with BFI.
const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
for (MVT VT : ScalarIntVTs) {
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::SDIV, VT, Custom);
+
// GPU does not have divrem function for signed or unsigned.
setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::UDIVREM, VT, Custom);
// GPU does not have [S|U]MUL_LOHI functions as a single instruction.
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
if (!Subtarget->hasBCNT(64))
setOperationAction(ISD::CTPOP, MVT::i64, Expand);
+ // The hardware supports 32-bit ROTR, but not ROTL.
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL, MVT::i64, Expand);
+ setOperationAction(ISD::ROTR, MVT::i64, Expand);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
+ setOperationAction(ISD::MUL, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+ setOperationAction(ISD::SUB, MVT::i64, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
static const MVT::SimpleValueType VectorIntTypes[] = {
MVT::v2i32, MVT::v4i32
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
setOperationAction(ISD::SUB, VT, Expand);
- setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SINT_TO_FP, VT, Expand);
setOperationAction(ISD::UINT_TO_FP, VT, Expand);
// TODO: Implement custom UREM / SREM routines.
+ setOperationAction(ISD::SDIV, VT, Custom);
+ setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
- setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::UDIVREM, VT, Custom);
setOperationAction(ISD::SELECT, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::XOR, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::CTLZ, VT, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
}
static const MVT::SimpleValueType FloatVectorTypes[] = {
for (MVT VT : FloatVectorTypes) {
setOperationAction(ISD::FABS, VT, Expand);
setOperationAction(ISD::FADD, VT, Expand);
+ setOperationAction(ISD::FCEIL, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::FTRUNC, VT, Expand);
setOperationAction(ISD::FMUL, VT, Expand);
setOperationAction(ISD::FRINT, VT, Expand);
+ setOperationAction(ISD::FNEARBYINT, VT, Expand);
setOperationAction(ISD::FSQRT, VT, Expand);
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FSUB, VT, Expand);
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
}
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom);
+
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::SELECT_CC);
setSchedulingPreference(Sched::RegPressure);
setJumpIsExpensive(true);
+ // There are no integer divide instructions, and these expand to a pretty
+ // large sequence of instructions.
+ setIntDivIsCheap(false);
+
+ // TODO: Investigate this when 64-bit divides are implemented.
+ addBypassSlowDiv(64, 32);
+
// FIXME: Need to really handle these.
MaxStoresPerMemcpy = 4096;
MaxStoresPerMemmove = 4096;
return MVT::i32;
}
+// Floating point immediates are legal whenever the scalar element type is
+// f32 or f64; the immediate's value itself is not inspected.
+// FIXME: Why are we reporting vectors of FP immediates as legal?
+bool AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+  const EVT EltVT = VT.getScalarType();
+  if (EltVT == MVT::f32)
+    return true;
+  return EltVT == MVT::f64;
+}
+
+// Constants whose scalar element type is f32 or f64 are never shrunk; any
+// other scalar type may be.
+bool AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
+  const EVT EltVT = VT.getScalarType();
+  const bool IsF32OrF64 = (EltVT == MVT::f32) || (EltVT == MVT::f64);
+  return !IsF32OrF64;
+}
+
bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy,
EVT CastTy) const {
if (LoadTy.getSizeInBits() != CastTy.getSizeInBits())
return SDValue();
}
-SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
- const {
+SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default:
Op.getNode()->dump();
llvm_unreachable("Custom lowering code for this"
"instruction is not implemented yet!");
break;
- // AMDIL DAG lowering
- case ISD::SDIV: return LowerSDIV(Op, DAG);
- case ISD::SREM: return LowerSREM(Op, DAG);
+ // AMDGPU DAG lowering.
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
- case ISD::BRCOND: return LowerBRCOND(Op, DAG);
- // AMDGPU DAG lowering
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::SDIV: return LowerSDIV(Op, DAG);
+ case ISD::SREM: return LowerSREM(Op, DAG);
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
+ case ISD::FCEIL: return LowerFCEIL(Op, DAG);
+ case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
+ case ISD::FRINT: return LowerFRINT(Op, DAG);
+ case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG);
+ case ISD::FFLOOR: return LowerFFLOOR(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
+
+ // AMDIL DAG lowering.
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
}
return Op;
}
Op.getOperand(1),
Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDGPU_brev:
+ return DAG.getNode(AMDGPUISD::BREV, DL, VT, Op.getOperand(1));
+
case AMDGPUIntrinsic::AMDIL_exp: // Legacy name.
return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
return SDValue();
}
+// Lowers a signed division of narrow integers by doing the divide in single
+// precision floating point: the operands are sign extended to 32 bits,
+// converted to float, divided and truncated, and the truncated quotient is
+// then corrected by jq (+1 or -1, following the sign of LHS ^ RHS) when the
+// magnitude of the float remainder is not smaller than that of the divisor.
+//
+// Op is the division node (operand 0 = dividend, operand 1 = divisor); the
+// returned value has Op's value type.
+//
+// NOTE(review): for a vector type whose element count is neither 2 nor 4,
+// INTTY / FLTTY are left default-constructed - confirm callers never pass one.
+SDValue AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  EVT OVT = Op.getValueType();
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  // 32-bit integer and float types with the same element count as OVT.
+  MVT INTTY;
+  MVT FLTTY;
+  if (!OVT.isVector()) {
+    INTTY = MVT::i32;
+    FLTTY = MVT::f32;
+  } else if (OVT.getVectorNumElements() == 2) {
+    INTTY = MVT::v2i32;
+    FLTTY = MVT::v2f32;
+  } else if (OVT.getVectorNumElements() == 4) {
+    INTTY = MVT::v4i32;
+    FLTTY = MVT::v4f32;
+  }
+  unsigned bitsize = OVT.getScalarType().getSizeInBits();
+  // char|short jq = ia ^ ib;
+  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
+
+  // jq = jq >> (bitsize - 2)
+  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
+
+  // jq = jq | 0x1
+  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
+
+  // jq = (int)jq
+  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
+
+  // int ia = (int)LHS;
+  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
+
+  // int ib = (int)RHS;
+  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
+
+  // float fa = (float)ia;
+  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
+
+  // float fb = (float)ib;
+  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
+
+  // float fq = native_divide(fa, fb);
+  SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
+
+  // fq = trunc(fq);
+  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
+
+  // float fqneg = -fq;
+  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
+
+  // float fr = mad(fqneg, fb, fa);
+  // Both factors are floating point values, so the product must be built
+  // with the floating point multiply opcode rather than the integer one.
+  SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
+                           DAG.getNode(ISD::FMUL, DL, FLTTY, fqneg, fb), fa);
+
+  // int iq = (int)fq;
+  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
+
+  // fr = fabs(fr);
+  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
+
+  // fb = fabs(fb);
+  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
+
+  // int cv = fr >= fb;
+  // The same compare is emitted for the scalar and the vector case.
+  SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
+
+  // jq = (cv ? jq : 0);
+  jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
+                   DAG.getConstant(0, OVT));
+  // dst = iq + jq;
+  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
+  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
+  return iq;
+}
+
+// Builds a signed division out of an unsigned one:
+//   sign(x) = (x < 0) ? -1 : 0
+//   abs(x)  = (x + sign(x)) ^ sign(x)
+//   q       = abs(LHS) udiv abs(RHS)
+//   qsign   = sign(LHS) ^ sign(RHS)
+//   result  = (q + qsign) ^ qsign
+SDValue AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  EVT VT = Op.getValueType();
+  SDValue Zero = DAG.getConstant(0, VT);
+  SDValue AllOnes = DAG.getConstant(-1, VT);
+
+  SDValue Num = Op.getOperand(0);
+  SDValue Den = Op.getOperand(1);
+
+  // All-ones when the operand is negative, zero otherwise.
+  SDValue NumSign = DAG.getSelectCC(DL, Num, Zero, AllOnes, Zero, ISD::SETLT);
+  SDValue DenSign = DAG.getSelectCC(DL, Den, Zero, AllOnes, Zero, ISD::SETLT);
+
+  // Conditionally negate both operands: (x + sign) ^ sign.
+  Num = DAG.getNode(ISD::ADD, DL, VT, Num, NumSign);
+  Den = DAG.getNode(ISD::ADD, DL, VT, Den, DenSign);
+  Num = DAG.getNode(ISD::XOR, DL, VT, Num, NumSign);
+  Den = DAG.getNode(ISD::XOR, DL, VT, Den, DenSign);
+
+  // Unsigned quotient of the two adjusted operands.
+  SDValue Quot = DAG.getNode(ISD::UDIV, DL, VT, Num, Den);
+
+  // The quotient is negative exactly when the operand signs differ.
+  SDValue QuotSign = DAG.getNode(ISD::XOR, DL, VT, NumSign, DenSign);
+
+  // Conditionally negate the quotient with the same add / xor trick.
+  Quot = DAG.getNode(ISD::ADD, DL, VT, Quot, QuotSign);
+  return DAG.getNode(ISD::XOR, DL, VT, Quot, QuotSign);
+}
+
+// No expansion is built for 64-bit signed division here; result 0 of the
+// incoming node is handed back as is.
+SDValue AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
+  SDNode *Node = Op.getNode();
+  return SDValue(Node, 0);
+}
+
+// Dispatches a signed division to the helper matching its scalar element
+// width. Any other type gets result 0 of the incoming node back unchanged.
+SDValue AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
+  const EVT EltVT = Op.getValueType().getScalarType();
+
+  if (EltVT == MVT::i64)
+    return LowerSDIV64(Op, DAG);
+
+  if (EltVT == MVT::i32)
+    return LowerSDIV32(Op, DAG);
+
+  // FIXME: We should be checking for the masked bits. This isn't reached
+  // because i8 and i16 are not legal types.
+  const bool IsNarrow = (EltVT == MVT::i16) || (EltVT == MVT::i8);
+  if (IsNarrow)
+    return LowerSDIV24(Op, DAG);
+
+  return SDValue(Op.getNode(), 0);
+}
+
+// Builds a signed remainder out of an unsigned division:
+//   sign(x) = (x < 0)
+//   abs(x)  = (x + sign(x)) ^ sign(x)
+//   rem     = abs(LHS) - (abs(LHS) udiv abs(RHS)) * abs(RHS)
+//   result  = (rem + sign(LHS)) ^ sign(LHS)
+//
+// Op is the remainder node (operand 0 = dividend, operand 1 = divisor); the
+// returned value has Op's value type.
+//
+// NOTE(review): the add / xor negation assumes the setcc below yields 0 or
+// all-ones in OVT - confirm against the target's boolean contents.
+SDValue AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  EVT OVT = Op.getValueType();
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  // The LowerSREM32 function generates equivalent to the following IL.
+  // mov r0, LHS
+  // mov r1, RHS
+  // ilt r10, r0, 0
+  // ilt r11, r1, 0
+  // iadd r0, r0, r10
+  // iadd r1, r1, r11
+  // ixor r0, r0, r10
+  // ixor r1, r1, r11
+  // udiv r20, r0, r1
+  // umul r20, r20, r1
+  // sub r0, r0, r20
+  // iadd r0, r0, r10
+  // ixor DST, r0, r10
+
+  // mov r0, LHS
+  SDValue r0 = LHS;
+
+  // mov r1, RHS
+  SDValue r1 = RHS;
+
+  // ilt r10, r0, 0
+  SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
+
+  // ilt r11, r1, 0
+  SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
+
+  // iadd r0, r0, r10
+  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+  // iadd r1, r1, r11
+  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+  // ixor r0, r0, r10
+  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+  // ixor r1, r1, r11
+  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+  // udiv r20, r0, r1
+  // This has to be the quotient: the remainder is recovered below as
+  // r0 - (r0 / r1) * r1. Feeding a remainder into the multiply / subtract
+  // would compute r0 - (r0 % r1) * r1 instead.
+  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
+
+  // umul r20, r20, r1
+  r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
+
+  // sub r0, r0, r20
+  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
+
+  // iadd r0, r0, r10
+  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+  // ixor DST, r0, r10
+  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+  return DST;
+}
+
+// No expansion is built for 64-bit signed remainder here; result 0 of the
+// incoming node is handed back as is.
+SDValue AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
+  SDNode *Node = Op.getNode();
+  return SDValue(Node, 0);
+}
+
+// Dispatches a signed remainder to the 64-bit or 32-bit helper based on the
+// scalar element type. Any other type gets result 0 of the incoming node
+// back unchanged.
+SDValue AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
+  const EVT EltVT = Op.getValueType().getScalarType();
+
+  if (EltVT == MVT::i64)
+    return LowerSREM64(Op, DAG);
+
+  if (EltVT == MVT::i32)
+    return LowerSREM32(Op, DAG);
+
+  return SDValue(Op.getNode(), 0);
+}
+
SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
return DAG.getMergeValues(Ops, DL);
}
+// Expands an f64 ceil in terms of trunc:
+//   result = trunc(src)
+//   if (src > 0.0 && src != result)
+//     result += 1.0
+SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc SL(Op);
+  SDValue Src = Op.getOperand(0);
+
+  SDValue Truncated = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
+
+  const SDValue FPZero = DAG.getConstantFP(0.0, MVT::f64);
+  const SDValue FPOne = DAG.getConstantFP(1.0, MVT::f64);
+
+  EVT CondVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
+
+  // Only positive inputs that differ from their truncation are bumped.
+  SDValue IsPositive = DAG.getSetCC(SL, CondVT, Src, FPZero, ISD::SETOGT);
+  SDValue DiffersFromTrunc =
+      DAG.getSetCC(SL, CondVT, Src, Truncated, ISD::SETONE);
+  SDValue NeedsBump =
+      DAG.getNode(ISD::AND, SL, CondVT, IsPositive, DiffersFromTrunc);
+
+  // Add 1.0 when a bump is needed, 0.0 otherwise.
+  SDValue Bump = DAG.getNode(ISD::SELECT, SL, MVT::f64, NeedsBump, FPOne,
+                             FPZero);
+  return DAG.getNode(ISD::FADD, SL, MVT::f64, Truncated, Bump);
+}
+
+// Expands an f64 trunc with integer bit manipulation on the value's bits:
+//   exp = unbiased exponent of src
+//   if (exp < 0)       result = src's sign bit only (a signed zero)
+//   else if (exp > 51) result = src (nothing below the binary point)
+//   else               result = src with its low (52 - exp) fraction bits
+//                               cleared
+//
+// Op must have type f64 (asserted); the returned value is f64.
+SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc SL(Op);
+  SDValue Src = Op.getOperand(0);
+
+  assert(Op.getValueType() == MVT::f64);
+
+  const SDValue Zero = DAG.getConstant(0, MVT::i32);
+  const SDValue One = DAG.getConstant(1, MVT::i32);
+
+  SDValue VecSrc = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
+
+  // Extract the upper half, since this is where we will find the sign and
+  // exponent.
+  SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, VecSrc, One);
+
+  const unsigned FractBits = 52;
+  const unsigned ExpBits = 11;
+
+  // Extract the exponent. The biased exponent is an unsigned 11-bit field,
+  // so it has to be zero extended: a sign-extending extract would turn every
+  // biased exponent with its top bit set (1024 and above) into a negative
+  // number and send those inputs down the "exp < 0" path below.
+  SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32,
+                                Hi,
+                                DAG.getConstant(FractBits - 32, MVT::i32),
+                                DAG.getConstant(ExpBits, MVT::i32));
+  // Remove the exponent bias.
+  SDValue Exp = DAG.getNode(ISD::SUB, SL, MVT::i32, ExpPart,
+                            DAG.getConstant(1023, MVT::i32));
+
+  // Extract the sign bit.
+  const SDValue SignBitMask = DAG.getConstant(1ul << 31, MVT::i32);
+  SDValue SignBit = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, SignBitMask);
+
+  // Extend back to 64-bits.
+  SDValue SignBit64 = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
+                                  Zero, SignBit);
+  SignBit64 = DAG.getNode(ISD::BITCAST, SL, MVT::i64, SignBit64);
+
+  SDValue BcInt = DAG.getNode(ISD::BITCAST, SL, MVT::i64, Src);
+  const SDValue FractMask = DAG.getConstant((1LL << FractBits) - 1, MVT::i64);
+
+  // FractMask >> Exp covers the bits below the binary point; clear them.
+  SDValue Shr = DAG.getNode(ISD::SRA, SL, MVT::i64, FractMask, Exp);
+  SDValue Not = DAG.getNOT(SL, Shr, MVT::i64);
+  SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, BcInt, Not);
+
+  EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::i32);
+
+  const SDValue FiftyOne = DAG.getConstant(FractBits - 1, MVT::i32);
+
+  SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT);
+  SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT);
+
+  // exp < 0 keeps only the sign; exp > 51 passes the source through.
+  SDValue Tmp1 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpLt0, SignBit64, Tmp0);
+  SDValue Tmp2 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpGt51, BcInt, Tmp1);
+
+  return DAG.getNode(ISD::BITCAST, SL, MVT::f64, Tmp2);
+}
+
+// Expands an f64 rint by adding and then subtracting 2^52 carrying the sign
+// of the source. Inputs whose magnitude exceeds 0x1.fffffffffffffp+51 are
+// returned unchanged.
+SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc SL(Op);
+  SDValue Src = Op.getOperand(0);
+
+  assert(Op.getValueType() == MVT::f64);
+
+  // 2^52 with the sign of the source.
+  APFloat TwoPow52Val(APFloat::IEEEdouble, "0x1.0p+52");
+  SDValue TwoPow52 = DAG.getConstantFP(TwoPow52Val, MVT::f64);
+  SDValue SignedMagic =
+      DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, TwoPow52, Src);
+
+  // (Src + magic) - magic.
+  SDValue Sum = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, SignedMagic);
+  SDValue Rounded = DAG.getNode(ISD::FSUB, SL, MVT::f64, Sum, SignedMagic);
+
+  SDValue AbsSrc = DAG.getNode(ISD::FABS, SL, MVT::f64, Src);
+
+  APFloat LimitVal(APFloat::IEEEdouble, "0x1.fffffffffffffp+51");
+  SDValue Limit = DAG.getConstantFP(LimitVal, MVT::f64);
+
+  // Keep the source itself once its magnitude is above the limit.
+  EVT CondVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
+  SDValue IsLarge = DAG.getSetCC(SL, CondVT, AbsSrc, Limit, ISD::SETOGT);
+
+  return DAG.getSelect(SL, MVT::f64, IsLarge, Src, Rounded);
+}
+
+// Rewrites FNEARBYINT as an FRINT of the same type on the same operand.
+SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const {
+  // FNEARBYINT and FRINT are the same, except in their handling of FP
+  // exceptions. Those aren't really meaningful for us, and OpenCL only has
+  // rint, so just treat them as equivalent.
+  SDLoc SL(Op);
+  EVT VT = Op.getValueType();
+  SDValue Src = Op.getOperand(0);
+  return DAG.getNode(ISD::FRINT, SL, VT, Src);
+}
+
+// Expands an f64 floor in terms of trunc:
+//   result = trunc(src);
+//   if (src < 0.0 && src != result)
+//     result += -1.0.
+SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc SL(Op);
+  SDValue Src = Op.getOperand(0);
+
+  SDValue Truncated = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
+
+  const SDValue FPZero = DAG.getConstantFP(0.0, MVT::f64);
+  const SDValue FPNegOne = DAG.getConstantFP(-1.0, MVT::f64);
+
+  EVT CondVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
+
+  // Only negative inputs that differ from their truncation are adjusted.
+  SDValue IsNegative = DAG.getSetCC(SL, CondVT, Src, FPZero, ISD::SETOLT);
+  SDValue DiffersFromTrunc =
+      DAG.getSetCC(SL, CondVT, Src, Truncated, ISD::SETONE);
+  SDValue NeedsAdjust =
+      DAG.getNode(ISD::AND, SL, CondVT, IsNegative, DiffersFromTrunc);
+
+  // Add -1.0 when an adjustment is needed, 0.0 otherwise.
+  SDValue Adjust = DAG.getNode(ISD::SELECT, SL, MVT::f64, NeedsAdjust,
+                               FPNegOne, FPZero);
+  return DAG.getNode(ISD::FADD, SL, MVT::f64, Truncated, Adjust);
+}
+
SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDValue S0 = Op.getOperand(0);
NODE_NAME_CASE(BFE_I32)
NODE_NAME_CASE(BFI)
NODE_NAME_CASE(BFM)
+ NODE_NAME_CASE(BREV)
NODE_NAME_CASE(MUL_U24)
NODE_NAME_CASE(MUL_I24)
NODE_NAME_CASE(MAD_U24)
NODE_NAME_CASE(CVT_F32_UBYTE1)
NODE_NAME_CASE(CVT_F32_UBYTE2)
NODE_NAME_CASE(CVT_F32_UBYTE3)
+ NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
NODE_NAME_CASE(STORE_MSKOR)
NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
}