setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::STORE);
+ setBooleanContents(ZeroOrNegativeOneBooleanContent);
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+
setSchedulingPreference(Sched::RegPressure);
setJumpIsExpensive(true);
return (ScalarVT != MVT::f32 && ScalarVT != MVT::f64);
}
+bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N,
+ ISD::LoadExtType,
+ EVT NewVT) const {
+
+ unsigned NewSize = NewVT.getStoreSizeInBits();
+
+ // If we are reducing to a 32-bit load, this is always better.
+ if (NewSize == 32)
+ return true;
+
+ EVT OldVT = N->getValueType(0);
+ unsigned OldSize = OldVT.getStoreSizeInBits();
+
+ // Don't produce extloads from sub 32-bit types. SI doesn't have scalar
+ // extloads, so doing one requires using a buffer_load. In cases where we
+ // still couldn't use a scalar load, using the wider load shouldn't really
+ // hurt anything.
+
+ // If the old size already had to be an extload, there's no harm in continuing
+ // to reduce the width.
+ return (OldSize < 32);
+}
+
bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy,
EVT CastTy) const {
if (LoadTy.getSizeInBits() != CastTy.getSizeInBits())
return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
case Intrinsic::AMDGPU_rsq_clamped:
- return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1));
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ Type *Type = VT.getTypeForEVT(*DAG.getContext());
+ APFloat Max = APFloat::getLargest(Type->getFltSemantics());
+ APFloat Min = APFloat::getLargest(Type->getFltSemantics(), true);
+
+ SDValue Rsq = DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
+ SDValue Tmp = DAG.getNode(ISD::FMINNUM, DL, VT, Rsq,
+ DAG.getConstantFP(Max, VT));
+ return DAG.getNode(ISD::FMAXNUM, DL, VT, Tmp,
+ DAG.getConstantFP(Min, VT));
+ } else {
+ return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1));
+ }
case Intrinsic::AMDGPU_ldexp:
return DAG.getNode(AMDGPUISD::LDEXP, DL, VT, Op.getOperand(1),
}
/// \brief Generate Min/Max node
-SDValue AMDGPUTargetLowering::CombineFMinMax(SDLoc DL,
- EVT VT,
- SDValue LHS,
- SDValue RHS,
- SDValue True,
- SDValue False,
- SDValue CC,
- SelectionDAG &DAG) const {
+SDValue AMDGPUTargetLowering::CombineFMinMaxLegacy(SDLoc DL,
+ EVT VT,
+ SDValue LHS,
+ SDValue RHS,
+ SDValue True,
+ SDValue False,
+ SDValue CC,
+ DAGCombinerInfo &DCI) const {
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ return SDValue();
+
if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
return SDValue();
+ SelectionDAG &DAG = DCI.DAG;
ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
switch (CCOpcode) {
case ISD::SETOEQ:
case ISD::SETO:
break;
case ISD::SETULE:
- case ISD::SETULT:
+ case ISD::SETULT: {
+ if (LHS == True)
+ return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
+ return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
+ }
case ISD::SETOLE:
case ISD::SETOLT:
case ISD::SETLE:
case ISD::SETLT: {
- unsigned Opc
- = (LHS == True) ? AMDGPUISD::FMIN_LEGACY : AMDGPUISD::FMAX_LEGACY;
- return DAG.getNode(Opc, DL, VT, LHS, RHS);
+ // Ordered. Assume ordered for undefined.
+
+ // Only do this after legalization to avoid interfering with other combines
+ // which might occur.
+ if (DCI.getDAGCombineLevel() < AfterLegalizeDAG &&
+ !DCI.isCalledByLegalizer())
+ return SDValue();
+
+ // We need to permute the operands to get the correct NaN behavior. The
+ // selected operand is the second one based on the failing compare with NaN,
+ // so permute it based on the compare type the hardware uses.
+ if (LHS == True)
+ return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
+ return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
+ }
+ case ISD::SETUGE:
+ case ISD::SETUGT: {
+ if (LHS == True)
+ return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
+ return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
}
case ISD::SETGT:
case ISD::SETGE:
- case ISD::SETUGE:
case ISD::SETOGE:
- case ISD::SETUGT:
case ISD::SETOGT: {
- unsigned Opc
- = (LHS == True) ? AMDGPUISD::FMAX_LEGACY : AMDGPUISD::FMIN_LEGACY;
- return DAG.getNode(Opc, DL, VT, LHS, RHS);
+ if (DCI.getDAGCombineLevel() < AfterLegalizeDAG &&
+ !DCI.isCalledByLegalizer())
+ return SDValue();
+
+ if (LHS == True)
+ return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
+ return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
}
case ISD::SETCC_INVALID:
llvm_unreachable("Invalid setcc condcode!");
return DAG.getMergeValues(Res, DL);
}
+void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &Results) const {
+ assert(Op.getValueType() == MVT::i64);
+
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
+
+ SDValue one = DAG.getConstant(1, HalfVT);
+ SDValue zero = DAG.getConstant(0, HalfVT);
+
+ //HiLo split
+ SDValue LHS = Op.getOperand(0);
+ SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero);
+ SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one);
+
+ SDValue RHS = Op.getOperand(1);
+ SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero);
+ SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one);
+
+ // Get Speculative values
+ SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
+ SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
+
+ SDValue REM_Hi = zero;
+ SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ);
+
+ SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ);
+ SDValue DIV_Lo = zero;
+
+ const unsigned halfBitWidth = HalfVT.getSizeInBits();
+
+ for (unsigned i = 0; i < halfBitWidth; ++i) {
+ SDValue POS = DAG.getConstant(halfBitWidth - i - 1, HalfVT);
+ // Get Value of high bit
+ SDValue HBit;
+ if (halfBitWidth == 32 && Subtarget->hasBFE()) {
+ HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one);
+ } else {
+ HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
+ HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
+ }
+
+ SDValue Carry = DAG.getNode(ISD::SRL, DL, HalfVT, REM_Lo,
+ DAG.getConstant(halfBitWidth - 1, HalfVT));
+ REM_Hi = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Hi, one);
+ REM_Hi = DAG.getNode(ISD::OR, DL, HalfVT, REM_Hi, Carry);
+
+ REM_Lo = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Lo, one);
+ REM_Lo = DAG.getNode(ISD::OR, DL, HalfVT, REM_Lo, HBit);
+
+
+ SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
+
+ SDValue BIT = DAG.getConstant(1 << (halfBitWidth - i - 1), HalfVT);
+ SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETUGE);
+
+ DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
+
+ // Update REM
+
+ SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
+
+ REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETUGE);
+ REM_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, zero);
+ REM_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, one);
+ }
+
+ SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
+ SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi);
+ Results.push_back(DIV);
+ Results.push_back(REM);
+}
+
SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
+ if (VT == MVT::i64) {
+ SmallVector<SDValue, 2> Results;
+ LowerUDIVREM64(Op, DAG, Results);
+ return DAG.getMergeValues(Results, DL);
+ }
+
SDValue Num = Op.getOperand(0);
SDValue Den = Op.getOperand(1);
SDValue Value = SN->getValue();
EVT VT = Value.getValueType();
- if (isTypeLegal(VT) || SN->isVolatile() || !ISD::isNormalLoad(Value.getNode()))
+ if (isTypeLegal(VT) || SN->isVolatile() ||
+ !ISD::isNormalLoad(Value.getNode()) || VT.getSizeInBits() < 8)
return SDValue();
LoadSDNode *LoadVal = cast<LoadSDNode>(Value);
simplifyI24(N1, DCI);
return SDValue();
}
- case ISD::SELECT_CC: {
- SDLoc DL(N);
- EVT VT = N->getValueType(0);
-
- if (VT == MVT::f32 ||
- (VT == MVT::f64 &&
- Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)) {
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- SDValue True = N->getOperand(2);
- SDValue False = N->getOperand(3);
- SDValue CC = N->getOperand(4);
-
- return CombineFMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
- }
-
- break;
- }
case ISD::SELECT: {
SDValue Cond = N->getOperand(0);
- if (Cond.getOpcode() == ISD::SETCC) {
+ if (Cond.getOpcode() == ISD::SETCC && Cond.hasOneUse()) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue LHS = Cond.getOperand(0);
SDValue True = N->getOperand(1);
SDValue False = N->getOperand(2);
- if (VT == MVT::f32 ||
- (VT == MVT::f64 &&
- Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)) {
- return CombineFMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
- }
+ if (VT == MVT::f32)
+ return CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
// TODO: Implement min / max Evergreen instructions.
if (VT == MVT::i32 &&
NODE_NAME_CASE(FMIN_LEGACY)
NODE_NAME_CASE(SMIN)
NODE_NAME_CASE(UMIN)
+ NODE_NAME_CASE(FMAX3)
+ NODE_NAME_CASE(SMAX3)
+ NODE_NAME_CASE(UMAX3)
+ NODE_NAME_CASE(FMIN3)
+ NODE_NAME_CASE(SMIN3)
+ NODE_NAME_CASE(UMIN3)
NODE_NAME_CASE(URECIP)
NODE_NAME_CASE(DIV_SCALE)
NODE_NAME_CASE(DIV_FMAS)