#include "AMDGPUISelLowering.h"
#include "AMDGPU.h"
+#include "AMDGPUFrameLowering.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDILIntrinsicInfo.h"
static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
- unsigned Offset = State.AllocateStack(ValVT.getSizeInBits() / 8, ArgFlags.getOrigAlign());
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ unsigned Offset = State.AllocateStack(ValVT.getStoreSize(),
+ ArgFlags.getOrigAlign());
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
return true;
}
setOperationAction(ISD::FABS, MVT::f32, Legal);
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ setOperationAction(ISD::FROUND, MVT::f32, Legal);
// The hardware supports ROTR, but not ROTL
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::FNEG, MVT::v2f32, Expand);
setOperationAction(ISD::FNEG, MVT::v4f32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+
setOperationAction(ISD::MUL, MVT::i64, Expand);
setOperationAction(ISD::UDIV, MVT::i32, Expand);
for (unsigned int x = 0; x < NumFloatTypes; ++x) {
MVT::SimpleValueType VT = FloatTypes[x];
+ setOperationAction(ISD::FABS, VT, Expand);
setOperationAction(ISD::FADD, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
setOperationAction(ISD::FFLOOR, VT, Expand);
return MVT::i32;
}
+bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy,
+ EVT CastTy) const {
+ if (LoadTy.getSizeInBits() != CastTy.getSizeInBits())
+ return true;
+
+ unsigned LScalarSize = LoadTy.getScalarType().getSizeInBits();
+ unsigned CastScalarSize = CastTy.getScalarType().getSizeInBits();
+
+ return ((LScalarSize <= CastScalarSize) ||
+ (CastScalarSize >= 32) ||
+ (LScalarSize < 32));
+}
//===---------------------------------------------------------------------===//
// Target Properties
switch (Op.getOpcode()) {
default:
Op.getNode()->dump();
- assert(0 && "Custom lowering code for this"
- "instruction is not implemented yet!");
+ llvm_unreachable("Custom lowering code for this"
+ "instruction is not implemented yet!");
break;
// AMDIL DAG lowering
case ISD::SDIV: return LowerSDIV(Op, DAG);
// AMDGPU DAG lowering
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
+ case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
- case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
+ case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
}
return Op;
}
&Args[0], Args.size());
}
+SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const AMDGPUFrameLowering *TFL =
+ static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
+
+ FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
+ assert(FIN);
+
+ unsigned FrameIndex = FIN->getIndex();
+ unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
+ return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF),
+ Op.getValueType());
+}
SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
case ISD::SETTRUE2:
case ISD::SETUO:
case ISD::SETO:
- assert(0 && "Operation should already be optimised !");
+ llvm_unreachable("Operation should already be optimised!");
case ISD::SETULE:
case ISD::SETULT:
case ISD::SETOLE:
return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
}
case ISD::SETCC_INVALID:
- assert(0 && "Invalid setcc condcode !");
+ llvm_unreachable("Invalid setcc condcode!");
}
return Op;
}
}
StoreSDNode *Store = cast<StoreSDNode>(Op);
- if (Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
+ if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+ Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
Store->getValue().getValueType().isVector()) {
return SplitVectorStore(Op, DAG);
}
SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
DAG.getConstant(-1, VT),
DAG.getConstant(0, VT),
- ISD::SETGE);
- // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
- SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
- DAG.getConstant(0, VT),
+ ISD::SETUGE);
+ // Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0)
+ SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Num,
+ Num_S_Remainder,
DAG.getConstant(-1, VT),
DAG.getConstant(0, VT),
- ISD::SETGE);
+ ISD::SETUGE);
// Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
Remainder_GE_Zero);
return DAG.getMergeValues(Ops, 2, DL);
}
+SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue S0 = Op.getOperand(0);
+ SDLoc DL(Op);
+ if (Op.getValueType() != MVT::f32 || S0.getValueType() != MVT::i64)
+ return SDValue();
+
+ // f32 uint_to_fp i64
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
+ DAG.getConstant(0, MVT::i32));
+ SDValue FloatLo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Lo);
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
+ DAG.getConstant(1, MVT::i32));
+ SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi);
+ FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi,
+ DAG.getConstantFP(4294967296.0f, MVT::f32)); // 2^32
+ return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi);
+
+}
//===----------------------------------------------------------------------===//
// Helper functions