namespace llvm {
+class AMDGPUMachineFunction;
+class AMDGPUSubtarget;
class MachineRegisterInfo;
class AMDGPUTargetLowering : public TargetLowering {
+protected:
+ const AMDGPUSubtarget *Subtarget;
+
private:
+ SDValue LowerConstantInitializer(const Constant* Init, const GlobalValue *GV,
+ const SDValue &InitPtr,
+ SDValue Chain,
+ SelectionDAG &DAG) const;
+ SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ /// \brief Lower vector stores by merging the vector elements into an integer
+ /// of the same bitwidth.
+ SDValue MergeVectorStore(const SDValue &Op, SelectionDAG &DAG) const;
+ /// \brief Split a vector store into multiple scalar stores.
+ /// \returns The resulting chain.
SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
protected:
/// MachineFunction.
///
/// \returns a RegisterSDNode representing Reg.
- SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC,
- unsigned Reg, EVT VT) const;
-
+ virtual SDValue CreateLiveInRegister(SelectionDAG &DAG,
+ const TargetRegisterClass *RC,
+ unsigned Reg, EVT VT) const;
+ SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
+ SelectionDAG &DAG) const;
+ /// \brief Split a vector load into multiple scalar loads.
+ SDValue SplitVectorLoad(const SDValue &Op, SelectionDAG &DAG) const;
+ SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
bool isHWTrueValue(SDValue Op) const;
bool isHWFalseValue(SDValue Op) const;
+ /// The SelectionDAGBuilder will automatically promote function arguments
+ /// with illegal types. However, this does not work for the AMDGPU targets
+ /// since the function arguments are stored in memory as these illegal types.
+ /// In order to handle this properly we need to get the origianl types sizes
+ /// from the LLVM IR Function and fixup the ISD:InputArg values before
+ /// passing them to AnalyzeFormalArguments()
+ void getOriginalFunctionArgs(SelectionDAG &DAG,
+ const Function *F,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SmallVectorImpl<ISD::InputArg> &OrigIns) const;
+ void AnalyzeFormalArguments(CCState &State,
+ const SmallVectorImpl<ISD::InputArg> &Ins) const;
+
public:
AMDGPUTargetLowering(TargetMachine &TM);
- virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc DL, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
-
- virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc DL, SelectionDAG &DAG) const;
- virtual SDValue LowerCall(CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const {
- CLI.Callee.dump();
- llvm_unreachable("Undefined function");
- }
+ bool isFAbsFree(EVT VT) const override;
+ bool isFNegFree(EVT VT) const override;
+ bool isTruncateFree(EVT Src, EVT Dest) const override;
+ bool isTruncateFree(Type *Src, Type *Dest) const override;
+
+ bool isZExtFree(Type *Src, Type *Dest) const override;
+ bool isZExtFree(EVT Src, EVT Dest) const override;
+
+ bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
+
+ MVT getVectorIdxTy() const override;
+ bool isLoadBitCastBeneficial(EVT, EVT) const override;
+ SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDLoc DL, SelectionDAG &DAG) const override;
+ SDValue LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const override;
+
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ void ReplaceNodeResults(SDNode * N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const override;
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
- virtual const char* getTargetNodeName(unsigned Opcode) const;
+ SDValue CombineMinMax(SDNode *N, SelectionDAG &DAG) const;
+ const char* getTargetNodeName(unsigned Opcode) const override;
-// Functions defined in AMDILISelLowering.cpp
-public:
+ virtual SDNode *PostISelFolding(MachineSDNode *N,
+ SelectionDAG &DAG) const {
+ return N;
+ }
/// \brief Determine which of the bits specified in \p Mask are known to be
/// either zero or one and return them in the \p KnownZero and \p KnownOne
/// bitsets.
- virtual void computeMaskedBitsForTargetNode(const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth = 0) const;
+ void computeKnownBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const override;
+
+ virtual unsigned ComputeNumSignBitsForTargetNode(
+ SDValue Op,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const override;
- virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
- const CallInst &I, unsigned Intrinsic) const;
+// Functions defined in AMDILISelLowering.cpp
+public:
+ bool getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I, unsigned Intrinsic) const override;
/// We want to mark f32/f64 floating point values as legal.
- bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+ bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
/// We don't want to shrink f64/f32 constants.
- bool ShouldShrinkFPConstant(EVT VT) const;
+ bool ShouldShrinkFPConstant(EVT VT) const override;
+
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
private:
void InitAMDILLowering();
SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue ExpandSIGN_EXTEND_INREG(SDValue Op,
+ unsigned BitsDiff,
+ SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
EVT genIntType(uint32_t size = 32, uint32_t numEle = 1) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
};
namespace AMDGPUISD {
enum {
// AMDIL ISD Opcodes
FIRST_NUMBER = ISD::BUILTIN_OP_END,
- MAD, // 32bit Fused Multiply Add instruction
CALL, // Function call based on a single integer
UMUL, // 32bit unsigned multiplication
DIV_INF, // Divide with infinity returned on zero divisor
RET_FLAG,
BRANCH_COND,
// End AMDIL ISD Opcodes
- BITALIGN,
DWORDADDR,
FRACT,
+ COS_HW,
+ SIN_HW,
FMAX,
SMAX,
UMAX,
SMIN,
UMIN,
URECIP,
+ DOT4,
+ BFE_U32, // Extract range of bits with zero extension to 32-bits.
+ BFE_I32, // Extract range of bits with sign extension to 32-bits.
+ BFI, // (src0 & src1) | (~src0 & src2)
+ BFM, // Insert a range of bits into a 32-bit word.
+ MUL_U24,
+ MUL_I24,
+ MAD_U24,
+ MAD_I24,
+ TEXTURE_FETCH,
EXPORT,
CONST_ADDRESS,
REGISTER_LOAD,
REGISTER_STORE,
+ LOAD_INPUT,
+ SAMPLE,
+ SAMPLEB,
+ SAMPLED,
+ SAMPLEL,
+ FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ STORE_MSKOR,
+ LOAD_CONSTANT,
+ TBUFFER_STORE_FORMAT,
LAST_AMDGPU_ISD_NUMBER
};