From 7e657c85c6360ca0adcbda29ed26347a2cd9ae55 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 9 Sep 2015 01:12:27 +0000 Subject: [PATCH] SelectionDAG: Support Expand of f16 extloads Currently this hits an assert that extload should always be supported, which assumes integer extloads. This moves a hack out of SI's argument lowering and is covered by existing tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@247113 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/ValueTypes.h | 14 ++++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 20 +++++- lib/IR/ValueTypes.cpp | 5 ++ lib/Target/AMDGPU/SIISelLowering.cpp | 32 +--------- test/CodeGen/AMDGPU/half.ll | 81 ++++++++++++++++++++++++ 5 files changed, 122 insertions(+), 30 deletions(-) diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h index e1a9fd38290..20e5055071a 100644 --- a/include/llvm/CodeGen/ValueTypes.h +++ b/include/llvm/CodeGen/ValueTypes.h @@ -89,6 +89,19 @@ namespace llvm { return VecTy; } + /// Return the type converted to an equivalently sized integer or vector + /// with integer element type. Similar to changeVectorElementTypeToInteger, + /// but also handles scalars. + EVT changeTypeToInteger() { + if (isVector()) + return changeVectorElementTypeToInteger(); + + if (isSimple()) + return MVT::getIntegerVT(getSizeInBits()); + + return changeExtendedTypeToInteger(); + } + /// isSimple - Test if the given EVT is simple (as opposed to being /// extended). bool isSimple() const { @@ -342,6 +355,7 @@ namespace llvm { // Methods for handling the Extended-type case in functions above. // These are all out-of-line to prevent users of this header file // from having a dependency on Type.h. + EVT changeExtendedTypeToInteger() const; EVT changeExtendedVectorElementTypeToInteger() const; static EVT getExtendedIntegerVT(LLVMContext &C, unsigned BitWidth); static EVT getExtendedVectorVT(LLVMContext &C, EVT VT, diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 889bfd4e52b..c3c36883f94 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1095,7 +1095,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { break; } case TargetLowering::Expand: - if (!TLI.isLoadExtLegal(ISD::EXTLOAD, Node->getValueType(0), SrcVT)) { + EVT DestVT = Node->getValueType(0); + if (!TLI.isLoadExtLegal(ISD::EXTLOAD, DestVT, SrcVT)) { // If the source type is not legal, see if there is a legal extload to // an intermediate type that we can then extend further. EVT LoadVT = TLI.getRegisterType(SrcVT.getSimpleVT()); @@ -1114,6 +1115,23 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Chain = Load.getValue(1); break; } + + // Handle the special case of fp16 extloads. EXTLOAD doesn't have the + // normal undefined upper bits behavior to allow using an in-reg extend + // with the illegal FP type, so load as an integer and do the + // from-integer conversion. + if (SrcVT.getScalarType() == MVT::f16) { + EVT ISrcVT = SrcVT.changeTypeToInteger(); + EVT IDestVT = DestVT.changeTypeToInteger(); + EVT LoadVT = TLI.getRegisterType(IDestVT.getSimpleVT()); + + SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, LoadVT, + Chain, Ptr, ISrcVT, + LD->getMemOperand()); + Value = DAG.getNode(ISD::FP16_TO_FP, dl, DestVT, Result); + Chain = Result.getValue(1); + break; + } } assert(!SrcVT.isVector() && diff --git a/lib/IR/ValueTypes.cpp b/lib/IR/ValueTypes.cpp index d95de3989df..be58fd45069 100644 --- a/lib/IR/ValueTypes.cpp +++ b/lib/IR/ValueTypes.cpp @@ -19,6 +19,11 @@ #include "llvm/Support/ErrorHandling.h" using namespace llvm; +EVT EVT::changeExtendedTypeToInteger() const { + LLVMContext &Context = LLVMTy->getContext(); + return getIntegerVT(Context, getSizeInBits()); +} + EVT EVT::changeExtendedVectorElementTypeToInteger() const { LLVMContext &Context = LLVMTy->getContext(); EVT IntTy = getIntegerVT(Context, getVectorElementType().getSizeInBits()); diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index e7dcd8a3267..ef42a2972bf 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -464,12 +464,6 @@ bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, return TII->isInlineConstant(Imm); } -static EVT toIntegerVT(EVT VT) { - if (VT.isVector()) - return VT.changeVectorElementTypeToInteger(); - return MVT::getIntegerVT(VT.getSizeInBits()); -} - SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, SDLoc SL, SDValue Chain, unsigned Offset, bool Signed) const { @@ -493,30 +487,10 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, unsigned Align = DL.getABITypeAlignment(Ty); - if (VT != MemVT && VT.isFloatingPoint()) { - // Do an integer load and convert. - // FIXME: This is mostly because load legalization after type legalization - // doesn't handle FP extloads. - assert(VT.getScalarType() == MVT::f32 && - MemVT.getScalarType() == MVT::f16); - - EVT IVT = toIntegerVT(VT); - EVT MemIVT = toIntegerVT(MemVT); - SDValue Load = DAG.getLoad(ISD::UNINDEXED, ISD::ZEXTLOAD, - IVT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemIVT, - false, // isVolatile - true, // isNonTemporal - true, // isInvariant - Align); // Alignment - SDValue Ops[] = { - DAG.getNode(ISD::FP16_TO_FP, SL, VT, Load), - Load.getValue(1) - }; - - return DAG.getMergeValues(Ops, SL); - } - ISD::LoadExtType ExtTy = Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD; + if (MemVT.isFloatingPoint()) + ExtTy = ISD::EXTLOAD; + return DAG.getLoad(ISD::UNINDEXED, ExtTy, VT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemVT, false, // isVolatile diff --git a/test/CodeGen/AMDGPU/half.ll b/test/CodeGen/AMDGPU/half.ll index bf8f11860b5..cd0b1b4e68d 100644 --- a/test/CodeGen/AMDGPU/half.ll +++ b/test/CodeGen/AMDGPU/half.ll @@ -112,12 +112,24 @@ define void @extload_v8f16_to_v8f32_arg(<8 x float> addrspace(1)* %out, <8 x hal } ; GCN-LABEL: {{^}}extload_f16_to_f64_arg: +; SI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb{{$}} +; VI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c{{$}} +; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[ARG]] +; GCN: buffer_store_dwordx2 [[RESULT]] define void @extload_f16_to_f64_arg(double addrspace(1)* %out, half %arg) #0 { %ext = fpext half %arg to double store double %ext, double addrspace(1)* %out ret void } + ; GCN-LABEL: {{^}}extload_v2f16_to_v2f64_arg: +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN: s_endpgm define void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)* %out, <2 x half> %arg) #0 { %ext = fpext <2 x half> %arg to <2 x double> store <2 x double> %ext, <2 x double> addrspace(1)* %out @@ -125,6 +137,16 @@ define void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)* %out, <2 x ha } ; GCN-LABEL: {{^}}extload_v3f16_to_v3f64_arg: +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN: s_endpgm define void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)* %out, <3 x half> %arg) #0 { %ext = fpext <3 x half> %arg to <3 x double> store <3 x double> %ext, <3 x double> addrspace(1)* %out @@ -132,6 +154,19 @@ define void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)* %out, <3 x ha } ; GCN-LABEL: {{^}}extload_v4f16_to_v4f64_arg: +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN: s_endpgm define void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)* %out, <4 x half> %arg) #0 { %ext = fpext <4 x half> %arg to <4 x double> store <4 x double> %ext, <4 x double> addrspace(1)* %out @@ -139,6 +174,37 @@ define void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)* %out, <4 x ha } ; GCN-LABEL: {{^}}extload_v8f16_to_v8f64_arg: +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v + +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v + +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 + +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 + +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 + +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 + +; GCN: s_endpgm define void @extload_v8f16_to_v8f64_arg(<8 x double> addrspace(1)* %out, <8 x half> %arg) #0 { %ext = fpext <8 x half> %arg to <8 x double> store <8 x double> %ext, <8 x double> addrspace(1)* %out @@ -194,6 +260,12 @@ define void @global_extload_f16_to_f32(float addrspace(1)* %out, half addrspace( } ; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f32: +; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}} +; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]] +; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]] +; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[CVT0]]:[[CVT1]]{{\]}} +; GCN: s_endpgm define void @global_extload_v2f16_to_v2f32(<2 x float> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 { %val = load <2 x half>, <2 x half> addrspace(1)* %in %cvt = fpext <2 x half> %val to <2 x float> @@ -246,6 +318,15 @@ define void @global_extload_f16_to_f64(double addrspace(1)* %out, half addrspace } ; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f64: +; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}} +; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]] +; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]] +; GCN-DAG: v_cvt_f64_f32_e32 [[CVT2:v\[[0-9]+:[0-9]+\]]], v[[CVT0]] +; GCN-DAG: v_cvt_f64_f32_e32 [[CVT3:v\[[0-9]+:[0-9]+\]]], v[[CVT1]] +; GCN-DAG: buffer_store_dwordx2 [[CVT2]] +; GCN-DAG: buffer_store_dwordx2 [[CVT3]] +; GCN: s_endpgm define void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 { %val = load <2 x half>, <2 x half> addrspace(1)* %in %cvt = fpext <2 x half> %val to <2 x double> -- 2.34.1