From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 9 Sep 2015 01:12:27 +0000 (+0000)
Subject: SelectionDAG: Support Expand of f16 extloads
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=7e657c85c6360ca0adcbda29ed26347a2cd9ae55;p=oota-llvm.git

SelectionDAG: Support Expand of f16 extloads

Currently this hits an assert that extload should
always be supported, which assumes integer extloads.

This moves a hack out of SI's argument lowering and
is covered by existing tests.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@247113 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h
index e1a9fd38290..20e5055071a 100644
--- a/include/llvm/CodeGen/ValueTypes.h
+++ b/include/llvm/CodeGen/ValueTypes.h
@@ -89,6 +89,19 @@ namespace llvm {
       return VecTy;
     }
 
+    /// Return the type converted to an equivalently sized integer or vector
+    /// with integer element type. Similar to changeVectorElementTypeToInteger,
+    /// but also handles scalars.
+    EVT changeTypeToInteger() {
+      if (isVector())
+        return changeVectorElementTypeToInteger();
+
+      if (isSimple())
+        return MVT::getIntegerVT(getSizeInBits());
+
+      return changeExtendedTypeToInteger();
+    }
+
     /// isSimple - Test if the given EVT is simple (as opposed to being
     /// extended).
     bool isSimple() const {
@@ -342,6 +355,7 @@ namespace llvm {
     // Methods for handling the Extended-type case in functions above.
     // These are all out-of-line to prevent users of this header file
     // from having a dependency on Type.h.
+    EVT changeExtendedTypeToInteger() const;
     EVT changeExtendedVectorElementTypeToInteger() const;
     static EVT getExtendedIntegerVT(LLVMContext &C, unsigned BitWidth);
     static EVT getExtendedVectorVT(LLVMContext &C, EVT VT,
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 889bfd4e52b..c3c36883f94 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1095,7 +1095,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
       break;
     }
     case TargetLowering::Expand:
-      if (!TLI.isLoadExtLegal(ISD::EXTLOAD, Node->getValueType(0), SrcVT)) {
+      EVT DestVT = Node->getValueType(0);
+      if (!TLI.isLoadExtLegal(ISD::EXTLOAD, DestVT, SrcVT)) {
         // If the source type is not legal, see if there is a legal extload to
         // an intermediate type that we can then extend further.
         EVT LoadVT = TLI.getRegisterType(SrcVT.getSimpleVT());
@@ -1114,6 +1115,23 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
           Chain = Load.getValue(1);
           break;
         }
+
+        // Handle the special case of fp16 extloads. EXTLOAD doesn't have the
+        // normal undefined upper bits behavior to allow using an in-reg extend
+        // with the illegal FP type, so load as an integer and do the
+        // from-integer conversion.
+        if (SrcVT.getScalarType() == MVT::f16) {
+          EVT ISrcVT = SrcVT.changeTypeToInteger();
+          EVT IDestVT = DestVT.changeTypeToInteger();
+          EVT LoadVT = TLI.getRegisterType(IDestVT.getSimpleVT());
+
+          SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, LoadVT,
+                                          Chain, Ptr, ISrcVT,
+                                          LD->getMemOperand());
+          Value = DAG.getNode(ISD::FP16_TO_FP, dl, DestVT, Result);
+          Chain = Result.getValue(1);
+          break;
+        }
       }
 
       assert(!SrcVT.isVector() &&
diff --git a/lib/IR/ValueTypes.cpp b/lib/IR/ValueTypes.cpp
index d95de3989df..be58fd45069 100644
--- a/lib/IR/ValueTypes.cpp
+++ b/lib/IR/ValueTypes.cpp
@@ -19,6 +19,11 @@
 #include "llvm/Support/ErrorHandling.h"
 using namespace llvm;
 
+EVT EVT::changeExtendedTypeToInteger() const {
+  LLVMContext &Context = LLVMTy->getContext();
+  return getIntegerVT(Context, getSizeInBits());
+}
+
 EVT EVT::changeExtendedVectorElementTypeToInteger() const {
   LLVMContext &Context = LLVMTy->getContext();
   EVT IntTy = getIntegerVT(Context, getVectorElementType().getSizeInBits());
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index e7dcd8a3267..ef42a2972bf 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -464,12 +464,6 @@ bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
   return TII->isInlineConstant(Imm);
 }
 
-static EVT toIntegerVT(EVT VT) {
-  if (VT.isVector())
-    return VT.changeVectorElementTypeToInteger();
-  return MVT::getIntegerVT(VT.getSizeInBits());
-}
-
 SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
                                          SDLoc SL, SDValue Chain,
                                          unsigned Offset, bool Signed) const {
@@ -493,30 +487,10 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
 
   unsigned Align = DL.getABITypeAlignment(Ty);
 
-  if (VT != MemVT && VT.isFloatingPoint()) {
-    // Do an integer load and convert.
-    // FIXME: This is mostly because load legalization after type legalization
-    // doesn't handle FP extloads.
-    assert(VT.getScalarType() == MVT::f32 &&
-           MemVT.getScalarType() == MVT::f16);
-
-    EVT IVT = toIntegerVT(VT);
-    EVT MemIVT = toIntegerVT(MemVT);
-    SDValue Load = DAG.getLoad(ISD::UNINDEXED, ISD::ZEXTLOAD,
-                               IVT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemIVT,
-                               false, // isVolatile
-                               true, // isNonTemporal
-                               true, // isInvariant
-                               Align); // Alignment
-    SDValue Ops[] = {
-      DAG.getNode(ISD::FP16_TO_FP, SL, VT, Load),
-      Load.getValue(1)
-    };
-
-    return DAG.getMergeValues(Ops, SL);
-  }
-
   ISD::LoadExtType ExtTy = Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+  if (MemVT.isFloatingPoint())
+    ExtTy = ISD::EXTLOAD;
+
   return DAG.getLoad(ISD::UNINDEXED, ExtTy,
                      VT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemVT,
                      false, // isVolatile
diff --git a/test/CodeGen/AMDGPU/half.ll b/test/CodeGen/AMDGPU/half.ll
index bf8f11860b5..cd0b1b4e68d 100644
--- a/test/CodeGen/AMDGPU/half.ll
+++ b/test/CodeGen/AMDGPU/half.ll
@@ -112,12 +112,24 @@ define void @extload_v8f16_to_v8f32_arg(<8 x float> addrspace(1)* %out, <8 x hal
 }
 
 ; GCN-LABEL: {{^}}extload_f16_to_f64_arg:
+; SI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb{{$}}
+; VI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c{{$}}
+; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[ARG]]
+; GCN: buffer_store_dwordx2 [[RESULT]]
 define void @extload_f16_to_f64_arg(double addrspace(1)* %out, half %arg) #0 {
   %ext = fpext half %arg to double
   store double %ext, double addrspace(1)* %out
   ret void
 }
+
 ; GCN-LABEL: {{^}}extload_v2f16_to_v2f64_arg:
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN: s_endpgm
 define void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)* %out, <2 x half> %arg) #0 {
   %ext = fpext <2 x half> %arg to <2 x double>
   store <2 x double> %ext, <2 x double> addrspace(1)* %out
@@ -125,6 +137,16 @@ define void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)* %out, <2 x ha
 }
 
 ; GCN-LABEL: {{^}}extload_v3f16_to_v3f64_arg:
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN: s_endpgm
 define void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)* %out, <3 x half> %arg) #0 {
   %ext = fpext <3 x half> %arg to <3 x double>
   store <3 x double> %ext, <3 x double> addrspace(1)* %out
@@ -132,6 +154,19 @@ define void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)* %out, <3 x ha
 }
 
 ; GCN-LABEL: {{^}}extload_v4f16_to_v4f64_arg:
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN: s_endpgm
 define void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)* %out, <4 x half> %arg) #0 {
   %ext = fpext <4 x half> %arg to <4 x double>
   store <4 x double> %ext, <4 x double> addrspace(1)* %out
@@ -139,6 +174,37 @@ define void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)* %out, <4 x ha
 }
 
 ; GCN-LABEL: {{^}}extload_v8f16_to_v8f64_arg:
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+
+; GCN: s_endpgm
 define void @extload_v8f16_to_v8f64_arg(<8 x double> addrspace(1)* %out, <8 x half> %arg) #0 {
   %ext = fpext <8 x half> %arg to <8 x double>
   store <8 x double> %ext, <8 x double> addrspace(1)* %out
@@ -194,6 +260,12 @@ define void @global_extload_f16_to_f32(float addrspace(1)* %out, half addrspace(
 }
 
 ; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f32:
+; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
+; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]]
+; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]]
+; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[CVT0]]:[[CVT1]]{{\]}}
+; GCN: s_endpgm
 define void @global_extload_v2f16_to_v2f32(<2 x float> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
   %val = load <2 x half>, <2 x half> addrspace(1)* %in
   %cvt = fpext <2 x half> %val to <2 x float>
@@ -246,6 +318,15 @@ define void @global_extload_f16_to_f64(double addrspace(1)* %out, half addrspace
 }
 
 ; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f64:
+; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
+; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]]
+; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]]
+; GCN-DAG: v_cvt_f64_f32_e32 [[CVT2:v\[[0-9]+:[0-9]+\]]], v[[CVT0]]
+; GCN-DAG: v_cvt_f64_f32_e32 [[CVT3:v\[[0-9]+:[0-9]+\]]], v[[CVT1]]
+; GCN-DAG: buffer_store_dwordx2 [[CVT2]]
+; GCN-DAG: buffer_store_dwordx2 [[CVT3]]
+; GCN: s_endpgm
 define void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
   %val = load <2 x half>, <2 x half> addrspace(1)* %in
   %cvt = fpext <2 x half> %val to <2 x double>