CodeGen: emit IR-level f16 conversion intrinsics as fptrunc/fpext

author Tim Northover <tnorthover@apple.com>

Mon, 21 Jul 2014 09:13:56 +0000 (09:13 +0000)

committer Tim Northover <tnorthover@apple.com>

Mon, 21 Jul 2014 09:13:56 +0000 (09:13 +0000)
author Tim Northover <tnorthover@apple.com>
Mon, 21 Jul 2014 09:13:56 +0000 (09:13 +0000)
committer Tim Northover <tnorthover@apple.com>
Mon, 21 Jul 2014 09:13:56 +0000 (09:13 +0000)
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp

index e3c034c8a1550b391204720f29b7b33e10f76847..649dd7a349ff9b8b16ba553d83e4a90bd9495737 100644 (file)
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -373,6 +373,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
  SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
    EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
    SDValue Op = N->getOperand(0);
+
+  // There's only a libcall for f16 -> f32, so proceed in two stages. Also, it's
+  // entirely possible for both f16 and f32 to be legal, so use the fully
+  // hard-float FP_EXTEND rather than FP16_TO_FP.
+  if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32)
+    Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op);
+
    RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
    return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first;
@@ -511,6 +518,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
  
  SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
    EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  if (N->getValueType(0) == MVT::f16)
+    return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, N->getOperand(0));
+
    SDValue Op = GetSoftenedFloat(N->getOperand(0));
    return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
                                             RTLIB::TRUNC_F32,
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

index 189b56d5a933041b75e8a0178b85a0580746dd12..c07b5e6a7362e2ae71751c6f7a7dff8de243a30d 100644 (file)
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5155,13 +5155,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
      return nullptr;
    }
    case Intrinsic::convert_to_fp16:
-    setValue(&I, DAG.getNode(ISD::FP_TO_FP16, sdl,
-                             MVT::i16, getValue(I.getArgOperand(0))));
+    setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16,
+                             DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16,
+                                         getValue(I.getArgOperand(0)),
+                                         DAG.getTargetConstant(0, MVT::i32))));
      return nullptr;
    case Intrinsic::convert_from_fp16:
      setValue(&I,
-             DAG.getNode(ISD::FP16_TO_FP, sdl, TLI->getValueType(I.getType()),
-                         getValue(I.getArgOperand(0))));
+             DAG.getNode(ISD::FP_EXTEND, sdl, TLI->getValueType(I.getType()),
+                         DAG.getNode(ISD::BITCAST, sdl, MVT::f16,
+                                     getValue(I.getArgOperand(0)))));
      return nullptr;
    case Intrinsic::pcmarker: {
      SDValue Tmp = getValue(I.getArgOperand(0));
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp

index 42cd4bf1fe15de54f9ec71f9d8bfca7d911f42d3..e80ef7176c214fc5073c5f909f805f48a11f5e41 100644 (file)
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -422,7 +422,10 @@ static void InitLibcallCallingConvs(CallingConv::ID *CCs) {
  /// getFPEXT - Return the FPEXT_*_* value for the given types, or
  /// UNKNOWN_LIBCALL if there is none.
  RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
-  if (OpVT == MVT::f32) {
+  if (OpVT == MVT::f16) {
+    if (RetVT == MVT::f32)
+      return FPEXT_F16_F32;
+  } else if (OpVT == MVT::f32) {
      if (RetVT == MVT::f64)
        return FPEXT_F32_F64;
      if (RetVT == MVT::f128)
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td

index 2708ee03313ecb777971da0452dd3b0ad4ca02a2..0ba069e99a8624af87f6506837552802de2f03cc 100644 (file)
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -2244,90 +2244,6 @@ def : Pat<(f64 (fpimm0)), (FMOVXDr XZR)>, Requires<[NoZCZ]>;
  
  defm FCVT : FPConversion<"fcvt">;
  
-def : Pat<(fp_to_f16 FPR32:$Rn),
-          (i32 (COPY_TO_REGCLASS
-                   (f32 (SUBREG_TO_REG (i32 0), (FCVTHSr FPR32:$Rn), hsub)),
-                   GPR32))>;
-
-def : Pat<(f32 (f16_to_fp i32:$Rn)),
-          (FCVTSHr (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS i32:$Rn, FPR32)),
-                                   hsub))>;
-
-// When converting from f16 coming directly from a load, make sure we
-// load into the FPR16 registers rather than going through the GPRs.
-//   f16->f32
-def : Pat<(f32 (f16_to_fp (i32
-                (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
-                                    ro_Wextend16:$extend))))),
-          (FCVTSHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>;
-def : Pat<(f32 (f16_to_fp (i32
-                (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
-                                    ro_Xextend16:$extend))))),
-          (FCVTSHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>;
-def : Pat <(f32 (f16_to_fp (i32
-                  (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
-           (FCVTSHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>;
-def : Pat <(f32 (f16_to_fp (i32
-                  (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
-           (FCVTSHr (LDURHi GPR64sp:$Rn, simm9:$offset))>;
-
-//   f16->f64
-def : Pat<(f64 (fextend (f32 (f16_to_fp (i32
-                (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
-                                    ro_Wextend16:$extend))))))),
-          (FCVTDHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>;
-def : Pat<(f64 (fextend (f32 (f16_to_fp (i32
-                (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
-                                    ro_Xextend16:$extend))))))),
-          (FCVTDHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>;
-def : Pat <(f64 (fextend (f32 (f16_to_fp (i32
-                  (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))))),
-           (FCVTDHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>;
-def : Pat <(f64 (fextend (f32 (f16_to_fp (i32
-                  (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))))),
-           (FCVTDHr (LDURHi GPR64sp:$Rn, simm9:$offset))>;
-
-// When converting to f16 going directly to a store, make sure we use the
-// appropriate direct conversion instructions and store via the FPR16
-// registers rather than going through the GPRs.
-let AddedComplexity = 10 in {
-// f32->f16
-def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))),
-                          (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
-                                         ro_Wextend16:$extend)),
-           (STRHroW (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR32:$Rm,
-                                         ro_Wextend16:$extend)>;
-def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))),
-                          (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
-                                         ro_Xextend16:$extend)),
-           (STRHroX (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR64:$Rm,
-                                         ro_Xextend16:$extend)>;
-def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))),
-              (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
-           (STRHui (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>;
-def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))),
-              (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
-           (STURHi (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, simm9:$offset)>;
-// f64->f16
-def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))),
-                          (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
-                                         ro_Wextend16:$extend)),
-           (STRHroW (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR32:$Rm,
-                                         ro_Wextend16:$extend)>;
-def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))),
-                          (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
-                                         ro_Xextend16:$extend)),
-           (STRHroX (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR64:$Rm,
-                                         ro_Xextend16:$extend)>;
-def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))),
-              (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
-           (STRHui (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>;
-def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))),
-              (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
-           (STURHi (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, simm9:$offset)>;
-}
-
-
  //===----------------------------------------------------------------------===//
  // Floating point single operand instructions.
  //===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/AArch64/f16-convert.ll b/test/CodeGen/AArch64/f16-convert.ll

index 72685e87bd5776185e54376fd1fd34cdd41c7b05..12412d45aa6e06ed2e98ebdde22f96ac6b797a54 100644 (file)
--- a/test/CodeGen/AArch64/f16-convert.ll
+++ b/test/CodeGen/AArch64/f16-convert.ll
@@ -18,8 +18,7 @@ define double @load1(i16* nocapture readonly %a) nounwind {
  ; CHECK-NEXT: ret
  
    %tmp = load i16* %a, align 2
-  %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
-  %conv = fpext float %tmp1 to double
+  %conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp)
    ret double %conv
  }
  
@@ -45,8 +44,7 @@ define double @load3(i16* nocapture readonly %a, i32 %i) nounwind {
    %idxprom = sext i32 %i to i64
    %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
    %tmp = load i16* %arrayidx, align 2
-  %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
-  %conv = fpext float %tmp1 to double
+  %conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp)
    ret double %conv
  }
  
@@ -70,8 +68,7 @@ define double @load5(i16* nocapture readonly %a, i64 %i) nounwind {
  
    %arrayidx = getelementptr inbounds i16* %a, i64 %i
    %tmp = load i16* %arrayidx, align 2
-  %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
-  %conv = fpext float %tmp1 to double
+  %conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp)
    ret double %conv
  }
  
@@ -95,8 +92,7 @@ define double @load7(i16* nocapture readonly %a) nounwind {
  
    %arrayidx = getelementptr inbounds i16* %a, i64 10
    %tmp = load i16* %arrayidx, align 2
-  %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
-  %conv = fpext float %tmp1 to double
+  %conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp)
    ret double %conv
  }
  
@@ -120,8 +116,7 @@ define double @load9(i16* nocapture readonly %a) nounwind {
  
    %arrayidx = getelementptr inbounds i16* %a, i64 -10
    %tmp = load i16* %arrayidx, align 2
-  %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
-  %conv = fpext float %tmp1 to double
+  %conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp)
    ret double %conv
  }
  
@@ -252,3 +247,5 @@ define void @store9(i16* nocapture %a, double %val) nounwind {
  
  declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
  declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
+declare i16 @llvm.convert.to.fp16.f64(double) nounwind readnone
+declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone
author	Tim Northover <tnorthover@apple.com>
	Mon, 21 Jul 2014 09:13:56 +0000 (09:13 +0000)
committer	Tim Northover <tnorthover@apple.com>
	Mon, 21 Jul 2014 09:13:56 +0000 (09:13 +0000)
lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp		patch \| blob \| history
lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp		patch \| blob \| history
lib/CodeGen/TargetLoweringBase.cpp		patch \| blob \| history
lib/Target/AArch64/AArch64InstrInfo.td		patch \| blob \| history
test/CodeGen/AArch64/f16-convert.ll		patch \| blob \| history