[AArch64] Fold a floating-point divide by power of two into fp conversion.

author Chad Rosier <mcrosier@codeaurora.org>

Wed, 7 Oct 2015 17:51:37 +0000 (17:51 +0000)

committer Chad Rosier <mcrosier@codeaurora.org>

Wed, 7 Oct 2015 17:51:37 +0000 (17:51 +0000)
author Chad Rosier <mcrosier@codeaurora.org>
Wed, 7 Oct 2015 17:51:37 +0000 (17:51 +0000)
committer Chad Rosier <mcrosier@codeaurora.org>
Wed, 7 Oct 2015 17:51:37 +0000 (17:51 +0000)
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp

index 6af2d9d9d0689424739a9ccebe97dc1f25f0b2f0..fa8cad827955eee07db34a9ff5dfa6bbdae8dea9 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -480,6 +480,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
  
    setTargetDAGCombine(ISD::FP_TO_SINT);
    setTargetDAGCombine(ISD::FP_TO_UINT);
+  setTargetDAGCombine(ISD::FDIV);
  
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
  
@@ -7596,6 +7597,70 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
    return FixConv;
  }
  
+/// Fold a floating-point divide by power of two into fixed-point to
+/// floating-point conversion.
+static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG,
+                                  const AArch64Subtarget *Subtarget) {
+  if (!Subtarget->hasNEON())
+    return SDValue();
+
+  SDValue Op = N->getOperand(0);
+  unsigned Opc = Op->getOpcode();
+  if (!Op.getValueType().isVector() ||
+      (Opc != ISD::SINT_TO_FP && Opc != ISD::UINT_TO_FP))
+    return SDValue();
+
+  SDValue ConstVec = N->getOperand(1);
+  if (!isa<BuildVectorSDNode>(ConstVec))
+    return SDValue();
+
+  MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
+  int32_t IntBits = IntTy.getSizeInBits();
+  if (IntBits != 16 && IntBits != 32 && IntBits != 64)
+    return SDValue();
+
+  MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
+  int32_t FloatBits = FloatTy.getSizeInBits();
+  if (FloatBits != 32 && FloatBits != 64)
+    return SDValue();
+
+  // Avoid conversions where iN is larger than the float (e.g., i64 -> float).
+  if (IntBits > FloatBits)
+    return SDValue();
+
+  BitVector UndefElements;
+  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
+  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, FloatBits + 1);
+  if (C == -1 || C == 0 || C > FloatBits)
+    return SDValue();
+
+  MVT ResTy;
+  unsigned NumLanes = Op.getValueType().getVectorNumElements();
+  switch (NumLanes) {
+  default:
+    return SDValue();
+  case 2:
+    ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
+    break;
+  case 4:
+    ResTy = MVT::v4i32;
+    break;
+  }
+
+  SDLoc DL(N);
+  SDValue ConvInput = Op.getOperand(0);
+  bool IsSigned = Opc == ISD::SINT_TO_FP;
+  if (IntBits < FloatBits)
+    ConvInput = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
+                            ResTy, ConvInput);
+
+  unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfxs2fp
+                                      : Intrinsic::aarch64_neon_vcvtfxu2fp;
+  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
+                     DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput,
+                     DAG.getConstant(C, DL, MVT::i32));
+}
+
  /// An EXTR instruction is made up of two shifts, ORed together. This helper
  /// searches for and classifies those shifts.
  static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
@@ -9470,6 +9535,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
    case ISD::FP_TO_SINT:
    case ISD::FP_TO_UINT:
      return performFpToIntCombine(N, DAG, Subtarget);
+  case ISD::FDIV:
+    return performFDivCombine(N, DAG, Subtarget);
    case ISD::OR:
      return performORCombine(N, DCI, Subtarget);
    case ISD::INTRINSIC_WO_CHAIN:
diff --git a/test/CodeGen/AArch64/fdiv_combine.ll b/test/CodeGen/AArch64/fdiv_combine.ll

new file mode 100644 (file)

index 0000000..6f38a26
--- /dev/null
+++ b/test/CodeGen/AArch64/fdiv_combine.ll
@@ -0,0 +1,115 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-neon-syntax=apple -verify-machineinstrs -o - %s | FileCheck %s
+
+; Test signed conversion.
+; CHECK-LABEL: @test1
+; CHECK: scvtf.2s v0, v0, #4
+; CHECK: ret
+define <2 x float> @test1(<2 x i32> %in) {
+entry:
+  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 16.0, float 16.0>
+  ret <2 x float> %div.i
+}
+
+; Test unsigned conversion.
+; CHECK-LABEL: @test2
+; CHECK: ucvtf.2s v0, v0, #3
+; CHECK: ret
+define <2 x float> @test2(<2 x i32> %in) {
+entry:
+  %vcvt.i = uitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 8.0, float 8.0>
+  ret <2 x float> %div.i
+}
+
+; Test which should not fold due to non-power of 2.
+; CHECK-LABEL: @test3
+; CHECK: scvtf.2s v0, v0
+; CHECK: fmov.2s v1, #9.00000000
+; CHECK: fdiv.2s v0, v0, v1
+; CHECK: ret
+define <2 x float> @test3(<2 x i32> %in) {
+entry:
+  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 9.0, float 9.0>
+  ret <2 x float> %div.i
+}
+
+; Test which should not fold due to power of 2 out of range.
+; CHECK-LABEL: @test4
+; CHECK: scvtf.2s v0, v0
+; CHECK: movi.2s v1, #0x50, lsl #24
+; CHECK: fdiv.2s v0, v0, v1
+; CHECK: ret
+define <2 x float> @test4(<2 x i32> %in) {
+entry:
+  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 0x4200000000000000, float 0x4200000000000000>
+  ret <2 x float> %div.i
+}
+
+; Test case where const is max power of 2 (i.e., 2^32).
+; CHECK-LABEL: @test5
+; CHECK: scvtf.2s v0, v0, #32
+; CHECK: ret
+define <2 x float> @test5(<2 x i32> %in) {
+entry:
+  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 0x41F0000000000000, float 0x41F0000000000000>
+  ret <2 x float> %div.i
+}
+
+; Test quadword.
+; CHECK-LABEL: @test6
+; CHECK: scvtf.4s v0, v0, #2
+; CHECK: ret
+define <4 x float> @test6(<4 x i32> %in) {
+entry:
+  %vcvt.i = sitofp <4 x i32> %in to <4 x float>
+  %div.i = fdiv <4 x float> %vcvt.i, <float 4.0, float 4.0, float 4.0, float 4.0>
+  ret <4 x float> %div.i
+}
+
+; Test unsigned i16 to float
+; CHECK-LABEL: @test7
+; CHECK: ushll.4s  v0, v0, #0
+; CHECK: ucvtf.4s  v0, v0, #1
+; CHECK: ret
+define <4 x float> @test7(<4 x i16> %in) {
+  %conv = uitofp <4 x i16> %in to <4 x float>
+  %shift = fdiv <4 x float> %conv, <float 2.0, float 2.0, float 2.0, float 2.0>
+  ret <4 x float> %shift
+}
+
+; Test signed i16 to float
+; CHECK-LABEL: @test8
+; CHECK: sshll.4s v0, v0, #0
+; CHECK: scvtf.4s v0, v0, #2
+; CHECK: ret
+define <4 x float> @test8(<4 x i16> %in) {
+  %conv = sitofp <4 x i16> %in to <4 x float>
+  %shift = fdiv <4 x float> %conv, <float 4.0, float 4.0, float 4.0, float 4.0>
+  ret <4 x float> %shift
+}
+
+; Can't convert i64 to float.
+; CHECK-LABEL: @test9
+; CHECK: ucvtf.2d v0, v0
+; CHECK: fcvtn v0.2s, v0.2d
+; CHECK: movi.2s v1, #0x40, lsl #24
+; CHECK: fdiv.2s v0, v0, v1
+; CHECK: ret
+define <2 x float> @test9(<2 x i64> %in) {
+  %conv = uitofp <2 x i64> %in to <2 x float>
+  %shift = fdiv <2 x float> %conv, <float 2.0, float 2.0>
+  ret <2 x float> %shift
+}
+
+; CHECK-LABEL: @test10
+; CHECK: ucvtf.2d v0, v0, #1
+; CHECK: ret
+define <2 x double> @test10(<2 x i64> %in) {
+  %conv = uitofp <2 x i64> %in to <2 x double>
+  %shift = fdiv <2 x double> %conv, <double 2.0, double 2.0>
+  ret <2 x double> %shift
+}
author	Chad Rosier <mcrosier@codeaurora.org>
	Wed, 7 Oct 2015 17:51:37 +0000 (17:51 +0000)
committer	Chad Rosier <mcrosier@codeaurora.org>
	Wed, 7 Oct 2015 17:51:37 +0000 (17:51 +0000)
lib/Target/AArch64/AArch64ISelLowering.cpp		patch \| blob \| history
test/CodeGen/AArch64/fdiv_combine.ll	[new file with mode: 0644]	patch \| blob