ARM: implement some simple f64 materializations.

author Tim Northover <tnorthover@apple.com>

Tue, 20 Aug 2013 08:57:11 +0000 (08:57 +0000)

committer Tim Northover <tnorthover@apple.com>

Tue, 20 Aug 2013 08:57:11 +0000 (08:57 +0000)
author Tim Northover <tnorthover@apple.com>
Tue, 20 Aug 2013 08:57:11 +0000 (08:57 +0000)
committer Tim Northover <tnorthover@apple.com>
Tue, 20 Aug 2013 08:57:11 +0000 (08:57 +0000)
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp

index caec11e108f57c7c1232e7453aa64f593f7148dd..f22c5b9ca4ddc2eafab4527a28189d4ba73ed196 100644 (file)
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -452,6 +452,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
    }
  
    setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
+  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
  
    if (Subtarget->hasNEON()) {
      addDRTypeForNEON(MVT::v2f32);
@@ -4271,17 +4272,25 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
  
  SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
                                             const ARMSubtarget *ST) const {
-  if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16())
+  if (!ST->hasVFP3())
      return SDValue();
  
+  bool IsDouble = Op.getValueType() == MVT::f64;
    ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
-  assert(Op.getValueType() == MVT::f32 &&
-         "ConstantFP custom lowering should only occur for f32.");
  
    // Try splatting with a VMOV.f32...
    APFloat FPVal = CFP->getValueAPF();
-  int ImmVal = ARM_AM::getFP32Imm(FPVal);
+  int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
+
    if (ImmVal != -1) {
+    if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
+      // We have code in place to select a valid ConstantFP already, no need to
+      // do any mangling.
+      return Op;
+    }
+
+    // It's a float and we are trying to use NEON operations where
+    // possible. Lower it to a splat followed by an extract.
      SDLoc DL(Op);
      SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32);
      SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
@@ -4290,15 +4299,31 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
                         DAG.getConstant(0, MVT::i32));
    }
  
-  // If that fails, try a VMOV.i32
+  // The rest of our options are NEON only, make sure that's allowed before
+  // proceeding..
+  if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
+    return SDValue();
+
    EVT VMovVT;
-  unsigned iVal = FPVal.bitcastToAPInt().getZExtValue();
-  SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false,
-                                     VMOVModImm);
+  uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
+
+  // It wouldn't really be worth bothering for doubles except for one very
+  // important value, which does happen to match: 0.0. So make sure we don't do
+  // anything stupid.
+  if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
+    return SDValue();
+
+  // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
+  SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, VMovVT,
+                                     false, VMOVModImm);
    if (NewVal != SDValue()) {
      SDLoc DL(Op);
      SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
                                        NewVal);
+    if (IsDouble)
+      return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
+
+    // It's a float: cast and extract a vector element.
      SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
                                         VecConstant);
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
@@ -4306,11 +4331,16 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
    }
  
    // Finally, try a VMVN.i32
-  NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false,
-                             VMVNModImm);
+  NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, VMovVT,
+                             false, VMVNModImm);
    if (NewVal != SDValue()) {
      SDLoc DL(Op);
      SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
+
+    if (IsDouble)
+      return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
+
+    // It's a float: cast and extract a vector element.
      SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
                                         VecConstant);
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
diff --git a/test/CodeGen/ARM/constantfp.ll b/test/CodeGen/ARM/constantfp.ll

new file mode 100644 (file)

index 0000000..974bdd7
--- /dev/null
+++ b/test/CodeGen/ARM/constantfp.ll
@@ -0,0 +1,68 @@
+; RUN: llc -mtriple=armv7 -mattr=+neon -mcpu=swift %s -o - | FileCheck %s
+; RUN: llc -mtriple=armv7 -mattr=+neon -mcpu=cortex-a8 %s -o - | FileCheck --check-prefix=CHECK-NONEONFP %s
+; RUN: llc -mtriple=armv7 -mattr=-neon -mcpu=cortex-a8 %s -o - | FileCheck --check-prefix=CHECK-NONEON %s
+
+define arm_aapcs_vfpcc float @test_vmov_f32() {
+; CHECK-LABEL: test_vmov_f32:
+; CHECK: vmov.f32 d0, #1.0
+
+; CHECK-NONEONFP: vmov.f32 s0, #1.0
+  ret float 1.0
+}
+
+define arm_aapcs_vfpcc float @test_vmov_imm() {
+; CHECK-LABEL: test_vmov_imm:
+; CHECK: vmov.i32 d0, #0
+
+; CHECK-NONEON-LABEL: test_vmov_imm:
+; CHECK_NONEON: vldr s0, {{.?LCPI[0-9]+_[0-9]+}}
+  ret float 0.0
+}
+
+define arm_aapcs_vfpcc float @test_vmvn_imm() {
+; CHECK-LABEL: test_vmvn_imm:
+; CHECK: vmvn.i32 d0, #0xb0000000
+
+; CHECK-NONEON-LABEL: test_vmvn_imm:
+; CHECK_NONEON: vldr s0, {{.?LCPI[0-9]+_[0-9]+}}
+  ret float 8589934080.0
+}
+
+define arm_aapcs_vfpcc double @test_vmov_f64() {
+; CHECK-LABEL: test_vmov_f64:
+; CHECK: vmov.f64 d0, #1.0
+
+; CHECK-NONEON-LABEL: test_vmov_f64:
+; CHECK_NONEON: vmov.f64 d0, #1.0
+
+  ret double 1.0
+}
+
+define arm_aapcs_vfpcc double @test_vmov_double_imm() {
+; CHECK-LABEL: test_vmov_double_imm:
+; CHECK: vmov.i32 d0, #0
+
+; CHECK-NONEON-LABEL: test_vmov_double_imm:
+; CHECK_NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}}
+  ret double 0.0
+}
+
+define arm_aapcs_vfpcc double @test_vmvn_double_imm() {
+; CHECK-LABEL: test_vmvn_double_imm:
+; CHECK: vmvn.i32 d0, #0xb0000000
+
+; CHECK-NONEON-LABEL: test_vmvn_double_imm:
+; CHECK_NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}}
+  ret double 0x4fffffff4fffffff
+}
+
+; Make sure we don't ignore the high half of 64-bit values when deciding whether
+; a vmov/vmvn is possible.
+define arm_aapcs_vfpcc double @test_notvmvn_double_imm() {
+; CHECK-LABEL: test_notvmvn_double_imm:
+; CHECK: vldr d0, {{.?LCPI[0-9]+_[0-9]+}}
+
+; CHECK-NONEON-LABEL: test_notvmvn_double_imm:
+; CHECK_NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}}
+  ret double 0x4fffffffffffffff
+}
diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll

index 3fe2bb8e3828a3217a41885c4c1f829c382b82ae..25484f484853b274f47dcf234ca3556795bbd964 100644 (file)
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@@ -239,10 +239,9 @@ bb14:                                             ; preds = %bb6
  ; PR7157
  define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind {
  ; CHECK-LABEL:        t9:
-; CHECK:        vldr
-; CHECK-NOT:    vmov d{{.*}}, d16
-; CHECK:        vmov.i32 d17
+; CHECK: vmov.i32 d16, #0x0
  ; CHECK-NEXT:   vst1.64 {d16, d17}, [r0:128]
+; CHECK-NEXT:   vorr d17, d16, d16
  ; CHECK-NEXT:   vst1.64 {d16, d17}, [r0:128]
    %3 = bitcast double 0.000000e+00 to <2 x float> ; <<2 x float>> [#uses=2]
    %4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
author	Tim Northover <tnorthover@apple.com>
	Tue, 20 Aug 2013 08:57:11 +0000 (08:57 +0000)
committer	Tim Northover <tnorthover@apple.com>
	Tue, 20 Aug 2013 08:57:11 +0000 (08:57 +0000)
lib/Target/ARM/ARMISelLowering.cpp		patch \| blob \| history
test/CodeGen/ARM/constantfp.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/ARM/reg_sequence.ll		patch \| blob \| history