AArch64: Constant fold converting vector setcc results to float.

author Jim Grosbach <grosbach@apple.com>

Fri, 18 Jul 2014 00:40:52 +0000 (00:40 +0000)

committer Jim Grosbach <grosbach@apple.com>

Fri, 18 Jul 2014 00:40:52 +0000 (00:40 +0000)
author Jim Grosbach <grosbach@apple.com>
Fri, 18 Jul 2014 00:40:52 +0000 (00:40 +0000)
committer Jim Grosbach <grosbach@apple.com>
Fri, 18 Jul 2014 00:40:52 +0000 (00:40 +0000)
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

index daff1f228c71096da94c8215d9399caf2c5a640d..34f5014d1b0e7008fef335e0039775710ec07824 100644 (file)
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2772,6 +2772,32 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
      }
    }
  
+  // Constant fold unary operations with a vector integer operand.
+  if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand.getNode())) {
+    APInt Val;
+    APInt DummyUndefs;
+    unsigned SplatBitSize;
+    bool DummyHasUndefs;
+    if (BV->isConstantSplat(Val, DummyUndefs, SplatBitSize, DummyHasUndefs)) {
+      switch (Opcode) {
+      default:
+        // FIXME: Entirely reasonable to perform folding of other unary
+        // operations here as the need arises.
+        break;
+      case ISD::UINT_TO_FP:
+      case ISD::SINT_TO_FP: {
+        APFloat APF(
+            EVTToAPFloatSemantics(VT.getVectorElementType()),
+            APInt::getNullValue(VT.getVectorElementType().getSizeInBits()));
+        (void)APF.convertFromAPInt(Val, Opcode == ISD::SINT_TO_FP,
+                                   APFloat::rmNearestTiesToEven);
+
+        return getConstantFP(APF, VT);
+      }
+      }
+    }
+  }
+
    unsigned OpOpcode = Operand.getNode()->getOpcode();
    switch (Opcode) {
    case ISD::TokenFactor:
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp

index 7b77c59ed11fe49e2f2b5908b952cf550be8966f..bf3832ca1de26a11d4221674635a653efeff82b8 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6417,10 +6417,61 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
    return SDValue();
  }
  
+static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
+                                                         SelectionDAG &DAG) {
+  // Take advantage of vector comparisons producing 0 or -1 in each lane to
+  // optimize away operation when it's from a constant.
+  //
+  // The general transformation is:
+  //    UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
+  //       AND(VECTOR_CMP(x,y), constant2)
+  //    constant2 = UNARYOP(constant)
+
+  // Early exit if this isn't a vector operation or if the operand of the
+  // unary operation isn't a bitwise AND.
+  EVT VT = N->getValueType(0);
+  if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
+      N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC)
+    return SDValue();
+
+  // Now check that the other operand of the AND is a constant splat. We could
+  // make the transformation for non-constant splats as well, but it's unclear
+  // that would be a benefit as it would not eliminate any operations, just
+  // perform one more step in scalar code before moving to the vector unit.
+  if (BuildVectorSDNode *BV =
+          dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
+    // Bail out if the vector isn't a constant splat.
+    if (!BV->getConstantSplatNode())
+      return SDValue();
+
+    // Everything checks out. Build up the new and improved node.
+    SDLoc DL(N);
+    EVT IntVT = BV->getValueType(0);
+    // Create a new constant of the appropriate type for the transformed
+    // DAG.
+    SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
+    // The AND node needs bitcasts to/from an integer vector type around it.
+    SDValue MaskConst = DAG.getNode(ISD::BITCAST, DL, IntVT, SourceConst);
+    SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT,
+                                 N->getOperand(0)->getOperand(0), MaskConst);
+    SDValue Res = DAG.getNode(ISD::BITCAST, DL, VT, NewAnd);
+    return Res;
+  }
+
+  return SDValue();
+}
+
  static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG) {
+  // First try to optimize away the conversion when it's conditionally from
+  // a constant. Vectors only.
+  SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG);
+  if (Res != SDValue())
+    return Res;
+
    EVT VT = N->getValueType(0);
    if (VT != MVT::f32 && VT != MVT::f64)
      return SDValue();
+
    // Only optimize when the source and destination types have the same width.
    if (VT.getSizeInBits() != N->getOperand(0).getValueType().getSizeInBits())
      return SDValue();
diff --git a/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll b/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll

new file mode 100644 (file)

index 0000000..b10fe75
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -asm-verbose=false -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind {
+; CHECK-LABEL: foo:
+; CHECK-NEXT: fcmeq.4s  v0, v0, v1
+; CHECK-NEXT: fmov.4s v1, #1.00000000
+; CHECK-NEXT: and.16b v0, v0, v1
+; CHECK-NEXT: ret
+  %cmp = fcmp oeq <4 x float> %val, %test
+  %ext = zext <4 x i1> %cmp to <4 x i32>
+  %result = sitofp <4 x i32> %ext to <4 x float>
+  ret <4 x float> %result
+}
author	Jim Grosbach <grosbach@apple.com>
	Fri, 18 Jul 2014 00:40:52 +0000 (00:40 +0000)
committer	Jim Grosbach <grosbach@apple.com>
	Fri, 18 Jul 2014 00:40:52 +0000 (00:40 +0000)
lib/CodeGen/SelectionDAG/SelectionDAG.cpp		patch \| blob \| history
lib/Target/AArch64/AArch64ISelLowering.cpp		patch \| blob \| history
test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll	[new file with mode: 0644]	patch \| blob