setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
- // [SU][MIN|MAX] are available for all NEON types apart from i64.
+ // [SU][MIN|MAX] and [SU]ABSDIFF are available for all NEON types apart from
+ // i64.
if (!VT.isFloatingPoint() &&
VT.getSimpleVT() != MVT::v2i64 && VT.getSimpleVT() != MVT::v1i64)
- for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
+ for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX,
+ ISD::SABSDIFF, ISD::UABSDIFF})
setOperationAction(Opcode, VT.getSimpleVT(), Legal);
if (Subtarget->isLittleEndian()) {
// (aarch64_neon_umull (extract_high (v2i64 vec)))
//                     (extract_high (v2i64 (dup128 scalar)))))
//
-static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
+static SDValue tryCombineLongOpWithDup(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
- SDValue LHS = N->getOperand(1);
- SDValue RHS = N->getOperand(2);
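+ // ISD::INTRINSIC_WO_CHAIN nodes carry the intrinsic ID as operand 0, so
+ // their data operands start at index 1; plain SABSDIFF/UABSDIFF nodes take
+ // theirs at indices 0 and 1.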
+ bool IsIntrinsic = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN;
+ SDValue LHS = N->getOperand(IsIntrinsic ? 1 : 0);
+ SDValue RHS = N->getOperand(IsIntrinsic ? 2 : 1);
assert(LHS.getValueType().is64BitVector() &&
RHS.getValueType().is64BitVector() &&
"unexpected shape for long operation");
return SDValue();
}
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
- N->getOperand(0), LHS, RHS);
+ // N could either be an intrinsic or a sabsdiff/uabsdiff node.
+ if (IsIntrinsic)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
+ N->getOperand(0), LHS, RHS);
+ else
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
+ LHS, RHS);
}
static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
case Intrinsic::aarch64_neon_fmin:
return DAG.getNode(AArch64ISD::FMIN, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
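+ // Rewrite the AArch64-specific absolute-difference intrinsics as the
+ // target-independent SABSDIFF/UABSDIFF nodes so the rest of the backend can
+ // match the generic form.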
+ case Intrinsic::aarch64_neon_sabd:
+ return DAG.getNode(ISD::SABSDIFF, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2));
+ case Intrinsic::aarch64_neon_uabd:
+ return DAG.getNode(ISD::UABSDIFF, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2));
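+ // The long multiplies can fold a DUP operand into an extract_high of a
+ // wider DUP; see tryCombineLongOpWithDup.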
case Intrinsic::aarch64_neon_smull:
case Intrinsic::aarch64_neon_umull:
case Intrinsic::aarch64_neon_pmull:
case Intrinsic::aarch64_neon_sqdmull:
- return tryCombineLongOpWithDup(IID, N, DCI, DAG);
+ return tryCombineLongOpWithDup(N, DCI, DAG);
case Intrinsic::aarch64_neon_sqshl:
case Intrinsic::aarch64_neon_uqshl:
case Intrinsic::aarch64_neon_sqshlu:
// helps the backend to decide that an sabdl2 would be useful, saving a real
// extract_high operation.
if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
- N->getOperand(0).getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
+ (N->getOperand(0).getOpcode() == ISD::SABSDIFF ||
+ N->getOperand(0).getOpcode() == ISD::UABSDIFF)) {
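+ // The sabd/uabd intrinsics are rewritten into SABSDIFF/UABSDIFF by
+ // performIntrinsicCombine, so the generic nodes are what we match here.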
SDNode *ABDNode = N->getOperand(0).getNode();
- unsigned IID = getIntrinsicID(ABDNode);
- if (IID == Intrinsic::aarch64_neon_sabd ||
- IID == Intrinsic::aarch64_neon_uabd) {
- SDValue NewABD = tryCombineLongOpWithDup(IID, ABDNode, DCI, DAG);
- if (!NewABD.getNode())
- return SDValue();
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0),
- NewABD);
- }
+ SDValue NewABD = tryCombineLongOpWithDup(ABDNode, DCI, DAG);
+ if (!NewABD.getNode())
+ return SDValue();
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0),
+ NewABD);
}
// This is effectively a custom type legalization for AArch64.
defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
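+// SABA/SABD (and UABA/UABD below) now select via the generic sabsdiff and
+// uabsdiff nodes rather than the AArch64-specific intrinsics.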
defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
- TriOpFrag<(add node:$LHS, (int_aarch64_neon_sabd node:$MHS, node:$RHS))> >;
-defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_aarch64_neon_sabd>;
+ TriOpFrag<(add node:$LHS, (sabsdiff node:$MHS, node:$RHS))> >;
+defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", sabsdiff>;
defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_aarch64_neon_shadd>;
defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>;
defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
- TriOpFrag<(add node:$LHS, (int_aarch64_neon_uabd node:$MHS, node:$RHS))> >;
-defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_aarch64_neon_uabd>;
+ TriOpFrag<(add node:$LHS, (uabsdiff node:$MHS, node:$RHS))> >;
+defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", uabsdiff>;
defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_aarch64_neon_uhadd>;
defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_aarch64_neon_pmull>;
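+// The accumulating and long forms (SABAL/SABDL, and UABAL/UABDL below)
+// likewise match the generic absolute-difference nodes.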
defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
- int_aarch64_neon_sabd>;
+ sabsdiff>;
defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
- int_aarch64_neon_sabd>;
+ sabsdiff>;
defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl",
BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw",
defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
- int_aarch64_neon_uabd>;
+ uabsdiff>;
defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
- int_aarch64_neon_uabd>;
+ uabsdiff>;
defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>;
defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",