[Codegen] Add intrinsics 'absdiff' and corresponding SDNodes for absolute difference...

author James Molloy <james.molloy@arm.com>

Thu, 16 Jul 2015 15:22:46 +0000 (15:22 +0000)

committer James Molloy <james.molloy@arm.com>

Thu, 16 Jul 2015 15:22:46 +0000 (15:22 +0000)
author James Molloy <james.molloy@arm.com>
Thu, 16 Jul 2015 15:22:46 +0000 (15:22 +0000)
committer James Molloy <james.molloy@arm.com>
Thu, 16 Jul 2015 15:22:46 +0000 (15:22 +0000)
diff --git a/docs/LangRef.rst b/docs/LangRef.rst

index e7d6f67c9399c1cfb8b189b7fc5b0ed4a3f297eb..17ee4b32bc387ca59ce98f595dca36fb67c60994 100644 (file)
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@@ -10328,6 +10328,65 @@ Examples:
  
        %r2 = call float @llvm.fmuladd.f32(float %a, float %b, float %c) ; yields float:r2 = (a * b) + c
  
+
+'``llvm.uabsdiff.*``' and '``llvm.sabsdiff.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic. The operands and the result are vectors of any integer bit width.
+
+.. code-block:: llvm
+
+      declare <4 x i32> @llvm.uabsdiff.v4i32(<4 x i32> %a, <4 x i32> %b)
+
+
+Overview:
+"""""""""
+
+The ``llvm.uabsdiff`` intrinsic returns a vector result of the absolute difference of the two operands,
+treating them both as unsigned integers.
+
+The ``llvm.sabsdiff`` intrinsic returns a vector result of the absolute difference of the two operands,
+treating them both as signed integers.
+
+.. note::
+
+    These intrinsics are primarily used during the code generation stage of compilation.
+    They are generated by compiler passes such as the Loop and SLP vectorizers. It is not
+    recommended for users to create them manually.
+
+Arguments:
+""""""""""
+
+Both intrinsics take two integer vectors of the same type.
+
+Semantics:
+""""""""""
+
+The expression::
+
+    call <4 x i32> @llvm.uabsdiff.v4i32(<4 x i32> %a, <4 x i32> %b)
+
+is equivalent to::
+
+    %sub = sub <4 x i32> %a, %b
+    %ispos = icmp ugt <4 x i32> %a, %b
+    %neg = sub <4 x i32> zeroinitializer, %sub
+    %1 = select <4 x i1> %ispos, <4 x i32> %sub, <4 x i32> %neg
+
+Similarly the expression::
+
+    call <4 x i32> @llvm.sabsdiff.v4i32(<4 x i32> %a, <4 x i32> %b)
+
+is equivalent to::
+
+    %sub = sub nsw <4 x i32> %a, %b
+    %ispos = icmp sgt <4 x i32> %sub, <i32 -1, i32 -1, i32 -1, i32 -1>
+    %neg = sub nsw <4 x i32> zeroinitializer, %sub
+    %1 = select <4 x i1> %ispos, <4 x i32> %sub, <4 x i32> %neg
+
+
  Half Precision Floating Point Intrinsics
  ----------------------------------------
  
diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h

index fa44301a2d4aa5c0126ed86563d1028a0e15ebc9..8a4b779f03ab77ff8e873774d04f22aa7de28e86 100644 (file)
--- a/include/llvm/CodeGen/ISDOpcodes.h
+++ b/include/llvm/CodeGen/ISDOpcodes.h
@@ -334,6 +334,10 @@ namespace ISD {
      /// Byte Swap and Counting operators.
      BSWAP, CTTZ, CTLZ, CTPOP,
  
+    /// [SU]ABSDIFF - Signed/Unsigned absolute difference of two input integer
+    /// vectors. These nodes are generated from llvm.*absdiff* intrinsics.
+    SABSDIFF, UABSDIFF,
+
      /// Bit counting operators with an undefined result for zero inputs.
      CTTZ_ZERO_UNDEF, CTLZ_ZERO_UNDEF,
  
diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td

index bbae720b4e12760e61e88ec32a1e864c00db5b07..af312be186c0fbdc4f655fb11318bb2aa1a0a195 100644 (file)
--- a/include/llvm/IR/Intrinsics.td
+++ b/include/llvm/IR/Intrinsics.td
@@ -605,6 +605,12 @@ def int_convertuu  : Intrinsic<[llvm_anyint_ty],
  def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
                                  [], "llvm.clear_cache">;
  
+// Calculate the Absolute Differences of the two input vectors.
+def int_sabsdiff : Intrinsic<[llvm_anyvector_ty],
+                        [ LLVMMatchType<0>, LLVMMatchType<0> ], [IntrNoMem]>;
+def int_uabsdiff : Intrinsic<[llvm_anyvector_ty],
+                        [ LLVMMatchType<0>, LLVMMatchType<0> ], [IntrNoMem]>;
+
  //===-------------------------- Masked Intrinsics -------------------------===//
  //
  def int_masked_store : Intrinsic<[], [llvm_anyvector_ty, LLVMPointerTo<0>,
diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td

index 4abbe3793995c8fd9fd57d80edffd96144c23344..6c7eef147151e66c3d16be9d2fafc1f8b75dacbb 100644 (file)
--- a/include/llvm/Target/TargetSelectionDAG.td
+++ b/include/llvm/Target/TargetSelectionDAG.td
@@ -386,6 +386,8 @@ def smax       : SDNode<"ISD::SMAX"      , SDTIntBinOp>;
  def umin       : SDNode<"ISD::UMIN"      , SDTIntBinOp>;
  def umax       : SDNode<"ISD::UMAX"      , SDTIntBinOp>;
  
+def sabsdiff   : SDNode<"ISD::SABSDIFF"   , SDTIntBinOp>;
+def uabsdiff   : SDNode<"ISD::UABSDIFF"   , SDTIntBinOp>;
  def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>;
  def bswap      : SDNode<"ISD::BSWAP"      , SDTIntUnaryOp>;
  def ctlz       : SDNode<"ISD::CTLZ"       , SDTIntUnaryOp>;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

index 9f060a09a0f3552e48e001587cea332bdd9763fa..511239ce477e69a110b9d47581e7e3efb094852f 100644 (file)
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -146,6 +146,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
    case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
      Res = PromoteIntRes_AtomicCmpSwap(cast<AtomicSDNode>(N), ResNo);
      break;
+  case ISD::UABSDIFF:
+  case ISD::SABSDIFF:
+    Res = PromoteIntRes_SimpleIntBinOp(N);
+    break;
    }
  
    // If the result is null then the sub-method took care of registering it.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp

index 83d4ad5ea1f4ed79417f5acfca486f504021c41e..0f25a61072447f132866641f47c7689fa0f37c0c 100644 (file)
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -105,6 +105,7 @@ class VectorLegalizer {
    SDValue ExpandLoad(SDValue Op);
    SDValue ExpandStore(SDValue Op);
    SDValue ExpandFNEG(SDValue Op);
+  SDValue ExpandABSDIFF(SDValue Op);
  
    /// \brief Implements vector promotion.
    ///
@@ -326,6 +327,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
    case ISD::SMAX:
    case ISD::UMIN:
    case ISD::UMAX:
+  case ISD::UABSDIFF:
+  case ISD::SABSDIFF:
      QueryType = Node->getValueType(0);
      break;
    case ISD::FP_ROUND_INREG:
@@ -708,11 +711,36 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
      return ExpandFNEG(Op);
    case ISD::SETCC:
      return UnrollVSETCC(Op);
+  case ISD::UABSDIFF:
+  case ISD::SABSDIFF:
+    return ExpandABSDIFF(Op);
    default:
      return DAG.UnrollVectorOp(Op.getNode());
    }
  }
  
+SDValue VectorLegalizer::ExpandABSDIFF(SDValue Op) {
+  SDLoc dl(Op);
+  SDValue Tmp1, Tmp2, Tmp3, Tmp4;
+  EVT VT = Op.getValueType();
+  SDNodeFlags Flags;
+  Flags.setNoSignedWrap(Op->getOpcode() == ISD::SABSDIFF);
+
+  Tmp2 = Op.getOperand(0);
+  Tmp3 = Op.getOperand(1);
+  Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp3, &Flags);
+  Tmp2 =
+      DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Tmp1, &Flags);
+  Tmp4 = DAG.getNode(
+      ISD::SETCC, dl,
+      TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), Tmp2,
+      DAG.getConstant(0, dl, VT),
+      DAG.getCondCode(Op->getOpcode() == ISD::SABSDIFF ? ISD::SETLT
+                                                       : ISD::SETULT));
+  Tmp1 = DAG.getNode(ISD::VSELECT, dl, VT, Tmp4, Tmp1, Tmp2);
+  return Tmp1;
+}
+
  SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
    // Lower a select instruction where the condition is a scalar and the
    // operands are vectors. Lower this select to VSELECT and implement it
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

index 4348ab79f7d19091a21624d5610608b2a9a38cf5..5f9afc9cfc51435f3bd5627e6602bb02561644da 100644 (file)
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -678,6 +678,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
    case ISD::SMAX:
    case ISD::UMIN:
    case ISD::UMAX:
+  case ISD::UABSDIFF:
+  case ISD::SABSDIFF:
      SplitVecRes_BinOp(N, Lo, Hi);
      break;
    case ISD::FMA:
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

index 5c8db91484576777d1876da65c4dce123f639226..73de6e3cfbdb42df9fbe7c2af7a9dd38b4ded8f7 100644 (file)
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4646,6 +4646,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                               getValue(I.getArgOperand(0)).getValueType(),
                               getValue(I.getArgOperand(0))));
      return nullptr;
+  case Intrinsic::uabsdiff:
+    setValue(&I, DAG.getNode(ISD::UABSDIFF, sdl,
+                             getValue(I.getArgOperand(0)).getValueType(),
+                             getValue(I.getArgOperand(0)),
+                             getValue(I.getArgOperand(1))));
+    return nullptr;
+  case Intrinsic::sabsdiff:
+    setValue(&I, DAG.getNode(ISD::SABSDIFF, sdl,
+                             getValue(I.getArgOperand(0)).getValueType(),
+                             getValue(I.getArgOperand(0)),
+                             getValue(I.getArgOperand(1))));
+    return nullptr;
    case Intrinsic::cttz: {
      SDValue Arg = getValue(I.getArgOperand(0));
      ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp

index 5b9b18286faec6bce866164d433c3e56c41dedf0..8dabddc642bcf4214930652260bdc7ab4a268b83 100644 (file)
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -225,6 +225,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
    case ISD::SHL_PARTS:                  return "shl_parts";
    case ISD::SRA_PARTS:                  return "sra_parts";
    case ISD::SRL_PARTS:                  return "srl_parts";
+  case ISD::UABSDIFF:                   return "uabsdiff";
+  case ISD::SABSDIFF:                   return "sabsdiff";
  
    // Conversion operators.
    case ISD::SIGN_EXTEND:                return "sign_extend";
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp

index 50240bf70461d4864fc24c259b713b64f8791282..e6d07f5134b38d8964a42058e18c7f9b0e7907ed 100644 (file)
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -827,6 +827,8 @@ void TargetLoweringBase::initActions() {
      setOperationAction(ISD::USUBO, VT, Expand);
      setOperationAction(ISD::SMULO, VT, Expand);
      setOperationAction(ISD::UMULO, VT, Expand);
+    setOperationAction(ISD::UABSDIFF, VT, Expand);
+    setOperationAction(ISD::SABSDIFF, VT, Expand);
  
      // These library functions default to expand.
      setOperationAction(ISD::FROUND, VT, Expand);
diff --git a/test/CodeGen/X86/absdiff_expand.ll b/test/CodeGen/X86/absdiff_expand.ll

new file mode 100644 (file)

index 0000000..8ba8727
--- /dev/null
+++ b/test/CodeGen/X86/absdiff_expand.ll
@@ -0,0 +1,242 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu  < %s | FileCheck %s -check-prefix=CHECK
+
+declare <4 x i8> @llvm.uabsdiff.v4i8(<4 x i8>, <4 x i8>)
+
+define <4 x i8> @test_uabsdiff_v4i8_expand(<4 x i8> %a1, <4 x i8> %a2) {
+; CHECK-LABEL: test_uabsdiff_v4i8_expand
+; CHECK:             psubd  %xmm1, %xmm0
+; CHECK-NEXT:        pxor   %xmm1, %xmm1
+; CHECK-NEXT:        psubd  %xmm0, %xmm1
+; CHECK-NEXT:        movdqa  .LCPI{{[0-9_]*}}
+; CHECK-NEXT:        movdqa  %xmm1, %xmm3
+; CHECK-NEXT:        pxor   %xmm2, %xmm3
+; CHECK-NEXT:        pcmpgtd        %xmm3, %xmm2
+; CHECK-NEXT:        pand    %xmm2, %xmm0
+; CHECK-NEXT:        pandn   %xmm1, %xmm2
+; CHECK-NEXT:        por     %xmm2, %xmm0
+; CHECK-NEXT:        retq
+
+  %1 = call <4 x i8> @llvm.uabsdiff.v4i8(<4 x i8> %a1, <4 x i8> %a2)
+  ret <4 x i8> %1
+}
+
+declare <4 x i8> @llvm.sabsdiff.v4i8(<4 x i8>, <4 x i8>)
+
+define <4 x i8> @test_sabsdiff_v4i8_expand(<4 x i8> %a1, <4 x i8> %a2) {
+; CHECK-LABEL: test_sabsdiff_v4i8_expand
+; CHECK:      psubd  %xmm1, %xmm0
+; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: pxor    %xmm2, %xmm2
+; CHECK-NEXT: psubd  %xmm0, %xmm2
+; CHECK-NEXT: pcmpgtd  %xmm2, %xmm1
+; CHECK-NEXT: pand    %xmm1, %xmm0
+; CHECK-NEXT: pandn   %xmm2, %xmm1
+; CHECK-NEXT: por     %xmm1, %xmm0
+; CHECK-NEXT: retq
+
+  %1 = call <4 x i8> @llvm.sabsdiff.v4i8(<4 x i8> %a1, <4 x i8> %a2)
+  ret <4 x i8> %1
+}
+
+
+declare <8 x i8> @llvm.sabsdiff.v8i8(<8 x i8>, <8 x i8>)
+
+define <8 x i8> @test_sabsdiff_v8i8_expand(<8 x i8> %a1, <8 x i8> %a2) {
+; CHECK-LABEL: test_sabsdiff_v8i8_expand
+; CHECK:      psubw  %xmm1, %xmm0
+; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: pxor   %xmm2, %xmm2
+; CHECK-NEXT: psubw  %xmm0, %xmm2
+; CHECK-NEXT: pcmpgtw        %xmm2, %xmm1
+; CHECK-NEXT: pand  %xmm1, %xmm0
+; CHECK-NEXT: pandn %xmm2, %xmm1
+; CHECK-NEXT: por  %xmm1, %xmm0
+; CHECK-NEXT: retq
+  %1 = call <8 x i8> @llvm.sabsdiff.v8i8(<8 x i8> %a1, <8 x i8> %a2)
+  ret <8 x i8> %1
+}
+
+declare <16 x i8> @llvm.uabsdiff.v16i8(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @test_uabsdiff_v16i8_expand(<16 x i8> %a1, <16 x i8> %a2) {
+; CHECK-LABEL: test_uabsdiff_v16i8_expand
+; CHECK:             psubb  %xmm1, %xmm0
+; CHECK-NEXT:        pxor   %xmm1, %xmm1
+; CHECK-NEXT:        psubb  %xmm0, %xmm1
+; CHECK-NEXT:        movdqa  .LCPI{{[0-9_]*}}
+; CHECK-NEXT:        movdqa  %xmm1, %xmm3
+; CHECK-NEXT:        pxor   %xmm2, %xmm3
+; CHECK-NEXT:        pcmpgtb        %xmm3, %xmm2
+; CHECK-NEXT:        pand    %xmm2, %xmm0
+; CHECK-NEXT:        pandn   %xmm1, %xmm2
+; CHECK-NEXT:        por     %xmm2, %xmm0
+; CHECK-NEXT:        retq
+  %1 = call <16 x i8> @llvm.uabsdiff.v16i8(<16 x i8> %a1, <16 x i8> %a2)
+  ret <16 x i8> %1
+}
+
+declare <8 x i16> @llvm.uabsdiff.v8i16(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @test_uabsdiff_v8i16_expand(<8 x i16> %a1, <8 x i16> %a2) {
+; CHECK-LABEL: test_uabsdiff_v8i16_expand
+; CHECK:             psubw  %xmm1, %xmm0
+; CHECK-NEXT:        pxor   %xmm1, %xmm1
+; CHECK-NEXT:        psubw  %xmm0, %xmm1
+; CHECK-NEXT:        movdqa  .LCPI{{[0-9_]*}}
+; CHECK-NEXT:        movdqa  %xmm1, %xmm3
+; CHECK-NEXT:        pxor   %xmm2, %xmm3
+; CHECK-NEXT:        pcmpgtw        %xmm3, %xmm2
+; CHECK-NEXT:        pand    %xmm2, %xmm0
+; CHECK-NEXT:        pandn   %xmm1, %xmm2
+; CHECK-NEXT:        por     %xmm2, %xmm0
+; CHECK-NEXT:        retq
+  %1 = call <8 x i16> @llvm.uabsdiff.v8i16(<8 x i16> %a1, <8 x i16> %a2)
+  ret <8 x i16> %1
+}
+
+declare <8 x i16> @llvm.sabsdiff.v8i16(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @test_sabsdiff_v8i16_expand(<8 x i16> %a1, <8 x i16> %a2) {
+; CHECK-LABEL: test_sabsdiff_v8i16_expand
+; CHECK:      psubw  %xmm1, %xmm0
+; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: pxor   %xmm2, %xmm2
+; CHECK-NEXT: psubw  %xmm0, %xmm2
+; CHECK-NEXT: pcmpgtw        %xmm2, %xmm1
+; CHECK-NEXT: pand  %xmm1, %xmm0
+; CHECK-NEXT: pandn %xmm2, %xmm1
+; CHECK-NEXT: por  %xmm1, %xmm0
+; CHECK-NEXT: retq
+  %1 = call <8 x i16> @llvm.sabsdiff.v8i16(<8 x i16> %a1, <8 x i16> %a2)
+  ret <8 x i16> %1
+}
+
+declare <4 x i32> @llvm.sabsdiff.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @test_sabsdiff_v4i32_expand(<4 x i32> %a1, <4 x i32> %a2) {
+; CHECK-LABEL: test_sabsdiff_v4i32_expand
+; CHECK:             psubd  %xmm1, %xmm0
+; CHECK-NEXT:        pxor  %xmm1, %xmm1
+; CHECK-NEXT:        pxor  %xmm2, %xmm2
+; CHECK-NEXT:        psubd  %xmm0, %xmm2
+; CHECK-NEXT:        pcmpgtd        %xmm2, %xmm1
+; CHECK-NEXT:        pand    %xmm1, %xmm0
+; CHECK-NEXT:        pandn   %xmm2, %xmm1
+; CHECK-NEXT:        por    %xmm1, %xmm0
+; CHECK-NEXT:        retq
+  %1 = call <4 x i32> @llvm.sabsdiff.v4i32(<4 x i32> %a1, <4 x i32> %a2)
+  ret <4 x i32> %1
+}
+
+declare <4 x i32> @llvm.uabsdiff.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @test_uabsdiff_v4i32_expand(<4 x i32> %a1, <4 x i32> %a2) {
+; CHECK-LABEL: test_uabsdiff_v4i32_expand
+; CHECK:             psubd  %xmm1, %xmm0
+; CHECK-NEXT:        pxor   %xmm1, %xmm1
+; CHECK-NEXT:        psubd  %xmm0, %xmm1
+; CHECK-NEXT:        movdqa  .LCPI{{[0-9_]*}}
+; CHECK-NEXT:        movdqa  %xmm1, %xmm3
+; CHECK-NEXT:        pxor   %xmm2, %xmm3
+; CHECK-NEXT:        pcmpgtd        %xmm3, %xmm2
+; CHECK-NEXT:        pand    %xmm2, %xmm0
+; CHECK-NEXT:        pandn   %xmm1, %xmm2
+; CHECK-NEXT:        por     %xmm2, %xmm0
+; CHECK-NEXT:        retq
+  %1 = call <4 x i32> @llvm.uabsdiff.v4i32(<4 x i32> %a1, <4 x i32> %a2)
+  ret <4 x i32> %1
+}
+
+declare <2 x i32> @llvm.sabsdiff.v2i32(<2 x i32>, <2 x i32>)
+
+define <2 x i32> @test_sabsdiff_v2i32_expand(<2 x i32> %a1, <2 x i32> %a2) {
+; CHECK-LABEL: test_sabsdiff_v2i32_expand
+; CHECK:        psubq   %xmm1, %xmm0
+; CHECK-NEXT:   pxor    %xmm1, %xmm1
+; CHECK-NEXT:   psubq   %xmm0, %xmm1
+; CHECK-NEXT:   movdqa  .LCPI{{[0-9_]*}}
+; CHECK-NEXT:   movdqa  %xmm1, %xmm3
+; CHECK-NEXT:   pxor    %xmm2, %xmm3
+; CHECK-NEXT:   movdqa  %xmm2, %xmm4
+; CHECK-NEXT:   pcmpgtd %xmm3, %xmm4
+; CHECK-NEXT:   pshufd  $160, %xmm4, %xmm5      # xmm5 = xmm4[0,0,2,2]
+; CHECK-NEXT:   pcmpeqd %xmm2, %xmm3
+; CHECK-NEXT:   pshufd  $245, %xmm3, %xmm2      # xmm2 = xmm3[1,1,3,3]
+; CHECK-NEXT:   pand    %xmm5, %xmm2
+; CHECK-NEXT:   pshufd  $245, %xmm4, %xmm3      # xmm3 = xmm4[1,1,3,3]
+; CHECK-NEXT:   por     %xmm2, %xmm3
+; CHECK-NEXT:   pand    %xmm3, %xmm0
+; CHECK-NEXT:   pandn   %xmm1, %xmm3
+; CHECK-NEXT:   por     %xmm3, %xmm0
+; CHECK-NEXT:   retq
+  %1 = call <2 x i32> @llvm.sabsdiff.v2i32(<2 x i32> %a1, <2 x i32> %a2)
+  ret <2 x i32> %1
+}
+
+declare <2 x i64> @llvm.sabsdiff.v2i64(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @test_sabsdiff_v2i64_expand(<2 x i64> %a1, <2 x i64> %a2) {
+; CHECK-LABEL: test_sabsdiff_v2i64_expand
+; CHECK:        psubq   %xmm1, %xmm0
+; CHECK-NEXT:   pxor    %xmm1, %xmm1
+; CHECK-NEXT:   psubq   %xmm0, %xmm1
+; CHECK-NEXT:   movdqa  .LCPI{{[0-9_]*}}
+; CHECK-NEXT:   movdqa  %xmm1, %xmm3
+; CHECK-NEXT:   pxor    %xmm2, %xmm3
+; CHECK-NEXT:   movdqa  %xmm2, %xmm4
+; CHECK-NEXT:   pcmpgtd %xmm3, %xmm4
+; CHECK-NEXT:   pshufd  $160, %xmm4, %xmm5      # xmm5 = xmm4[0,0,2,2]
+; CHECK-NEXT:   pcmpeqd %xmm2, %xmm3
+; CHECK-NEXT:   pshufd  $245, %xmm3, %xmm2      # xmm2 = xmm3[1,1,3,3]
+; CHECK-NEXT:   pand    %xmm5, %xmm2
+; CHECK-NEXT:   pshufd  $245, %xmm4, %xmm3      # xmm3 = xmm4[1,1,3,3]
+; CHECK-NEXT:   por     %xmm2, %xmm3
+; CHECK-NEXT:   pand    %xmm3, %xmm0
+; CHECK-NEXT:   pandn   %xmm1, %xmm3
+; CHECK-NEXT:   por     %xmm3, %xmm0
+; CHECK-NEXT:   retq
+  %1 = call <2 x i64> @llvm.sabsdiff.v2i64(<2 x i64> %a1, <2 x i64> %a2)
+  ret <2 x i64> %1
+}
+
+declare <16 x i32> @llvm.sabsdiff.v16i32(<16 x i32>, <16 x i32>)
+
+define <16 x i32> @test_sabsdiff_v16i32_expand(<16 x i32> %a1, <16 x i32> %a2) {
+; CHECK-LABEL: test_sabsdiff_v16i32_expand
+; CHECK:             psubd  %xmm4, %xmm0
+; CHECK-NEXT:        pxor    %xmm8, %xmm8
+; CHECK-NEXT:        pxor    %xmm9, %xmm9
+; CHECK-NEXT:        psubd   %xmm0, %xmm9
+; CHECK-NEXT:        pxor    %xmm4, %xmm4
+; CHECK-NEXT:        pcmpgtd %xmm9, %xmm4
+; CHECK-NEXT:        pand    %xmm4, %xmm0
+; CHECK-NEXT:        pandn   %xmm9, %xmm4
+; CHECK-NEXT:        por     %xmm4, %xmm0
+; CHECK-NEXT:        psubd   %xmm5, %xmm1
+; CHECK-NEXT:        pxor    %xmm4, %xmm4
+; CHECK-NEXT:        psubd   %xmm1, %xmm4
+; CHECK-NEXT:        pxor    %xmm5, %xmm5
+; CHECK-NEXT:        pcmpgtd %xmm4, %xmm5
+; CHECK-NEXT:        pand    %xmm5, %xmm1
+; CHECK-NEXT:        pandn   %xmm4, %xmm5
+; CHECK-NEXT:        por     %xmm5, %xmm1
+; CHECK-NEXT:        psubd   %xmm6, %xmm2
+; CHECK-NEXT:        pxor    %xmm4, %xmm4
+; CHECK-NEXT:        psubd   %xmm2, %xmm4
+; CHECK-NEXT:        pxor    %xmm5, %xmm5
+; CHECK-NEXT:        pcmpgtd %xmm4, %xmm5
+; CHECK-NEXT:        pand    %xmm5, %xmm2
+; CHECK-NEXT:        pandn   %xmm4, %xmm5
+; CHECK-NEXT:        por     %xmm5, %xmm2
+; CHECK-NEXT:        psubd   %xmm7, %xmm3
+; CHECK-NEXT:        pxor    %xmm4, %xmm4
+; CHECK-NEXT:        psubd   %xmm3, %xmm4
+; CHECK-NEXT:        pcmpgtd %xmm4, %xmm8
+; CHECK-NEXT:        pand    %xmm8, %xmm3
+; CHECK-NEXT:        pandn   %xmm4, %xmm8
+; CHECK-NEXT:        por     %xmm8, %xmm3
+; CHECK-NEXT:        retq
+  %1 = call <16 x i32> @llvm.sabsdiff.v16i32(<16 x i32> %a1, <16 x i32> %a2)
+  ret <16 x i32> %1
+}
+
author	James Molloy <james.molloy@arm.com>
	Thu, 16 Jul 2015 15:22:46 +0000 (15:22 +0000)
committer	James Molloy <james.molloy@arm.com>
	Thu, 16 Jul 2015 15:22:46 +0000 (15:22 +0000)
docs/LangRef.rst		patch \| blob \| history
include/llvm/CodeGen/ISDOpcodes.h		patch \| blob \| history
include/llvm/IR/Intrinsics.td		patch \| blob \| history
include/llvm/Target/TargetSelectionDAG.td		patch \| blob \| history
lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp		patch \| blob \| history
lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp		patch \| blob \| history
lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp		patch \| blob \| history
lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp		patch \| blob \| history
lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp		patch \| blob \| history
lib/CodeGen/TargetLoweringBase.cpp		patch \| blob \| history
test/CodeGen/X86/absdiff_expand.ll	[new file with mode: 0644]	patch \| blob