%r2 = call float @llvm.fmuladd.f32(float %a, float %b, float %c) ; yields float:r2 = (a * b) + c
-
-'``llvm.uabsdiff.*``' and '``llvm.sabsdiff.*``' Intrinsics
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Syntax:
-"""""""
-This is an overloaded intrinsic. The loaded data is a vector of any integer bit width.
-
-.. code-block:: llvm
-
- declare <4 x integer> @llvm.uabsdiff.v4i32(<4 x integer> %a, <4 x integer> %b)
-
-
-Overview:
-"""""""""
-
-The ``llvm.uabsdiff`` intrinsic returns a vector result of the absolute difference
-of the two operands, treating them both as unsigned integers. The intermediate
-calculations are computed using infinitely precise unsigned arithmetic. The final
-result will be truncated to the given type.
-
-The ``llvm.sabsdiff`` intrinsic returns a vector result of the absolute difference of
-the two operands, treating them both as signed integers. If the result overflows, the
-behavior is undefined.
-
-.. note::
-
- These intrinsics are primarily used during the code generation stage of compilation.
- They are generated by compiler passes such as the Loop and SLP vectorizers. It is not
- recommended for users to create them manually.
-
-Arguments:
-""""""""""
-
-Both intrinsics take two integer of the same bitwidth.
-
-Semantics:
-""""""""""
-
-The expression::
-
- call <4 x i32> @llvm.uabsdiff.v4i32(<4 x i32> %a, <4 x i32> %b)
-
-is equivalent to::
-
- %1 = zext <4 x i32> %a to <4 x i64>
- %2 = zext <4 x i32> %b to <4 x i64>
- %sub = sub <4 x i64> %1, %2
- %trunc = trunc <4 x i64> to <4 x i32>
-
-and the expression::
-
- call <4 x i32> @llvm.sabsdiff.v4i32(<4 x i32> %a, <4 x i32> %b)
-
-is equivalent to::
-
- %sub = sub nsw <4 x i32> %a, %b
- %ispos = icmp sge <4 x i32> %sub, zeroinitializer
- %neg = sub nsw <4 x i32> zeroinitializer, %sub
- %1 = select <4 x i1> %ispos, <4 x i32> %sub, <4 x i32> %neg
-
-
Half Precision Floating Point Intrinsics
----------------------------------------
/// Byte Swap and Counting operators.
BSWAP, CTTZ, CTLZ, CTPOP, BITREVERSE,
- /// [SU]ABSDIFF - Signed/Unsigned absolute difference of two input integer
- /// vector. These nodes are generated from llvm.*absdiff* intrinsics.
- SABSDIFF, UABSDIFF,
-
/// Bit counting operators with an undefined result for zero inputs.
CTTZ_ZERO_UNDEF, CTLZ_ZERO_UNDEF,
def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
[], "llvm.clear_cache">;
-// Calculate the Absolute Differences of the two input vectors.
-def int_sabsdiff : Intrinsic<[llvm_anyvector_ty],
- [ LLVMMatchType<0>, LLVMMatchType<0> ], [IntrNoMem]>;
-def int_uabsdiff : Intrinsic<[llvm_anyvector_ty],
- [ LLVMMatchType<0>, LLVMMatchType<0> ], [IntrNoMem]>;
-
//===-------------------------- Masked Intrinsics -------------------------===//
//
def int_masked_store : Intrinsic<[], [llvm_anyvector_ty, LLVMPointerTo<0>,
def umin : SDNode<"ISD::UMIN" , SDTIntBinOp>;
def umax : SDNode<"ISD::UMAX" , SDTIntBinOp>;
-def sabsdiff : SDNode<"ISD::SABSDIFF" , SDTIntBinOp>;
-def uabsdiff : SDNode<"ISD::UABSDIFF" , SDTIntBinOp>;
def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>;
def bitreverse : SDNode<"ISD::BITREVERSE" , SDTIntUnaryOp>;
def bswap : SDNode<"ISD::BSWAP" , SDTIntUnaryOp>;
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
Res = PromoteIntRes_AtomicCmpSwap(cast<AtomicSDNode>(N), ResNo);
break;
- case ISD::UABSDIFF:
- case ISD::SABSDIFF:
- Res = PromoteIntRes_SimpleIntBinOp(N);
- break;
}
// If the result is null then the sub-method took care of registering it.
SDValue ExpandLoad(SDValue Op);
SDValue ExpandStore(SDValue Op);
SDValue ExpandFNEG(SDValue Op);
- SDValue ExpandABSDIFF(SDValue Op);
/// \brief Implements vector promotion.
///
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
- case ISD::UABSDIFF:
- case ISD::SABSDIFF:
QueryType = Node->getValueType(0);
break;
case ISD::FP_ROUND_INREG:
return ExpandFNEG(Op);
case ISD::SETCC:
return UnrollVSETCC(Op);
- case ISD::UABSDIFF:
- case ISD::SABSDIFF:
- return ExpandABSDIFF(Op);
default:
return DAG.UnrollVectorOp(Op.getNode());
}
}
-SDValue VectorLegalizer::ExpandABSDIFF(SDValue Op) {
- SDLoc dl(Op);
- SDValue Op0 = Op.getOperand(0);
- SDValue Op1 = Op.getOperand(1);
- EVT VT = Op.getValueType();
-
- // For unsigned intrinsic, promote the type to handle unsigned overflow.
- bool isUabsdiff = (Op->getOpcode() == ISD::UABSDIFF);
- if (isUabsdiff) {
- VT = VT.widenIntegerVectorElementType(*DAG.getContext());
- Op0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op0);
- Op1 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op1);
- }
-
- SDNodeFlags Flags;
- Flags.setNoSignedWrap(!isUabsdiff);
- SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Op0, Op1, &Flags);
- if (isUabsdiff)
- return DAG.getNode(ISD::TRUNCATE, dl, Op.getValueType(), Sub);
-
- SDValue Cmp =
- DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(DAG.getDataLayout(),
- *DAG.getContext(), VT),
- Sub, DAG.getConstant(0, dl, VT), DAG.getCondCode(ISD::SETGE));
- SDValue Neg = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Sub, &Flags);
- return DAG.getNode(ISD::VSELECT, dl, VT, Cmp, Sub, Neg);
-}
-
SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
// Lower a select instruction where the condition is a scalar and the
// operands are vectors. Lower this select to VSELECT and implement it
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
- case ISD::UABSDIFF:
- case ISD::SABSDIFF:
SplitVecRes_BinOp(N, Lo, Hi);
break;
case ISD::FMA:
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0))));
return nullptr;
- case Intrinsic::uabsdiff:
- setValue(&I, DAG.getNode(ISD::UABSDIFF, sdl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1))));
- return nullptr;
- case Intrinsic::sabsdiff:
- setValue(&I, DAG.getNode(ISD::SABSDIFF, sdl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1))));
- return nullptr;
case Intrinsic::cttz: {
SDValue Arg = getValue(I.getArgOperand(0));
ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
case ISD::SHL_PARTS: return "shl_parts";
case ISD::SRA_PARTS: return "sra_parts";
case ISD::SRL_PARTS: return "srl_parts";
- case ISD::UABSDIFF: return "uabsdiff";
- case ISD::SABSDIFF: return "sabsdiff";
// Conversion operators.
case ISD::SIGN_EXTEND: return "sign_extend";
setOperationAction(ISD::USUBO, VT, Expand);
setOperationAction(ISD::SMULO, VT, Expand);
setOperationAction(ISD::UMULO, VT, Expand);
- setOperationAction(ISD::UABSDIFF, VT, Expand);
- setOperationAction(ISD::SABSDIFF, VT, Expand);
+
setOperationAction(ISD::BITREVERSE, VT, Expand);
// These library functions default to expand.
setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
- // [SU][MIN|MAX] and [SU]ABSDIFF are available for all NEON types apart from
- // i64.
+ // [SU][MIN|MAX] are available for all NEON types apart from i64.
if (!VT.isFloatingPoint() &&
VT.getSimpleVT() != MVT::v2i64 && VT.getSimpleVT() != MVT::v1i64)
- for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX,
- ISD::SABSDIFF, ISD::UABSDIFF})
+ for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
setOperationAction(Opcode, VT.getSimpleVT(), Legal);
// F[MIN|MAX][NUM|NAN] are available for all FP NEON types (not f16 though!).
// (aarch64_neon_umull (extract_high (v2i64 vec)))
// (extract_high (v2i64 (dup128 scalar)))))
//
-static SDValue tryCombineLongOpWithDup(SDNode *N,
+static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
- bool IsIntrinsic = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN;
- SDValue LHS = N->getOperand(IsIntrinsic ? 1 : 0);
- SDValue RHS = N->getOperand(IsIntrinsic ? 2 : 1);
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
assert(LHS.getValueType().is64BitVector() &&
RHS.getValueType().is64BitVector() &&
"unexpected shape for long operation");
return SDValue();
}
- // N could either be an intrinsic or a sabsdiff/uabsdiff node.
- if (IsIntrinsic)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
- N->getOperand(0), LHS, RHS);
- else
- return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
- LHS, RHS);
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
+ N->getOperand(0), LHS, RHS);
}
static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
case Intrinsic::aarch64_neon_fmin:
return DAG.getNode(ISD::FMINNAN, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
- case Intrinsic::aarch64_neon_sabd:
- return DAG.getNode(ISD::SABSDIFF, SDLoc(N), N->getValueType(0),
- N->getOperand(1), N->getOperand(2));
- case Intrinsic::aarch64_neon_uabd:
- return DAG.getNode(ISD::UABSDIFF, SDLoc(N), N->getValueType(0),
- N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_fmaxnm:
return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_umull:
case Intrinsic::aarch64_neon_pmull:
case Intrinsic::aarch64_neon_sqdmull:
- return tryCombineLongOpWithDup(N, DCI, DAG);
+ return tryCombineLongOpWithDup(IID, N, DCI, DAG);
case Intrinsic::aarch64_neon_sqshl:
case Intrinsic::aarch64_neon_uqshl:
case Intrinsic::aarch64_neon_sqshlu:
// helps the backend to decide that an sabdl2 would be useful, saving a real
// extract_high operation.
if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
- (N->getOperand(0).getOpcode() == ISD::SABSDIFF ||
- N->getOperand(0).getOpcode() == ISD::UABSDIFF)) {
+ N->getOperand(0).getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
SDNode *ABDNode = N->getOperand(0).getNode();
- SDValue NewABD = tryCombineLongOpWithDup(ABDNode, DCI, DAG);
- if (!NewABD.getNode())
- return SDValue();
+ unsigned IID = getIntrinsicID(ABDNode);
+ if (IID == Intrinsic::aarch64_neon_sabd ||
+ IID == Intrinsic::aarch64_neon_uabd) {
+ SDValue NewABD = tryCombineLongOpWithDup(IID, ABDNode, DCI, DAG);
+ if (!NewABD.getNode())
+ return SDValue();
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0),
- NewABD);
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0),
+ NewABD);
+ }
}
// This is effectively a custom type legalization for AArch64.
//===----------------------------------------------------------------------===//
defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
- uabsdiff>;
+ int_aarch64_neon_uabd>;
// Match UABDL in log2-shuffle patterns.
def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
(v8i16 (add (sub (zext (v8i8 V64:$opA)),
defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
- TriOpFrag<(add node:$LHS, (sabsdiff node:$MHS, node:$RHS))> >;
-defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", sabsdiff>;
+ TriOpFrag<(add node:$LHS, (int_aarch64_neon_sabd node:$MHS, node:$RHS))> >;
+defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_aarch64_neon_sabd>;
defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_aarch64_neon_shadd>;
defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>;
defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
- TriOpFrag<(add node:$LHS, (uabsdiff node:$MHS, node:$RHS))> >;
-defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", uabsdiff>;
+ TriOpFrag<(add node:$LHS, (int_aarch64_neon_uabd node:$MHS, node:$RHS))> >;
+defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_aarch64_neon_uabd>;
defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_aarch64_neon_uhadd>;
defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_aarch64_neon_pmull>;
defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
- sabsdiff>;
+ int_aarch64_neon_sabd>;
defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
- sabsdiff>;
+ int_aarch64_neon_sabd>;
defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl",
BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw",
defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
- uabsdiff>;
+ int_aarch64_neon_uabd>;
defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>;
defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
- if (VT.isInteger()) {
- setOperationAction(ISD::SABSDIFF, VT, Legal);
- setOperationAction(ISD::UABSDIFF, VT, Legal);
- }
if (!VT.isFloatingPoint() &&
VT != MVT::v2i64 && VT != MVT::v1i64)
for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
setOperationAction(Opcode, VT, Legal);
-
}
void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
// Don't do anything for most intrinsics.
break;
- case Intrinsic::arm_neon_vabds:
- if (!N->getValueType(0).isInteger())
- return SDValue();
- return DAG.getNode(ISD::SABSDIFF, SDLoc(N), N->getValueType(0),
- N->getOperand(1), N->getOperand(2));
- case Intrinsic::arm_neon_vabdu:
- return DAG.getNode(ISD::UABSDIFF, SDLoc(N), N->getValueType(0),
- N->getOperand(1), N->getOperand(2));
-
// Vector shifts: check for immediate versions and lower them.
// Note: This is done during DAG combining instead of DAG legalizing because
// the build_vectors for 64-bit vector element shift counts are generally
// VABD : Vector Absolute Difference
defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vabd", "s", sabsdiff, 1>;
+ "vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vabd", "u", uabsdiff, 1>;
+ "vabd", "u", int_arm_neon_vabdu, 1>;
def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
"vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
// VABDL : Vector Absolute Difference Long (Q = | D - D |)
defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
- "vabdl", "s", sabsdiff, zext, 1>;
+ "vabdl", "s", int_arm_neon_vabds, zext, 1>;
defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
- "vabdl", "u", uabsdiff, zext, 1>;
+ "vabdl", "u", int_arm_neon_vabdu, zext, 1>;
def abd_shr :
PatFrag<(ops node:$in1, node:$in2, node:$shift),
// VABA : Vector Absolute Difference and Accumulate
defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
- "vaba", "s", sabsdiff, add>;
+ "vaba", "s", int_arm_neon_vabds, add>;
defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
- "vaba", "u", uabsdiff, add>;
+ "vaba", "u", int_arm_neon_vabdu, add>;
// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
- "vabal", "s", sabsdiff, zext, add>;
+ "vabal", "s", int_arm_neon_vabds, zext, add>;
defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
- "vabal", "u", uabsdiff, zext, add>;
+ "vabal", "u", int_arm_neon_vabdu, zext, add>;
// Vector Maximum and Minimum.
+++ /dev/null
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
-
-declare <4 x i8> @llvm.uabsdiff.v4i8(<4 x i8>, <4 x i8>)
-
-define <4 x i8> @test_uabsdiff_v4i8_expand(<4 x i8> %a1, <4 x i8> %a2) {
-; CHECK-LABEL: test_uabsdiff_v4i8_expand
-; CHECK: pshufd
-; CHECK: movd
-; CHECK: subl
-; CHECK: punpckldq
-; CHECK-DAG: movd %xmm1, [[SRC:%.*]]
-; CHECK-DAG: movd %xmm0, [[DST:%.*]]
-; CHECK: subl [[SRC]], [[DST]]
-; CHECK: movd
-; CHECK: pshufd
-; CHECK: movd
-; CHECK: punpckldq
-; CHECK: movdqa
-; CHECK: retq
-
- %1 = call <4 x i8> @llvm.uabsdiff.v4i8(<4 x i8> %a1, <4 x i8> %a2)
- ret <4 x i8> %1
-}
-
-declare <4 x i8> @llvm.sabsdiff.v4i8(<4 x i8>, <4 x i8>)
-
-define <4 x i8> @test_sabsdiff_v4i8_expand(<4 x i8> %a1, <4 x i8> %a2) {
-; CHECK-LABEL: test_sabsdiff_v4i8_expand
-; CHECK: psubd
-; CHECK: pcmpgtd
-; CHECK: pcmpeqd
-; CHECK: pxor
-; CHECK-DAG: psubd {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
-; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
-; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
-; CHECK: por [[SRC2]], [[DST]]
-; CHECK: retq
-
- %1 = call <4 x i8> @llvm.sabsdiff.v4i8(<4 x i8> %a1, <4 x i8> %a2)
- ret <4 x i8> %1
-}
-
-declare <8 x i8> @llvm.sabsdiff.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_sabsdiff_v8i8_expand(<8 x i8> %a1, <8 x i8> %a2) {
-; CHECK-LABEL: test_sabsdiff_v8i8_expand
-; CHECK: psubw
-; CHECK: pcmpgtw
-; CHECK: pcmpeqd
-; CHECK: pxor
-; CHECK-DAG: psubw {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
-; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
-; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
-; CHECK: por [[SRC2]], [[DST]]
-; CHECK: retq
-
- %1 = call <8 x i8> @llvm.sabsdiff.v8i8(<8 x i8> %a1, <8 x i8> %a2)
- ret <8 x i8> %1
-}
-
-declare <16 x i8> @llvm.uabsdiff.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_uabsdiff_v16i8_expand(<16 x i8> %a1, <16 x i8> %a2) {
-; CHECK-LABEL: test_uabsdiff_v16i8_expand
-; CHECK: movd
-; CHECK: movzbl
-; CHECK: movzbl
-; CHECK: subl
-; CHECK: punpcklbw
-; CHECK: retq
-
- %1 = call <16 x i8> @llvm.uabsdiff.v16i8(<16 x i8> %a1, <16 x i8> %a2)
- ret <16 x i8> %1
-}
-
-declare <8 x i16> @llvm.uabsdiff.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_uabsdiff_v8i16_expand(<8 x i16> %a1, <8 x i16> %a2) {
-; CHECK-LABEL: test_uabsdiff_v8i16_expand
-; CHECK: pextrw
-; CHECK: pextrw
-; CHECK: subl
-; CHECK: punpcklwd
-; CHECK: retq
-
- %1 = call <8 x i16> @llvm.uabsdiff.v8i16(<8 x i16> %a1, <8 x i16> %a2)
- ret <8 x i16> %1
-}
-
-declare <8 x i16> @llvm.sabsdiff.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_sabsdiff_v8i16_expand(<8 x i16> %a1, <8 x i16> %a2) {
-; CHECK-LABEL: test_sabsdiff_v8i16_expand
-; CHECK: psubw
-; CHECK: pcmpgtw
-; CHECK: pcmpeqd
-; CHECK: pxor
-; CHECK-DAG: psubw {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
-; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
-; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
-; CHECK: por [[SRC2]], [[DST]]
-; CHECK: retq
-
- %1 = call <8 x i16> @llvm.sabsdiff.v8i16(<8 x i16> %a1, <8 x i16> %a2)
- ret <8 x i16> %1
-}
-
-declare <4 x i32> @llvm.sabsdiff.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_sabsdiff_v4i32_expand(<4 x i32> %a1, <4 x i32> %a2) {
-; CHECK-LABEL: test_sabsdiff_v4i32_expand
-; CHECK: psubd
-; CHECK: pcmpgtd
-; CHECK: pcmpeqd
-; CHECK: pxor
-; CHECK-DAG: psubd {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
-; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
-; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
-; CHECK: por [[SRC2]], [[DST]]
-; CHECK: retq
- %1 = call <4 x i32> @llvm.sabsdiff.v4i32(<4 x i32> %a1, <4 x i32> %a2)
- ret <4 x i32> %1
-}
-
-declare <4 x i32> @llvm.uabsdiff.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_uabsdiff_v4i32_expand(<4 x i32> %a1, <4 x i32> %a2) {
-; CHECK-LABEL: test_uabsdiff_v4i32_expand
-; CHECK: pshufd
-; CHECK: movd
-; CHECK: subl
-; CHECK: punpckldq
-; CHECK-DAG: movd %xmm1, [[SRC:%.*]]
-; CHECK-DAG: movd %xmm0, [[DST:%.*]]
-; CHECK: subl [[SRC]], [[DST]]
-; CHECK: movd
-; CHECK: pshufd
-; CHECK: movd
-; CHECK: punpckldq
-; CHECK: movdqa
-; CHECK: retq
-
- %1 = call <4 x i32> @llvm.uabsdiff.v4i32(<4 x i32> %a1, <4 x i32> %a2)
- ret <4 x i32> %1
-}
-
-declare <2 x i32> @llvm.sabsdiff.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_sabsdiff_v2i32_expand(<2 x i32> %a1, <2 x i32> %a2) {
-; CHECK-LABEL: test_sabsdiff_v2i32_expand
-; CHECK: psubq
-; CHECK: pcmpgtd
-; CHECK: pcmpeqd
-; CHECK: pxor
-; CHECK-DAG: psubq {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
-; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
-; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
-; CHECK: por [[SRC2]], [[DST]]
-; CHECK: retq
-
- %1 = call <2 x i32> @llvm.sabsdiff.v2i32(<2 x i32> %a1, <2 x i32> %a2)
- ret <2 x i32> %1
-}
-
-declare <2 x i64> @llvm.sabsdiff.v2i64(<2 x i64>, <2 x i64>)
-
-define <2 x i64> @test_sabsdiff_v2i64_expand(<2 x i64> %a1, <2 x i64> %a2) {
-; CHECK-LABEL: test_sabsdiff_v2i64_expand
-; CHECK: psubq
-; CHECK: pcmpgtd
-; CHECK: pcmpeqd
-; CHECK: pxor
-; CHECK-DAG: psubq {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
-; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
-; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
-; CHECK: por [[SRC2]], [[DST]]
-; CHECK: retq
-
- %1 = call <2 x i64> @llvm.sabsdiff.v2i64(<2 x i64> %a1, <2 x i64> %a2)
- ret <2 x i64> %1
-}
+++ /dev/null
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
-
-declare <16 x i16> @llvm.sabsdiff.v16i16(<16 x i16>, <16 x i16>)
-
-define <16 x i16> @test_sabsdiff_v16i16_expand(<16 x i16> %a1, <16 x i16> %a2) {
-; CHECK-LABEL: test_sabsdiff_v16i16_expand:
-; CHECK: # BB#0:
-; CHECK: psubw
-; CHECK: pxor
-; CHECK: pcmpgtw
-; CHECK: movdqa
-; CHECK: pandn
-; CHECK: pxor
-; CHECK: psubw
-; CHECK: pcmpeqd
-; CHECK: pxor
-; CHECK: pandn
-; CHECK: por
-; CHECK: pcmpgtw
-; CHECK-DAG: psubw {{%xmm[0-9]+}}, [[SRC:%xmm[0-9]+]]
-; CHECK-DAG: pxor {{%xmm[0-9]+}}, [[DST:%xmm[0-9]+]]
-; CHECK: pandn [[SRC]], [[DST]]
-; CHECK: por
-; CHECK: movdqa
-; CHECK: retq
- %1 = call <16 x i16> @llvm.sabsdiff.v16i16(<16 x i16> %a1, <16 x i16> %a2)
- ret <16 x i16> %1
-}
-