Overview:
"""""""""
-The ``llvm.uabsdiff`` intrinsic returns a vector result of the absolute difference of
-the two operands, treating them both as unsigned integers.
+The ``llvm.uabsdiff`` intrinsic returns a vector result of the absolute difference
+of the two operands, treating them both as unsigned integers. The intermediate
+calculations are computed using infinitely precise unsigned arithmetic. The final
+result will be truncated to the given type.
The ``llvm.sabsdiff`` intrinsic returns a vector result of the absolute difference of
-the two operands, treating them both as signed integers.
+the two operands, treating them both as signed integers. If the result overflows, the
+behavior is undefined.
.. note::
These intrinsics are primarily used during the code generation stage of compilation.
- They are generated by compiler passes such as the Loop and SLP vectorizers.it is not
+ They are generated by compiler passes such as the Loop and SLP vectorizers. It is not
recommended for users to create them manually.
Arguments:
is equivalent to::
- %sub = sub <4 x i32> %a, %b
- %ispos = icmp ugt <4 x i32> %sub, <i32 -1, i32 -1, i32 -1, i32 -1>
- %neg = sub <4 x i32> zeroinitializer, %sub
- %1 = select <4 x i1> %ispos, <4 x i32> %sub, <4 x i32> %neg
+ %1 = zext <4 x i32> %a to <4 x i64>
+ %2 = zext <4 x i32> %b to <4 x i64>
+ %sub = sub <4 x i64> %1, %2
+ %trunc = trunc <4 x i64> %sub to <4 x i32>
-Similarly the expression::
+and the expression::
call <4 x i32> @llvm.sabsdiff.v4i32(<4 x i32> %a, <4 x i32> %b)
is equivalent to::
%sub = sub nsw <4 x i32> %a, %b
- %ispos = icmp sgt <4 x i32> %sub, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %ispos = icmp sge <4 x i32> %sub, zeroinitializer
%neg = sub nsw <4 x i32> zeroinitializer, %sub
%1 = select <4 x i1> %ispos, <4 x i32> %sub, <4 x i32> %neg
SDValue VectorLegalizer::ExpandABSDIFF(SDValue Op) { // Expands an absdiff node: ZERO_EXTEND/SUB/TRUNCATE for UABSDIFF, SUB/SETCC/VSELECT otherwise.
SDLoc dl(Op);
- SDValue Tmp1, Tmp2, Tmp3, Tmp4;
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
EVT VT = Op.getValueType();
+
+ // For the unsigned intrinsic, widen the element type and zero-extend both operands before subtracting (to handle unsigned overflow).
+ bool isUabsdiff = (Op->getOpcode() == ISD::UABSDIFF);
+ if (isUabsdiff) {
+ VT = VT.widenIntegerVectorElementType(*DAG.getContext()); // From here on VT is the widened type; Op.getValueType() is still the original.
+ Op0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op0);
+ Op1 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op1);
+ }
+
SDNodeFlags Flags;
- Flags.setNoSignedWrap(Op->getOpcode() == ISD::SABSDIFF);
-
- Tmp2 = Op.getOperand(0);
- Tmp3 = Op.getOperand(1);
- Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp3, &Flags);
- Tmp2 =
- DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Tmp1, &Flags);
- Tmp4 = DAG.getNode(
- ISD::SETCC, dl,
- TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), Tmp2,
- DAG.getConstant(0, dl, VT),
- DAG.getCondCode(Op->getOpcode() == ISD::SABSDIFF ? ISD::SETLT
- : ISD::SETULT));
- Tmp1 = DAG.getNode(ISD::VSELECT, dl, VT, Tmp4, Tmp1, Tmp2);
- return Tmp1;
+ Flags.setNoSignedWrap(!isUabsdiff); // The no-signed-wrap flag is set only on the non-UABSDIFF path.
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Op0, Op1, &Flags); // Sub = Op0 - Op1, in the (possibly widened) type VT.
+ if (isUabsdiff)
+ return DAG.getNode(ISD::TRUNCATE, dl, Op.getValueType(), Sub); // NOTE(review): returns the truncated difference directly; no compare/select is applied on this path -- confirm this is intended.
+
+ SDValue Cmp =
+ DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(DAG.getDataLayout(),
+ *DAG.getContext(), VT),
+ Sub, DAG.getConstant(0, dl, VT), DAG.getCondCode(ISD::SETGE)); // Cmp = (Sub >= 0), lane-wise.
+ SDValue Neg = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Sub, &Flags); // Neg = 0 - Sub, built with the same flags as Sub.
+ return DAG.getNode(ISD::VSELECT, dl, VT, Cmp, Sub, Neg); // Select Sub where Cmp holds, Neg elsewhere.
}
SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
--- /dev/null
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+declare <4 x i8> @llvm.uabsdiff.v4i8(<4 x i8>, <4 x i8>)
+
+define <4 x i8> @test_uabsdiff_v4i8_expand(<4 x i8> %a1, <4 x i8> %a2) {
+; CHECK-LABEL: test_uabsdiff_v4i8_expand
+; CHECK: pshufd
+; CHECK: movd
+; CHECK: subl
+; CHECK: punpckldq
+; CHECK-DAG: movd %xmm1, [[SRC:%.*]]
+; CHECK-DAG: movd %xmm0, [[DST:%.*]]
+; CHECK: subl [[SRC]], [[DST]]
+; CHECK: movd
+; CHECK: pshufd
+; CHECK: movd
+; CHECK: punpckldq
+; CHECK: movdqa
+; CHECK: retq
+
+ %1 = call <4 x i8> @llvm.uabsdiff.v4i8(<4 x i8> %a1, <4 x i8> %a2)
+ ret <4 x i8> %1
+}
+
+declare <4 x i8> @llvm.sabsdiff.v4i8(<4 x i8>, <4 x i8>)
+
+define <4 x i8> @test_sabsdiff_v4i8_expand(<4 x i8> %a1, <4 x i8> %a2) {
+; CHECK-LABEL: test_sabsdiff_v4i8_expand
+; CHECK: psubd
+; CHECK: pcmpgtd
+; CHECK: pcmpeqd
+; CHECK: pxor
+; CHECK-DAG: psubd {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
+; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
+; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
+; CHECK: por [[SRC2]], [[DST]]
+; CHECK: retq
+
+ %1 = call <4 x i8> @llvm.sabsdiff.v4i8(<4 x i8> %a1, <4 x i8> %a2)
+ ret <4 x i8> %1
+}
+
+declare <8 x i8> @llvm.sabsdiff.v8i8(<8 x i8>, <8 x i8>)
+
+define <8 x i8> @test_sabsdiff_v8i8_expand(<8 x i8> %a1, <8 x i8> %a2) {
+; CHECK-LABEL: test_sabsdiff_v8i8_expand
+; CHECK: psubw
+; CHECK: pcmpgtw
+; CHECK: pcmpeqd
+; CHECK: pxor
+; CHECK-DAG: psubw {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
+; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
+; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
+; CHECK: por [[SRC2]], [[DST]]
+; CHECK: retq
+
+ %1 = call <8 x i8> @llvm.sabsdiff.v8i8(<8 x i8> %a1, <8 x i8> %a2)
+ ret <8 x i8> %1
+}
+
+declare <16 x i8> @llvm.uabsdiff.v16i8(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @test_uabsdiff_v16i8_expand(<16 x i8> %a1, <16 x i8> %a2) {
+; CHECK-LABEL: test_uabsdiff_v16i8_expand
+; CHECK: movd
+; CHECK: movzbl
+; CHECK: movzbl
+; CHECK: subl
+; CHECK: punpcklbw
+; CHECK: retq
+
+ %1 = call <16 x i8> @llvm.uabsdiff.v16i8(<16 x i8> %a1, <16 x i8> %a2)
+ ret <16 x i8> %1
+}
+
+declare <8 x i16> @llvm.uabsdiff.v8i16(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @test_uabsdiff_v8i16_expand(<8 x i16> %a1, <8 x i16> %a2) {
+; CHECK-LABEL: test_uabsdiff_v8i16_expand
+; CHECK: pextrw
+; CHECK: pextrw
+; CHECK: subl
+; CHECK: punpcklwd
+; CHECK: retq
+
+ %1 = call <8 x i16> @llvm.uabsdiff.v8i16(<8 x i16> %a1, <8 x i16> %a2)
+ ret <8 x i16> %1
+}
+
+declare <8 x i16> @llvm.sabsdiff.v8i16(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @test_sabsdiff_v8i16_expand(<8 x i16> %a1, <8 x i16> %a2) {
+; CHECK-LABEL: test_sabsdiff_v8i16_expand
+; CHECK: psubw
+; CHECK: pcmpgtw
+; CHECK: pcmpeqd
+; CHECK: pxor
+; CHECK-DAG: psubw {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
+; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
+; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
+; CHECK: por [[SRC2]], [[DST]]
+; CHECK: retq
+
+ %1 = call <8 x i16> @llvm.sabsdiff.v8i16(<8 x i16> %a1, <8 x i16> %a2)
+ ret <8 x i16> %1
+}
+
+declare <4 x i32> @llvm.sabsdiff.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @test_sabsdiff_v4i32_expand(<4 x i32> %a1, <4 x i32> %a2) {
+; CHECK-LABEL: test_sabsdiff_v4i32_expand
+; CHECK: psubd
+; CHECK: pcmpgtd
+; CHECK: pcmpeqd
+; CHECK: pxor
+; CHECK-DAG: psubd {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
+; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
+; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
+; CHECK: por [[SRC2]], [[DST]]
+; CHECK: retq
+ %1 = call <4 x i32> @llvm.sabsdiff.v4i32(<4 x i32> %a1, <4 x i32> %a2)
+ ret <4 x i32> %1
+}
+
+declare <4 x i32> @llvm.uabsdiff.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @test_uabsdiff_v4i32_expand(<4 x i32> %a1, <4 x i32> %a2) {
+; CHECK-LABEL: test_uabsdiff_v4i32_expand
+; CHECK: pshufd
+; CHECK: movd
+; CHECK: subl
+; CHECK: punpckldq
+; CHECK-DAG: movd %xmm1, [[SRC:%.*]]
+; CHECK-DAG: movd %xmm0, [[DST:%.*]]
+; CHECK: subl [[SRC]], [[DST]]
+; CHECK: movd
+; CHECK: pshufd
+; CHECK: movd
+; CHECK: punpckldq
+; CHECK: movdqa
+; CHECK: retq
+
+ %1 = call <4 x i32> @llvm.uabsdiff.v4i32(<4 x i32> %a1, <4 x i32> %a2)
+ ret <4 x i32> %1
+}
+
+declare <2 x i32> @llvm.sabsdiff.v2i32(<2 x i32>, <2 x i32>)
+
+define <2 x i32> @test_sabsdiff_v2i32_expand(<2 x i32> %a1, <2 x i32> %a2) {
+; CHECK-LABEL: test_sabsdiff_v2i32_expand
+; CHECK: psubq
+; CHECK: pcmpgtd
+; CHECK: pcmpeqd
+; CHECK: pxor
+; CHECK-DAG: psubq {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
+; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
+; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
+; CHECK: por [[SRC2]], [[DST]]
+; CHECK: retq
+
+ %1 = call <2 x i32> @llvm.sabsdiff.v2i32(<2 x i32> %a1, <2 x i32> %a2)
+ ret <2 x i32> %1
+}
+
+declare <2 x i64> @llvm.sabsdiff.v2i64(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @test_sabsdiff_v2i64_expand(<2 x i64> %a1, <2 x i64> %a2) {
+; CHECK-LABEL: test_sabsdiff_v2i64_expand
+; CHECK: psubq
+; CHECK: pcmpgtd
+; CHECK: pcmpeqd
+; CHECK: pxor
+; CHECK-DAG: psubq {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
+; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
+; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
+; CHECK: por [[SRC2]], [[DST]]
+; CHECK: retq
+
+ %1 = call <2 x i64> @llvm.sabsdiff.v2i64(<2 x i64> %a1, <2 x i64> %a2)
+ ret <2 x i64> %1
+}
--- /dev/null
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+declare <16 x i16> @llvm.sabsdiff.v16i16(<16 x i16>, <16 x i16>)
+
+define <16 x i16> @test_sabsdiff_v16i16_expand(<16 x i16> %a1, <16 x i16> %a2) {
+; CHECK-LABEL: test_sabsdiff_v16i16_expand:
+; CHECK: # BB#0:
+; CHECK: psubw
+; CHECK: pxor
+; CHECK: pcmpgtw
+; CHECK: movdqa
+; CHECK: pandn
+; CHECK: pxor
+; CHECK: psubw
+; CHECK: pcmpeqd
+; CHECK: pxor
+; CHECK: pandn
+; CHECK: por
+; CHECK: pcmpgtw
+; CHECK-DAG: psubw {{%xmm[0-9]+}}, [[SRC:%xmm[0-9]+]]
+; CHECK-DAG: pxor {{%xmm[0-9]+}}, [[DST:%xmm[0-9]+]]
+; CHECK: pandn [[SRC]], [[DST]]
+; CHECK: por
+; CHECK: movdqa
+; CHECK: retq
+ %1 = call <16 x i16> @llvm.sabsdiff.v16i16(<16 x i16> %a1, <16 x i16> %a2)
+ ret <16 x i16> %1
+}
+