From 6242fda42ad13eebc908e744426ae7bc8cf8d1c3 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 26 Apr 2013 09:19:19 +0000 Subject: [PATCH] DAGCombiner: Canonicalize vector integer abs in the same way we do it for scalars. This already helps SSE2 x86 a lot because it lacks an efficient way to represent a vector select. The long term goal is to enable the backend to match a canonicalized pattern into a single instruction (e.g. vabs or pabs). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@180597 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 42 +++++++++++++++ test/CodeGen/X86/viabs.ll | 66 ++++++++++++++++++++++++ 2 files changed, 108 insertions(+) create mode 100644 test/CodeGen/X86/viabs.ll diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index f67cd3e2709..05a58b1c1b2 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -205,6 +205,7 @@ namespace { SDValue visitCTTZ_ZERO_UNDEF(SDNode *N); SDValue visitCTPOP(SDNode *N); SDValue visitSELECT(SDNode *N); + SDValue visitVSELECT(SDNode *N); SDValue visitSELECT_CC(SDNode *N); SDValue visitSETCC(SDNode *N); SDValue visitSIGN_EXTEND(SDNode *N); @@ -1126,6 +1127,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N); case ISD::CTPOP: return visitCTPOP(N); case ISD::SELECT: return visitSELECT(N); + case ISD::VSELECT: return visitVSELECT(N); case ISD::SELECT_CC: return visitSELECT_CC(N); case ISD::SETCC: return visitSETCC(N); case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); @@ -4162,6 +4164,46 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitVSELECT(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + DebugLoc DL = N->getDebugLoc(); + + // Canonicalize integer abs. + // vselect (setg[te] X, 0), X, -X -> + // vselect (setgt X, -1), X, -X -> + // vselect (setl[te] X, 0), -X, X -> + // Y = sra (X, size(X)-1); xor (add (X, Y), Y) + if (N0.getOpcode() == ISD::SETCC) { + SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); + ISD::CondCode CC = cast(N0.getOperand(2))->get(); + bool isAbs = false; + bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode()); + + if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) || + (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) && + N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1)) + isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode()); + else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) && + N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1)) + isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); + + if (isAbs) { + EVT VT = LHS.getValueType(); + SDValue Shift = DAG.getNode( + ISD::SRA, DL, VT, LHS, + DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT)); + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); + AddToWorkList(Shift.getNode()); + AddToWorkList(Add.getNode()); + return DAG.getNode(ISD::XOR, DL, VT, Add, Shift); + } + } + + return SDValue(); +} + SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); diff --git a/test/CodeGen/X86/viabs.ll b/test/CodeGen/X86/viabs.ll new file mode 100644 index 00000000000..a509d8aa81e --- /dev/null +++ b/test/CodeGen/X86/viabs.ll @@ -0,0 +1,66 @@ +; RUN: llc < %s -march=x86-64 -mcpu=x86-64 | FileCheck %s -check-prefix=SSE2 + +define <4 x i32> @test1(<4 x i32> %a) nounwind { +; SSE2: test1: +; SSE2: movdqa +; SSE2-NEXT: psrad $31 +; SSE2-NEXT: padd +; SSE2-NEXT: pxor +; SSE2-NEXT: ret + %tmp1neg = sub <4 x i32> zeroinitializer, %a + %b = icmp sgt <4 x i32> %a, + %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg + ret <4 x i32> %abs +} + +define <4 x i32> @test2(<4 x i32> %a) nounwind { +; SSE2: test2: +; SSE2: movdqa +; SSE2-NEXT: psrad $31 +; SSE2-NEXT: padd +; SSE2-NEXT: pxor +; SSE2-NEXT: ret + %tmp1neg = sub <4 x i32> zeroinitializer, %a + %b = icmp sge <4 x i32> %a, zeroinitializer + %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg + ret <4 x i32> %abs +} + +define <4 x i32> @test3(<4 x i32> %a) nounwind { +; SSE2: test3: +; SSE2: movdqa +; SSE2-NEXT: psrad $31 +; SSE2-NEXT: padd +; SSE2-NEXT: pxor +; SSE2-NEXT: ret + %tmp1neg = sub <4 x i32> zeroinitializer, %a + %b = icmp sgt <4 x i32> %a, zeroinitializer + %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg + ret <4 x i32> %abs +} + +define <4 x i32> @test4(<4 x i32> %a) nounwind { +; SSE2: test4: +; SSE2: movdqa +; SSE2-NEXT: psrad $31 +; SSE2-NEXT: padd +; SSE2-NEXT: pxor +; SSE2-NEXT: ret + %tmp1neg = sub <4 x i32> zeroinitializer, %a + %b = icmp slt <4 x i32> %a, zeroinitializer + %abs = select <4 x i1> %b, <4 x i32> %tmp1neg, <4 x i32> %a + ret <4 x i32> %abs +} + +define <4 x i32> @test5(<4 x i32> %a) nounwind { +; SSE2: test5: +; SSE2: movdqa +; SSE2-NEXT: psrad $31 +; SSE2-NEXT: padd +; SSE2-NEXT: pxor +; SSE2-NEXT: ret + %tmp1neg = sub <4 x i32> zeroinitializer, %a + %b = icmp sle <4 x i32> %a, zeroinitializer + %abs = select <4 x i1> %b, <4 x i32> %tmp1neg, <4 x i32> %a + ret <4 x i32> %abs +} -- 2.34.1