From b2f47c6a3455a9d265e21c8ab1ca81657ff577a0 Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Fri, 27 Dec 2013 20:20:28 +0000 Subject: [PATCH] Teach DAGCombiner how to fold a SIGN_EXTEND_INREG of a BUILD_VECTOR of ConstantSDNodes (or UNDEFs) into a simple BUILD_VECTOR. For example, given the following sequence of dag nodes: i32 C = Constant<1> v4i32 V = BUILD_VECTOR C, C, C, C v4i32 Result = SIGN_EXTEND_INREG V, ValueType:v4i1 The SIGN_EXTEND_INREG node can be folded into a build_vector since the vector in input is a BUILD_VECTOR of constants. The optimized sequence is: i32 C = Constant<-1> v4i32 Result = BUILD_VECTOR C, C, C, C git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@198084 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/SelectionDAGNodes.h | 4 + lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 23 ++++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 16 +++ test/CodeGen/X86/2011-12-28-vselecti8.ll | 19 +++- test/CodeGen/X86/blend-msb.ll | 12 +- test/CodeGen/X86/sse2-blend.ll | 12 +- test/CodeGen/X86/vselect.ll | 133 ++++++++++++++++++++++ 7 files changed, 202 insertions(+), 17 deletions(-) create mode 100644 test/CodeGen/X86/vselect.ll diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index 70c15e6c6e6..08eda723c6b 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -70,6 +70,10 @@ namespace ISD { /// BUILD_VECTOR where all of the elements are 0 or undef. bool isBuildVectorAllZeros(const SDNode *N); + /// \brief Return true if the specified node is a BUILD_VECTOR node of + /// all ConstantSDNode or undef. + bool isBuildVectorOfConstantSDNodes(const SDNode *N); + /// isScalarToVector - Return true if the specified node is a /// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low /// element is not an undef. 
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f8728b6d603..68d0521e763 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5511,6 +5511,29 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
                          BSwap, N1);
   }
 
+  // Fold a sext_inreg of a build_vector of ConstantSDNodes or undefs
+  // into a build_vector.
+  if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
+    SmallVector<SDValue, 8> Elts;
+    unsigned NumElts = N0->getNumOperands();
+    unsigned ShAmt = VTBits - EVTBits;
+
+    for (unsigned i = 0; i != NumElts; ++i) {
+      SDValue Op = N0->getOperand(i);
+      if (Op->getOpcode() == ISD::UNDEF) {
+        Elts.push_back(Op);
+        continue;
+      }
+
+      ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
+      const APInt &C = CurrentND->getAPIntValue();
+      Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt),
+                                     Op.getValueType()));
+    }
+
+    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Elts[0], NumElts);
+  }
+
   return SDValue();
 }
 
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 45d5a4fa69e..f163f6bdf2c 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -179,6 +179,22 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) {
   return true;
 }
 
+/// \brief Return true if the specified node is a BUILD_VECTOR node of
+/// all ConstantSDNode or undef.
+bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) {
+  if (N->getOpcode() != ISD::BUILD_VECTOR)
+    return false;
+
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    SDValue Op = N->getOperand(i);
+    if (Op.getOpcode() == ISD::UNDEF)
+      continue;
+    if (!isa<ConstantSDNode>(Op))
+      return false;
+  }
+  return true;
+}
+
 /// isScalarToVector - Return true if the specified node is a
 /// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
 /// element is not an undef.
diff --git a/test/CodeGen/X86/2011-12-28-vselecti8.ll b/test/CodeGen/X86/2011-12-28-vselecti8.ll index dbc122ac6e4..c91646640b8 100644 --- a/test/CodeGen/X86/2011-12-28-vselecti8.ll +++ b/test/CodeGen/X86/2011-12-28-vselecti8.ll @@ -3,10 +3,20 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-darwin11.2.0" -; CHECK: @foo8 -; CHECK: psll -; CHECK: psraw -; CHECK: pblendvb +; During legalization, the vselect mask is 'type legalized' into a +; wider BUILD_VECTOR. This causes the introduction of a new +; sign_extend_inreg in the DAG. +; +; A sign_extend_inreg of a vector of ConstantSDNode or undef can be +; always folded into a simple build_vector. +; +; Make sure that the sign_extend_inreg is simplified and that we +; don't generate psll, psraw and pblendvb from the vselect. + +; CHECK-LABEL: foo8 +; CHECK-NOT: psll +; CHECK-NOT: psraw +; CHECK-NOT: pblendvb ; CHECK: ret define void @foo8(float* nocapture %RET) nounwind { allocas: @@ -17,4 +27,3 @@ allocas: ret void } - diff --git a/test/CodeGen/X86/blend-msb.ll b/test/CodeGen/X86/blend-msb.ll index 4f2060f7012..0485a42eb7e 100644 --- a/test/CodeGen/X86/blend-msb.ll +++ b/test/CodeGen/X86/blend-msb.ll @@ -5,7 +5,7 @@ ; shifting the needed bit to the MSB, and not using shl+sra. ;CHECK-LABEL: vsel_float: -;CHECK: movl $-2147483648 +;CHECK: movl $-1 ;CHECK-NEXT: movd ;CHECK-NEXT: blendvps ;CHECK: ret @@ -15,7 +15,7 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) { } ;CHECK-LABEL: vsel_4xi8: -;CHECK: movl $-2147483648 +;CHECK: movl $-1 ;CHECK-NEXT: movd ;CHECK-NEXT: blendvps ;CHECK: ret @@ -26,12 +26,12 @@ define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) { ; We do not have native support for v8i16 blends and we have to use the -; blendvb instruction or a sequence of NAND/OR/AND. 
Make sure that we do not r +; blendvb instruction or a sequence of NAND/OR/AND. Make sure that we do not ; reduce the mask in this case. ;CHECK-LABEL: vsel_8xi16: -;CHECK: psllw -;CHECK: psraw -;CHECK: pblendvb +;CHECK: andps +;CHECK: andps +;CHECK: orps ;CHECK: ret define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) { %vsel = select <8 x i1> , <8 x i16> %v1, <8 x i16> %v2 diff --git a/test/CodeGen/X86/sse2-blend.ll b/test/CodeGen/X86/sse2-blend.ll index 1ac983254ea..968595c383a 100644 --- a/test/CodeGen/X86/sse2-blend.ll +++ b/test/CodeGen/X86/sse2-blend.ll @@ -1,9 +1,9 @@ ; RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse4.1 | FileCheck %s ; CHECK: vsel_float -; CHECK: pandn -; CHECK: pand -; CHECK: por +; CHECK: xorps +; CHECK: movss +; CHECK: orps ; CHECK: ret define void@vsel_float(<4 x float>* %v1, <4 x float>* %v2) { %A = load <4 x float>* %v1 @@ -14,9 +14,9 @@ define void@vsel_float(<4 x float>* %v1, <4 x float>* %v2) { } ; CHECK: vsel_i32 -; CHECK: pandn -; CHECK: pand -; CHECK: por +; CHECK: xorps +; CHECK: movss +; CHECK: orps ; CHECK: ret define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) { %A = load <4 x i32>* %v1 diff --git a/test/CodeGen/X86/vselect.ll b/test/CodeGen/X86/vselect.ll new file mode 100644 index 00000000000..af8c3106189 --- /dev/null +++ b/test/CodeGen/X86/vselect.ll @@ -0,0 +1,133 @@ +; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=-sse4.1 < %s | FileCheck %s + +; Verify that we don't emit packed vector shifts instructions if the +; condition used by the vector select is a vector of constants. 
+ + +define <4 x float> @test1(<4 x float> %a, <4 x float> %b) { + %1 = select <4 x i1> , <4 x float> %a, <4 x float> %b + ret <4 x float> %1 +} +; CHECK-LABEL: test1 +; CHECK-NOT: psllw +; CHECK-NOT: psraw +; CHECK: ret + + +define <4 x float> @test2(<4 x float> %a, <4 x float> %b) { + %1 = select <4 x i1> , <4 x float> %a, <4 x float> %b + ret <4 x float> %1 +} +; CHECK-LABEL: test2 +; CHECK-NOT: psllw +; CHECK-NOT: psraw +; CHECK: ret + + +define <4 x float> @test3(<4 x float> %a, <4 x float> %b) { + %1 = select <4 x i1> , <4 x float> %a, <4 x float> %b + ret <4 x float> %1 +} +; CHECK-LABEL: test3 +; CHECK-NOT: psllw +; CHECK-NOT: psraw +; CHECK: ret + + +define <4 x float> @test4(<4 x float> %a, <4 x float> %b) { + %1 = select <4 x i1> , <4 x float> %a, <4 x float> %b + ret <4 x float> %1 +} +; CHECK-LABEL: test4 +; CHECK-NOT: psllw +; CHECK-NOT: psraw +; CHECK: movaps %xmm1, %xmm0 +; CHECK: ret + + +define <4 x float> @test5(<4 x float> %a, <4 x float> %b) { + %1 = select <4 x i1> , <4 x float> %a, <4 x float> %b + ret <4 x float> %1 +} +; CHECK-LABEL: test5 +; CHECK-NOT: psllw +; CHECK-NOT: psraw +; CHECK: ret + + +define <8 x i16> @test6(<8 x i16> %a, <8 x i16> %b) { + %1 = select <8 x i1> , <8 x i16> %a, <8 x i16> %a + ret <8 x i16> %1 +} +; CHECK-LABEL: test6 +; CHECK-NOT: psllw +; CHECK-NOT: psraw +; CHECK: ret + + +define <8 x i16> @test7(<8 x i16> %a, <8 x i16> %b) { + %1 = select <8 x i1> , <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %1 +} +; CHECK-LABEL: test7 +; CHECK-NOT: psllw +; CHECK-NOT: psraw +; CHECK: ret + + +define <8 x i16> @test8(<8 x i16> %a, <8 x i16> %b) { + %1 = select <8 x i1> , <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %1 +} +; CHECK-LABEL: test8 +; CHECK-NOT: psllw +; CHECK-NOT: psraw +; CHECK: ret + +define <8 x i16> @test9(<8 x i16> %a, <8 x i16> %b) { + %1 = select <8 x i1> , <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %1 +} +; CHECK-LABEL: test9 +; CHECK-NOT: psllw +; CHECK-NOT: psraw +; CHECK: movaps %xmm1, %xmm0 +; CHECK-NEXT: 
ret + +define <8 x i16> @test10(<8 x i16> %a, <8 x i16> %b) { + %1 = select <8 x i1> , <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %1 +} +; CHECK-LABEL: test10 +; CHECK-NOT: psllw +; CHECK-NOT: psraw +; CHECK: ret + +define <8 x i16> @test11(<8 x i16> %a, <8 x i16> %b) { + %1 = select <8 x i1> , <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %1 +} +; CHECK-LABEL: test11 +; CHECK-NOT: psllw +; CHECK-NOT: psraw +; CHECK: ret + +define <8 x i16> @test12(<8 x i16> %a, <8 x i16> %b) { + %1 = select <8 x i1> , <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %1 +} +; CHECK-LABEL: test12 +; CHECK-NOT: psllw +; CHECK-NOT: psraw +; CHECK: ret + +define <8 x i16> @test13(<8 x i16> %a, <8 x i16> %b) { + %1 = select <8 x i1> , <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %1 +} +; CHECK-LABEL: test13 +; CHECK-NOT: psllw +; CHECK-NOT: psraw +; CHECK: ret + + -- 2.34.1