From facace808cc5f83067a05c4c319f98fd75336f47 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Fri, 19 Oct 2012 17:15:18 +0000 Subject: [PATCH] Lower BUILD_VECTOR to SHUFFLE + INSERT_VECTOR_ELT for X86 - If INSERT_VECTOR_ELT is supported (above SSE2, either by custom sequence of legal insn), transform BUILD_VECTOR into SHUFFLE + INSERT_VECTOR_ELT if most of elements could be built from SHUFFLE with few (so far 1) elements being inserted. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166288 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 79 ++++++++++++++++++++++++++ lib/Target/X86/X86ISelLowering.h | 3 +- test/CodeGen/X86/buildvec-insertvec.ll | 15 +++++ 3 files changed, 96 insertions(+), 1 deletion(-) create mode 100644 test/CodeGen/X86/buildvec-insertvec.ll diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a205aef7427..2e77e64f2f2 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5175,6 +5175,80 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } +SDValue +X86TargetLowering::buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + + // Skip if insert_vec_elt is not supported. + if (!isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT)) + return SDValue(); + + DebugLoc DL = Op.getDebugLoc(); + unsigned NumElems = Op.getNumOperands(); + + SDValue VecIn1; + SDValue VecIn2; + SmallVector InsertIndices; + SmallVector Mask(NumElems, -1); + + for (unsigned i = 0; i != NumElems; ++i) { + unsigned Opc = Op.getOperand(i).getOpcode(); + + if (Opc == ISD::UNDEF) + continue; + + if (Opc != ISD::EXTRACT_VECTOR_ELT) { + // Quit if more than 1 elements need inserting. + if (InsertIndices.size() > 1) + return SDValue(); + + InsertIndices.push_back(i); + continue; + } + + SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0); + SDValue ExtIdx = Op.getOperand(i).getOperand(1); + + // Quit if extracted from vector of different type. + if (ExtractedFromVec.getValueType() != VT) + return SDValue(); + + // Quit if non-constant index. + if (!isa(ExtIdx)) + return SDValue(); + + if (VecIn1.getNode() == 0) + VecIn1 = ExtractedFromVec; + else if (VecIn1 != ExtractedFromVec) { + if (VecIn2.getNode() == 0) + VecIn2 = ExtractedFromVec; + else if (VecIn2 != ExtractedFromVec) + // Quit if more than 2 vectors to shuffle + return SDValue(); + } + + unsigned Idx = cast(ExtIdx)->getZExtValue(); + + if (ExtractedFromVec == VecIn1) + Mask[i] = Idx; + else if (ExtractedFromVec == VecIn2) + Mask[i] = Idx + NumElems; + } + + if (VecIn1.getNode() == 0) + return SDValue(); + + VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); + SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, &Mask[0]); + for (unsigned i = 0, e = InsertIndices.size(); i != e; ++i) { + unsigned Idx = InsertIndices[i]; + NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx), + DAG.getIntPtrConstant(Idx)); + } + + return NV; +} + SDValue X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); @@ -5451,6 +5525,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (LD.getNode()) return LD; + // Check for a build vector from mostly shuffle plus few inserting. + SDValue Sh = buildFromShuffleMostly(Op, DAG); + if (Sh.getNode()) + return Sh; + // For SSE 4.1, use insertps to put the high elements into the low element. if (getSubtarget()->hasSSE41()) { SDValue Result; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 8bb07897386..228fab1689e 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -825,9 +825,10 @@ namespace llvm { SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; - // Utility functions to help LowerVECTOR_SHUFFLE + // Utility functions to help LowerVECTOR_SHUFFLE & LowerBUILD_VECTOR SDValue LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const; SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const; + SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const; diff --git a/test/CodeGen/X86/buildvec-insertvec.ll b/test/CodeGen/X86/buildvec-insertvec.ll new file mode 100644 index 00000000000..3fb69a48b3c --- /dev/null +++ b/test/CodeGen/X86/buildvec-insertvec.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mcpu=corei7 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + +define void @foo(<3 x float> %in, <4 x i8>* nocapture %out) nounwind { + %t0 = fptoui <3 x float> %in to <3 x i8> + %t1 = shufflevector <3 x i8> %t0, <3 x i8> undef, <4 x i32> + %t2 = insertelement <4 x i8> %t1, i8 -1, i32 3 + store <4 x i8> %t2, <4 x i8>* %out, align 4 + ret void +; CHECK: foo +; CHECK: cvttps2dq +; CHECK-NOT: pextrd +; CHECK: pinsrd +; CHECK-NEXT: pshufb +; CHECK: ret +} -- 2.34.1