return SDValue();
}
+SDValue
+X86TargetLowering::buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+
+ // Skip if insert_vec_elt is not supported.
+ if (!isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
+ return SDValue();
+
+ DebugLoc DL = Op.getDebugLoc();
+ unsigned NumElems = Op.getNumOperands();
+
+ SDValue VecIn1;
+ SDValue VecIn2;
+ SmallVector<unsigned, 4> InsertIndices;
+ SmallVector<int, 8> Mask(NumElems, -1);
+
+ for (unsigned i = 0; i != NumElems; ++i) {
+ unsigned Opc = Op.getOperand(i).getOpcode();
+
+ if (Opc == ISD::UNDEF)
+ continue;
+
+ if (Opc != ISD::EXTRACT_VECTOR_ELT) {
+ // Quit if more than 1 elements need inserting.
+ if (InsertIndices.size() > 1)
+ return SDValue();
+
+ InsertIndices.push_back(i);
+ continue;
+ }
+
+ SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0);
+ SDValue ExtIdx = Op.getOperand(i).getOperand(1);
+
+ // Quit if extracted from vector of different type.
+ if (ExtractedFromVec.getValueType() != VT)
+ return SDValue();
+
+ // Quit if non-constant index.
+ if (!isa<ConstantSDNode>(ExtIdx))
+ return SDValue();
+
+ if (VecIn1.getNode() == 0)
+ VecIn1 = ExtractedFromVec;
+ else if (VecIn1 != ExtractedFromVec) {
+ if (VecIn2.getNode() == 0)
+ VecIn2 = ExtractedFromVec;
+ else if (VecIn2 != ExtractedFromVec)
+ // Quit if more than 2 vectors to shuffle
+ return SDValue();
+ }
+
+ unsigned Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue();
+
+ if (ExtractedFromVec == VecIn1)
+ Mask[i] = Idx;
+ else if (ExtractedFromVec == VecIn2)
+ Mask[i] = Idx + NumElems;
+ }
+
+ if (VecIn1.getNode() == 0)
+ return SDValue();
+
+ VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+ SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, &Mask[0]);
+ for (unsigned i = 0, e = InsertIndices.size(); i != e; ++i) {
+ unsigned Idx = InsertIndices[i];
+ NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx),
+ DAG.getIntPtrConstant(Idx));
+ }
+
+ return NV;
+}
+
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
if (LD.getNode())
return LD;
+ // Check for a build vector from mostly shuffle plus few inserting.
+ SDValue Sh = buildFromShuffleMostly(Op, DAG);
+ if (Sh.getNode())
+ return Sh;
+
// For SSE 4.1, use insertps to put the high elements into the low element.
if (getSubtarget()->hasSSE41()) {
SDValue Result;
--- /dev/null
+; RUN: llc < %s -mcpu=corei7 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+define void @foo(<3 x float> %in, <4 x i8>* nocapture %out) nounwind {
+ %t0 = fptoui <3 x float> %in to <3 x i8>
+ %t1 = shufflevector <3 x i8> %t0, <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+ %t2 = insertelement <4 x i8> %t1, i8 -1, i32 3
+ store <4 x i8> %t2, <4 x i8>* %out, align 4
+ ret void
+; CHECK: foo
+; CHECK: cvttps2dq
+; CHECK-NOT: pextrd
+; CHECK: pinsrd
+; CHECK-NEXT: pshufb
+; CHECK: ret
+}