From: Chris Lattner Date: Sat, 28 Aug 2010 17:15:43 +0000 (+0000) Subject: improve comments in the unpcklps generating logic, introduce X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=6e80e449261e259d41a247603228be7802c112d1;p=oota-llvm.git improve comments in the unpcklps generating logic, introduce a new EltStride variable instead of reusing NumElems variable for a non-obvious purpose. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112377 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5a7ed4f71f5..91fc26cd1e3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4040,8 +4040,8 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl &Elts, SDValue X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); - // All zero's are handled with pxor in SSE2 and above, xorps in SSE1 and - // all one's are handled with pcmpeqd. In AVX, zero's are handled with + // All zero's are handled with pxor in SSE2 and above, xorps in SSE1. + // All one's are handled with pcmpeqd. In AVX, zero's are handled with // vpxor in 128-bit and xor{pd,ps} in 256-bit, but no 256 version of pcmpeqd // is present, so AllOnes is ignored. if (ISD::isBuildVectorAllZeros(Op.getNode()) || @@ -4288,18 +4288,25 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return V[0]; } - // Otherwise, expand into a number of unpckl* - // e.g. for v4f32 + // Otherwise, expand into a number of unpckl*, start by extending each of + // our (non-undef) elements to the full vector width with the element in the + // bottom slot of the vector (which generates no code for SSE). + for (unsigned i = 0; i < NumElems; ++i) { + if (Op.getOperand(i).getOpcode() != ISD::UNDEF) + V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i)); + else + V[i] = DAG.getUNDEF(VT); + } + + // Next, we iteratively mix elements, e.g. for v4f32: // Step 1: unpcklps 0, 2 ==> X: // : unpcklps 1, 3 ==> Y: // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> - for (unsigned i = 0; i < NumElems; ++i) - V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i)); - NumElems >>= 1; - while (NumElems != 0) { - for (unsigned i = 0; i < NumElems; ++i) - V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + NumElems]); - NumElems >>= 1; + unsigned EltStride = NumElems >> 1; + while (EltStride != 0) { + for (unsigned i = 0; i < EltStride; ++i) + V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + EltStride]); + EltStride >>= 1; } return V[0]; }