From: Simon Pilgrim
Date: Wed, 29 Jul 2015 21:44:27 +0000 (+0000)
Subject: [X86][SSE] Keep 32-bit target i64 vector shifts on SSE unit.
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=f66eda96ebf7b2071c95347d1911d68ebcc41102;p=oota-llvm.git

[X86][SSE] Keep 32-bit target i64 vector shifts on SSE unit.

This patch improves the 32-bit target i64 constant matching to detect the
shuffle vector splats that are introduced by i64 vector shift vectorization
(D8416).

Differential Revision: http://reviews.llvm.org/D11327

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@243577 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index cea2aa73c23..906f0464d84 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -17307,34 +17307,50 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
 
   // Special case in 32-bit mode, where i64 is expanded into high and low parts.
   if (!Subtarget->is64Bit() &&
-      (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
-      Amt.getOpcode() == ISD::BITCAST &&
-      Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
+      (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64))) {
+
+    // Peek through any splat that was introduced for i64 shift vectorization.
+    int SplatIndex = -1;
+    if (ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(Amt.getNode()))
+      if (SVN->isSplat()) {
+        SplatIndex = SVN->getSplatIndex();
+        Amt = Amt.getOperand(0);
+        assert(SplatIndex < (int)VT.getVectorNumElements() &&
+               "Splat shuffle referencing second operand");
+      }
+
+    if (Amt.getOpcode() != ISD::BITCAST ||
+        Amt.getOperand(0).getOpcode() != ISD::BUILD_VECTOR)
+      return SDValue();
+
     Amt = Amt.getOperand(0);
     unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() /
                      VT.getVectorNumElements();
     unsigned RatioInLog2 = Log2_32_Ceil(Ratio);
     uint64_t ShiftAmt = 0;
+    unsigned BaseOp = (SplatIndex < 0 ? 0 : SplatIndex * Ratio);
     for (unsigned i = 0; i != Ratio; ++i) {
-      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i));
+      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i + BaseOp));
       if (!C)
         return SDValue();
       // 6 == Log2(64)
       ShiftAmt |= C->getZExtValue() << (i * (1 << (6 - RatioInLog2)));
     }
-    // Check remaining shift amounts.
-    for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
-      uint64_t ShAmt = 0;
-      for (unsigned j = 0; j != Ratio; ++j) {
-        ConstantSDNode *C =
-          dyn_cast<ConstantSDNode>(Amt.getOperand(i + j));
-        if (!C)
+
+    // Check remaining shift amounts (if not a splat).
+    if (SplatIndex < 0) {
+      for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
+        uint64_t ShAmt = 0;
+        for (unsigned j = 0; j != Ratio; ++j) {
+          ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i + j));
+          if (!C)
+            return SDValue();
+          // 6 == Log2(64)
+          ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2)));
+        }
+        if (ShAmt != ShiftAmt)
           return SDValue();
-        // 6 == Log2(64)
-        ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2)));
       }
-      if (ShAmt != ShiftAmt)
-        return SDValue();
     }
 
     if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
diff --git a/test/CodeGen/X86/vector-shift-ashr-128.ll b/test/CodeGen/X86/vector-shift-ashr-128.ll
index fff788a2298..64d4ea03762 100644
--- a/test/CodeGen/X86/vector-shift-ashr-128.ll
+++ b/test/CodeGen/X86/vector-shift-ashr-128.ll
@@ -877,21 +877,17 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) nounwind {
 ;
 ; X32-SSE-LABEL: constant_shift_v2i64:
 ; X32-SSE:       # BB#0:
-; X32-SSE-NEXT:    movl $7, %eax
-; X32-SSE-NEXT:    movd %eax, %xmm2
 ; X32-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [0,2147483648,0,2147483648]
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm3
-; X32-SSE-NEXT:    psrlq %xmm2, %xmm3
-; X32-SSE-NEXT:    movl $1, %eax
-; X32-SSE-NEXT:    movd %eax, %xmm4
-; X32-SSE-NEXT:    psrlq %xmm4, %xmm1
-; X32-SSE-NEXT:    movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
+; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X32-SSE-NEXT:    psrlq $7, %xmm2
+; X32-SSE-NEXT:    psrlq $1, %xmm1
+; X32-SSE-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
 ; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
-; X32-SSE-NEXT:    psrlq %xmm2, %xmm1
-; X32-SSE-NEXT:    psrlq %xmm4, %xmm0
+; X32-SSE-NEXT:    psrlq $7, %xmm1
+; X32-SSE-NEXT:    psrlq $1, %xmm0
 ; X32-SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; X32-SSE-NEXT:    xorpd %xmm3, %xmm1
-; X32-SSE-NEXT:    psubq %xmm3, %xmm1
+; X32-SSE-NEXT:    xorpd %xmm2, %xmm1
+; X32-SSE-NEXT:    psubq %xmm2, %xmm1
 ; X32-SSE-NEXT:    movdqa %xmm1, %xmm0
 ; X32-SSE-NEXT:    retl
   %shift = ashr <2 x i64> %a, <i64 1, i64 7>
diff --git a/test/CodeGen/X86/vector-shift-lshr-128.ll b/test/CodeGen/X86/vector-shift-lshr-128.ll
index 7ee0412b6ad..1c988946a46 100644
--- a/test/CodeGen/X86/vector-shift-lshr-128.ll
+++ b/test/CodeGen/X86/vector-shift-lshr-128.ll
@@ -655,13 +655,9 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) nounwind {
 ;
 ; X32-SSE-LABEL: constant_shift_v2i64:
 ; X32-SSE:       # BB#0:
-; X32-SSE-NEXT:    movl $7, %eax
-; X32-SSE-NEXT:    movd %eax, %xmm2
 ; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
-; X32-SSE-NEXT:    psrlq %xmm2, %xmm1
-; X32-SSE-NEXT:    movl $1, %eax
-; X32-SSE-NEXT:    movd %eax, %xmm2
-; X32-SSE-NEXT:    psrlq %xmm2, %xmm0
+; X32-SSE-NEXT:    psrlq $7, %xmm1
+; X32-SSE-NEXT:    psrlq $1, %xmm0
 ; X32-SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
 ; X32-SSE-NEXT:    movapd %xmm1, %xmm0
 ; X32-SSE-NEXT:    retl
diff --git a/test/CodeGen/X86/vector-shift-shl-128.ll b/test/CodeGen/X86/vector-shift-shl-128.ll
index facbcc14c2c..fa6533d39cb 100644
--- a/test/CodeGen/X86/vector-shift-shl-128.ll
+++ b/test/CodeGen/X86/vector-shift-shl-128.ll
@@ -607,13 +607,9 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) nounwind {
 ;
 ; X32-SSE-LABEL: constant_shift_v2i64:
 ; X32-SSE:       # BB#0:
-; X32-SSE-NEXT:    movl $7, %eax
-; X32-SSE-NEXT:    movd %eax, %xmm2
 ; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
-; X32-SSE-NEXT:    psllq %xmm2, %xmm1
-; X32-SSE-NEXT:    movl $1, %eax
-; X32-SSE-NEXT:    movd %eax, %xmm2
-; X32-SSE-NEXT:    psllq %xmm2, %xmm0
+; X32-SSE-NEXT:    psllq $7, %xmm1
+; X32-SSE-NEXT:    psllq $1, %xmm0
 ; X32-SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
 ; X32-SSE-NEXT:    movapd %xmm1, %xmm0
 ; X32-SSE-NEXT:    retl
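For reference, and not part of the commit itself: a minimal reduced example of the pattern the updated X32-SSE checks above exercise. The file name, function name and llc invocation below are illustrative assumptions; the shift amounts <i64 1, i64 7> are taken from the existing constant_shift_v2i64 tests.

; constant-shift-example.ll -- hypothetical reduced test, assuming a 32-bit
; SSE2 target, e.g.:
;   llc -mtriple=i686-unknown-unknown -mattr=+sse2 constant-shift-example.ll -o -
; On such a target the two i64 lanes are shifted separately, and the per-lane
; constant amounts reach LowerScalarImmediateShift behind a splat shuffle (the
; SplatIndex/BaseOp handling above). With this patch they are still matched as
; constants, so the X32-SSE output uses the psrlq $1 / psrlq $7 immediate forms
; rather than movl/movd plus psrlq-by-register, matching the new CHECK lines.
define <2 x i64> @constant_shift_example(<2 x i64> %a) nounwind {
  %shift = lshr <2 x i64> %a, <i64 1, i64 7>
  ret <2 x i64> %shift
}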