From d23f1883d3eef7c002f865a306887c2403a3ec31 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Mon, 29 Sep 2014 02:01:20 +0000 Subject: [PATCH] [x86] Delete a bunch of really bad and totally unnecessary code in the X86 target-specific DAG combining that tried to convert VSELECT nodes into VECTOR_SHUFFLE nodes that it "knew" would lower into immediate-controlled blend nodes. Turns out, we have perfectly good lowering of all these VSELECT nodes, and indeed that lowering already knows how to handle lowering through BLENDI to immediate-controlled blend nodes. The code just wasn't getting used much because this thing forced the world to go through the vector shuffle lowering. Yuck. This also exposes that I was too aggressive in avoiding domain crossing in r218588 with that lowering -- when the other option is to expand into two 128-bit vectors, it is worth domain crossing. Restore that behavior now that we have nice tests covering it. The test updates here fall into two camps. One is where previously we ended up with an unsigned encoding of the blend operand and now we get a signed encoding. In most of those places there were elaborate comments explaining exactly what these operands really mean. Rather than that, just switch these tests to use the nicely decoded comments that make it obvious that the final shuffle matches. The other updates are just removing pointless domain crossing by blending integers with PBLENDW rather than BLENDPS. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218589 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 124 +++-------------------------- test/CodeGen/X86/avx-blend.ll | 12 +-- test/CodeGen/X86/blend-msb.ll | 12 +-- test/CodeGen/X86/sse41-blend.ll | 6 +- 4 files changed, 17 insertions(+), 137 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 552d420b805..8ea1790e52b 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -11797,43 +11797,6 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -// This function assumes its argument is a BUILD_VECTOR of constants or -// undef SDNodes. i.e: ISD::isBuildVectorOfConstantSDNodes(BuildVector) is -// true. -static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector, - unsigned &MaskValue) { - MaskValue = 0; - unsigned NumElems = BuildVector->getNumOperands(); - // There are 2 lanes if (NumElems > 8), and 1 lane otherwise. - unsigned NumLanes = (NumElems - 1) / 8 + 1; - unsigned NumElemsInLane = NumElems / NumLanes; - - // Blend for v16i16 should be symetric for the both lanes. - for (unsigned i = 0; i < NumElemsInLane; ++i) { - SDValue EltCond = BuildVector->getOperand(i); - SDValue SndLaneEltCond = - (NumLanes == 2) ? BuildVector->getOperand(i + NumElemsInLane) : EltCond; - - int Lane1Cond = -1, Lane2Cond = -1; - if (isa(EltCond)) - Lane1Cond = !isZero(EltCond); - if (isa(SndLaneEltCond)) - Lane2Cond = !isZero(SndLaneEltCond); - - if (Lane1Cond == Lane2Cond || Lane2Cond < 0) - // Lane1Cond != 0, means we want the first argument. - // Lane1Cond == 0, means we want the second argument. - // The encoding of this argument is 0 for the first argument, 1 - // for the second. Therefore, invert the condition. 
- MaskValue |= !Lane1Cond << i; - else if (Lane1Cond < 0) - MaskValue |= !Lane2Cond << i; - else - return false; - } - return true; -} - /// \brief Try to lower a VSELECT instruction to an immediate-controlled blend /// instruction. static SDValue lowerVSELECTtoBLENDI(SDValue Op, const X86Subtarget *Subtarget, @@ -11883,17 +11846,18 @@ static SDValue lowerVSELECTtoBLENDI(SDValue Op, const X86Subtarget *Subtarget, } else { // Everything else uses a generic blend mask computation with a custom type. if (VT.isInteger()) { - if (VT.is256BitVector()) { - // The 256-bit integer blend instructions are only available on AVX2. - if (!Subtarget->hasAVX2()) - return SDValue(); - - // We do the blend on v8i32 for 256-bit integer types. - BlendVT = MVT::v8i32; - } else { + if (VT.is256BitVector()) + // We cast to floating point types if integer blends aren't available, + // and we coerce integer blends when available to occur on the v8i32 + // type. + BlendVT = Subtarget->hasAVX2() + ? MVT::v8i32 + : MVT::getVectorVT( + MVT::getFloatingPointVT(VT.getScalarSizeInBits()), + VT.getVectorNumElements()); + else // For 128-bit vectors we do the blend on v8i16 types. 
BlendVT = MVT::v8i16; - } } assert(BlendVT.getVectorNumElements() <= 8 && "Cannot blend more than 8 elements with an immediate!"); @@ -21718,57 +21682,6 @@ matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS, return std::make_pair(Opc, NeedSplit); } -static SDValue -TransformVSELECTtoBlendVECTOR_SHUFFLE(SDNode *N, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { - SDLoc dl(N); - SDValue Cond = N->getOperand(0); - SDValue LHS = N->getOperand(1); - SDValue RHS = N->getOperand(2); - - if (Cond.getOpcode() == ISD::SIGN_EXTEND) { - SDValue CondSrc = Cond->getOperand(0); - if (CondSrc->getOpcode() == ISD::SIGN_EXTEND_INREG) - Cond = CondSrc->getOperand(0); - } - - MVT VT = N->getSimpleValueType(0); - MVT EltVT = VT.getVectorElementType(); - unsigned NumElems = VT.getVectorNumElements(); - // There is no blend with immediate in AVX-512. - if (VT.is512BitVector()) - return SDValue(); - - if (!Subtarget->hasSSE41() || EltVT == MVT::i8) - return SDValue(); - if (!Subtarget->hasInt256() && VT == MVT::v16i16) - return SDValue(); - - if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) - return SDValue(); - - // A vselect where all conditions and data are constants can be optimized into - // a single vector load by SelectionDAGLegalize::ExpandBUILD_VECTOR(). - if (ISD::isBuildVectorOfConstantSDNodes(LHS.getNode()) && - ISD::isBuildVectorOfConstantSDNodes(RHS.getNode())) - return SDValue(); - - unsigned MaskValue = 0; - if (!BUILD_VECTORtoBlendMask(cast(Cond), MaskValue)) - return SDValue(); - - SmallVector ShuffleMask(NumElems, -1); - for (unsigned i = 0; i < NumElems; ++i) { - // Be sure we emit undef where we can. - if (Cond.getOperand(i)->getOpcode() == ISD::UNDEF) - ShuffleMask[i] = -1; - else - ShuffleMask[i] = i + NumElems * ((MaskValue >> i) & 1); - } - - return DAG.getVectorShuffle(VT, dl, LHS, RHS, &ShuffleMask[0]); -} - /// PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT /// nodes. 
static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, @@ -22318,23 +22231,6 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, DCI.CommitTargetLoweringOpt(TLO); } - // We should generate an X86ISD::BLENDI from a vselect if its argument - // is a sign_extend_inreg of an any_extend of a BUILD_VECTOR of - // constants. This specific pattern gets generated when we split a - // selector for a 512 bit vector in a machine without AVX512 (but with - // 256-bit vectors), during legalization: - // - // (vselect (sign_extend (any_extend (BUILD_VECTOR)) i1) LHS RHS) - // - // Iff we find this pattern and the build_vectors are built from - // constants, we translate the vselect into a shuffle_vector that we - // know will be matched by LowerVECTOR_SHUFFLEtoBlend. - if (N->getOpcode() == ISD::VSELECT && !DCI.isBeforeLegalize()) { - SDValue Shuffle = TransformVSELECTtoBlendVECTOR_SHUFFLE(N, DAG, Subtarget); - if (Shuffle.getNode()) - return Shuffle; - } - return SDValue(); } diff --git a/test/CodeGen/X86/avx-blend.ll b/test/CodeGen/X86/avx-blend.ll index d2a22d70947..17a4f71e48e 100644 --- a/test/CodeGen/X86/avx-blend.ll +++ b/test/CodeGen/X86/avx-blend.ll @@ -21,7 +21,7 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) { ;CHECK-LABEL: vsel_i32: -;CHECK: vblendps $10, %xmm1, %xmm0, %xmm0 +;CHECK: vpblendw {{.*}} ## xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] ;CHECK: ret define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) { %vsel = select <4 x i1> , <4 x i32> %v1, <4 x i32> %v2 @@ -61,13 +61,7 @@ define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) { ;CHECK-LABEL: vsel_float8: ;CHECK-NOT: vinsertf128 -; -; which translates into the boolean mask (big endian representation): -; 00010001 = 17. -; '1' means takes the first argument, '0' means takes the second argument. -; This is the opposite of the intel syntax, thus we expect -; the inverted mask: 11101110 = 238. 
-;CHECK: vblendps $238, %ymm1, %ymm0, %ymm0 +;CHECK: vblendps {{.*}} ## ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] ;CHECK: ret define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) { %vsel = select <8 x i1> , <8 x float> %v1, <8 x float> %v2 @@ -76,7 +70,7 @@ define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) { ;CHECK-LABEL: vsel_i328: ;CHECK-NOT: vinsertf128 -;CHECK: vblendps $238, %ymm1, %ymm0, %ymm0 +;CHECK: vblendps {{.*}} ## ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] ;CHECK-NEXT: ret define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) { %vsel = select <8 x i1> , <8 x i32> %v1, <8 x i32> %v2 diff --git a/test/CodeGen/X86/blend-msb.ll b/test/CodeGen/X86/blend-msb.ll index 34aaf2c31ac..c4a6d32ae5a 100644 --- a/test/CodeGen/X86/blend-msb.ll +++ b/test/CodeGen/X86/blend-msb.ll @@ -22,17 +22,7 @@ define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) { } ;CHECK-LABEL: vsel_8xi16: -; The select mask is -; -; which translates into the boolean mask (big endian representation): -; 00010001 = 17. -; '1' means takes the first argument, '0' means takes the second argument. -; This is the opposite of the intel syntax, thus we expect -; the inverted mask: 11101110 = 238. -; According to the ABI: -; v1 is in xmm0 => first argument is xmm0. -; v2 is in xmm1 => second argument is xmm1. 
-;CHECK: pblendw $238, %xmm1, %xmm0 +;CHECK: pblendw {{.*}} ## xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4],xmm1[5,6,7] ;CHECK: ret define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) { %vsel = select <8 x i1> , <8 x i16> %v1, <8 x i16> %v2 diff --git a/test/CodeGen/X86/sse41-blend.ll b/test/CodeGen/X86/sse41-blend.ll index 3992da0b512..4e1722da8d0 100644 --- a/test/CodeGen/X86/sse41-blend.ll +++ b/test/CodeGen/X86/sse41-blend.ll @@ -10,7 +10,7 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) { ;CHECK-LABEL: vsel_4xi8: -;CHECK: blendps +;CHECK: blendw ;CHECK: ret define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) { %vsel = select <4 x i1> , <4 x i8> %v1, <4 x i8> %v2 @@ -18,7 +18,7 @@ define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) { } ;CHECK-LABEL: vsel_4xi16: -;CHECK: blendps +;CHECK: blendw ;CHECK: ret define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) { %vsel = select <4 x i1> , <4 x i16> %v1, <4 x i16> %v2 @@ -27,7 +27,7 @@ define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) { ;CHECK-LABEL: vsel_i32: -;CHECK: blendps +;CHECK: blendw ;CHECK: ret define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) { %vsel = select <4 x i1> , <4 x i32> %v1, <4 x i32> %v2 -- 2.34.1