return SDValue();
}
-// This function assumes its argument is a BUILD_VECTOR of constants or
-// undef SDNodes. i.e: ISD::isBuildVectorOfConstantSDNodes(BuildVector) is
-// true.
-static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector,
- unsigned &MaskValue) {
- MaskValue = 0;
- unsigned NumElems = BuildVector->getNumOperands();
- // There are 2 lanes if (NumElems > 8), and 1 lane otherwise.
- unsigned NumLanes = (NumElems - 1) / 8 + 1;
- unsigned NumElemsInLane = NumElems / NumLanes;
-
- // Blend for v16i16 should be symetric for the both lanes.
- for (unsigned i = 0; i < NumElemsInLane; ++i) {
- SDValue EltCond = BuildVector->getOperand(i);
- SDValue SndLaneEltCond =
- (NumLanes == 2) ? BuildVector->getOperand(i + NumElemsInLane) : EltCond;
-
- int Lane1Cond = -1, Lane2Cond = -1;
- if (isa<ConstantSDNode>(EltCond))
- Lane1Cond = !isZero(EltCond);
- if (isa<ConstantSDNode>(SndLaneEltCond))
- Lane2Cond = !isZero(SndLaneEltCond);
-
- if (Lane1Cond == Lane2Cond || Lane2Cond < 0)
- // Lane1Cond != 0, means we want the first argument.
- // Lane1Cond == 0, means we want the second argument.
- // The encoding of this argument is 0 for the first argument, 1
- // for the second. Therefore, invert the condition.
- MaskValue |= !Lane1Cond << i;
- else if (Lane1Cond < 0)
- MaskValue |= !Lane2Cond << i;
- else
- return false;
- }
- return true;
-}
-
/// \brief Try to lower a VSELECT instruction to an immediate-controlled blend
/// instruction.
static SDValue lowerVSELECTtoBLENDI(SDValue Op, const X86Subtarget *Subtarget,
} else {
// Everything else uses a generic blend mask computation with a custom type.
if (VT.isInteger()) {
- if (VT.is256BitVector()) {
- // The 256-bit integer blend instructions are only available on AVX2.
- if (!Subtarget->hasAVX2())
- return SDValue();
-
- // We do the blend on v8i32 for 256-bit integer types.
- BlendVT = MVT::v8i32;
- } else {
+ if (VT.is256BitVector())
+ // We cast to floating point types if integer blends aren't available,
+ // and we coerce integer blends when available to occur on the v8i32
+ // type.
+ BlendVT = Subtarget->hasAVX2()
+ ? MVT::v8i32
+ : MVT::getVectorVT(
+ MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
+ VT.getVectorNumElements());
+ else
// For 128-bit vectors we do the blend on v8i16 types.
BlendVT = MVT::v8i16;
- }
}
assert(BlendVT.getVectorNumElements() <= 8 &&
"Cannot blend more than 8 elements with an immediate!");
return std::make_pair(Opc, NeedSplit);
}
-static SDValue
-TransformVSELECTtoBlendVECTOR_SHUFFLE(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
- SDLoc dl(N);
- SDValue Cond = N->getOperand(0);
- SDValue LHS = N->getOperand(1);
- SDValue RHS = N->getOperand(2);
-
- if (Cond.getOpcode() == ISD::SIGN_EXTEND) {
- SDValue CondSrc = Cond->getOperand(0);
- if (CondSrc->getOpcode() == ISD::SIGN_EXTEND_INREG)
- Cond = CondSrc->getOperand(0);
- }
-
- MVT VT = N->getSimpleValueType(0);
- MVT EltVT = VT.getVectorElementType();
- unsigned NumElems = VT.getVectorNumElements();
- // There is no blend with immediate in AVX-512.
- if (VT.is512BitVector())
- return SDValue();
-
- if (!Subtarget->hasSSE41() || EltVT == MVT::i8)
- return SDValue();
- if (!Subtarget->hasInt256() && VT == MVT::v16i16)
- return SDValue();
-
- if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
- return SDValue();
-
- // A vselect where all conditions and data are constants can be optimized into
- // a single vector load by SelectionDAGLegalize::ExpandBUILD_VECTOR().
- if (ISD::isBuildVectorOfConstantSDNodes(LHS.getNode()) &&
- ISD::isBuildVectorOfConstantSDNodes(RHS.getNode()))
- return SDValue();
-
- unsigned MaskValue = 0;
- if (!BUILD_VECTORtoBlendMask(cast<BuildVectorSDNode>(Cond), MaskValue))
- return SDValue();
-
- SmallVector<int, 8> ShuffleMask(NumElems, -1);
- for (unsigned i = 0; i < NumElems; ++i) {
- // Be sure we emit undef where we can.
- if (Cond.getOperand(i)->getOpcode() == ISD::UNDEF)
- ShuffleMask[i] = -1;
- else
- ShuffleMask[i] = i + NumElems * ((MaskValue >> i) & 1);
- }
-
- return DAG.getVectorShuffle(VT, dl, LHS, RHS, &ShuffleMask[0]);
-}
-
/// PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT
/// nodes.
static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
DCI.CommitTargetLoweringOpt(TLO);
}
- // We should generate an X86ISD::BLENDI from a vselect if its argument
- // is a sign_extend_inreg of an any_extend of a BUILD_VECTOR of
- // constants. This specific pattern gets generated when we split a
- // selector for a 512 bit vector in a machine without AVX512 (but with
- // 256-bit vectors), during legalization:
- //
- // (vselect (sign_extend (any_extend (BUILD_VECTOR)) i1) LHS RHS)
- //
- // Iff we find this pattern and the build_vectors are built from
- // constants, we translate the vselect into a shuffle_vector that we
- // know will be matched by LowerVECTOR_SHUFFLEtoBlend.
- if (N->getOpcode() == ISD::VSELECT && !DCI.isBeforeLegalize()) {
- SDValue Shuffle = TransformVSELECTtoBlendVECTOR_SHUFFLE(N, DAG, Subtarget);
- if (Shuffle.getNode())
- return Shuffle;
- }
-
return SDValue();
}
;CHECK-LABEL: vsel_i32:
-;CHECK: vblendps $10, %xmm1, %xmm0, %xmm0
+;CHECK: vpblendw {{.*}} ## xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
;CHECK: ret
define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
%vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %v1, <4 x i32> %v2
;CHECK-LABEL: vsel_float8:
;CHECK-NOT: vinsertf128
-; <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>
-; which translates into the boolean mask (big endian representation):
-; 00010001 = 17.
-; '1' means takes the first argument, '0' means takes the second argument.
-; This is the opposite of the intel syntax, thus we expect
-; the inverted mask: 11101110 = 238.
-;CHECK: vblendps $238, %ymm1, %ymm0, %ymm0
+;CHECK: vblendps {{.*}} ## ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
;CHECK: ret
define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x float> %v1, <8 x float> %v2
;CHECK-LABEL: vsel_i328:
;CHECK-NOT: vinsertf128
-;CHECK: vblendps $238, %ymm1, %ymm0, %ymm0
+;CHECK: vblendps {{.*}} ## ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
;CHECK-NEXT: ret
define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i32> %v1, <8 x i32> %v2
}
;CHECK-LABEL: vsel_8xi16:
-; The select mask is
-; <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>
-; which translates into the boolean mask (big endian representation):
-; 00010001 = 17.
-; '1' means takes the first argument, '0' means takes the second argument.
-; This is the opposite of the intel syntax, thus we expect
-; the inverted mask: 11101110 = 238.
-; According to the ABI:
-; v1 is in xmm0 => first argument is xmm0.
-; v2 is in xmm1 => second argument is xmm1.
-;CHECK: pblendw $238, %xmm1, %xmm0
+;CHECK: pblendw {{.*}} ## xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4],xmm1[5,6,7]
;CHECK: ret
define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i16> %v1, <8 x i16> %v2