From b54c36fb4d8d32c1ae3a72823d6befa22b7ce4b4 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Thu, 26 Feb 2015 22:15:34 +0000 Subject: [PATCH] [x86] Fix PR22706 where we would incorrectly try to lower a v32i8 dynamic blend as legal. We made the same mistake in two different places. Whenever we are custom lowering a v32i8 blend we need to check whether we are custom lowering it only for constant conditions that can be shuffled, or whether we actually have AVX2 and full dynamic blending support on bytes. Both are fixed, with comments added to make it clear what is going on and a new test case. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230695 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 43 +++++++++++++++++++++--------- test/CodeGen/X86/vselect-avx.ll | 11 ++++++++ 2 files changed, 41 insertions(+), 13 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 656b32ec556..ecf085727f6 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -10126,24 +10126,31 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { if (!Subtarget->hasSSE41()) return SDValue(); - // Some types for vselect were previously set to Expand, not Legal or - // Custom. Return an empty SDValue so we fall-through to Expand, after - // the Custom lowering phase. - MVT VT = Op.getSimpleValueType(); - switch (VT.SimpleTy) { + // Only some types will be legal on some subtargets. If we can emit a legal + // VSELECT-matching blend, return Op, but if we need to expand, return + // a null value. + switch (Op.getSimpleValueType().SimpleTy) { default: - break; + // Most of the vector types have blends past SSE4.1. + return Op; + + case MVT::v32i8: + // The byte blends for AVX vectors were introduced only in AVX2. 
+ if (Subtarget->hasAVX2()) + return Op; + + return SDValue(); + case MVT::v8i16: case MVT::v16i16: + // AVX-512 BWI and VLX features support VSELECT with i16 elements. if (Subtarget->hasBWI() && Subtarget->hasVLX()) - break; + return Op; + + // FIXME: We should custom lower this by fixing the condition and using i8 + // blends. return SDValue(); } - - // We couldn't create a "Blend with immediate" node. - // This node should still be legal, but we'll have to emit a blendv* - // instruction. - return Op; } static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { @@ -20784,7 +20791,17 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // lowered. if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT)) return SDValue(); - if (!Subtarget->hasSSE41() || VT == MVT::v16i16 || VT == MVT::v8i16) + // FIXME: We don't support i16-element blends currently. We could and + // should support them by making *all* the bits in the condition be set + // rather than just the high bit and using an i8-element blend. + if (VT.getScalarType() == MVT::i16) + return SDValue(); + // Dynamic blending was only available from SSE4.1 onward. + if (VT.getSizeInBits() == 128 && !Subtarget->hasSSE41()) + return SDValue(); + // Byte blends are only available in AVX2 + if (VT.getSizeInBits() == 256 && VT.getScalarType() == MVT::i8 && + !Subtarget->hasAVX2()) return SDValue(); assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size"); diff --git a/test/CodeGen/X86/vselect-avx.ll b/test/CodeGen/X86/vselect-avx.ll index 26b00dbd85f..02a9ef4c1a3 100644 --- a/test/CodeGen/X86/vselect-avx.ll +++ b/test/CodeGen/X86/vselect-avx.ll @@ -79,3 +79,14 @@ define void @test3(<4 x i32> %induction30, <4 x i16>* %tmp16, <4 x i16>* %tmp17, store <4 x i16> %predphi, <4 x i16>* %tmp17, align 8 ret void } + +; We shouldn't try to lower this directly using VSELECT because we don't have +; vpblendvb in AVX1, only in AVX2. Instead, it should be expanded. 
+; +; CHECK-LABEL: PR22706: +; CHECK: vpcmpgtb +; CHECK: vpcmpgtb +define <32 x i8> @PR22706(<32 x i1> %x) { + %tmp = select <32 x i1> %x, <32 x i8> , <32 x i8> + ret <32 x i8> %tmp +} -- 2.34.1