From a9a568a79dbaf7315db863b4808d31ad9f5f91dc Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 2 May 2012 08:03:44 +0000 Subject: [PATCH] Add support for selecting AVX2 vpshuflw and vpshufhw. Add decoding support for AsmPrinter. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@155982 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../X86/InstPrinter/X86InstComments.cpp | 24 ++++++++- lib/Target/X86/Utils/X86ShuffleDecode.cpp | 52 ++++++++++++------- lib/Target/X86/Utils/X86ShuffleDecode.h | 4 +- lib/Target/X86/X86ISelLowering.cpp | 46 +++++++++++----- test/CodeGen/X86/avx2-shuffle.ll | 14 +++++ 5 files changed, 104 insertions(+), 36 deletions(-) diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index f532019acdf..dbee88614d6 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -96,7 +96,17 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::PSHUFHWmi: case X86::VPSHUFHWmi: DestName = getRegName(MI->getOperand(0).getReg()); - DecodePSHUFHWMask(MI->getOperand(MI->getNumOperands()-1).getImm(), + DecodePSHUFHWMask(MVT::v8i16, + MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + break; + case X86::VPSHUFHWYri: + Src1Name = getRegName(MI->getOperand(1).getReg()); + // FALL THROUGH. + case X86::VPSHUFHWYmi: + DestName = getRegName(MI->getOperand(0).getReg()); + DecodePSHUFHWMask(MVT::v16i16, + MI->getOperand(MI->getNumOperands()-1).getImm(), ShuffleMask); break; case X86::PSHUFLWri: @@ -106,7 +116,17 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::PSHUFLWmi: case X86::VPSHUFLWmi: DestName = getRegName(MI->getOperand(0).getReg()); - DecodePSHUFLWMask(MI->getOperand(MI->getNumOperands()-1).getImm(), + DecodePSHUFLWMask(MVT::v8i16, + MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + break; + case X86::VPSHUFLWYri: + Src1Name = getRegName(MI->getOperand(1).getReg()); + // FALL THROUGH. + case X86::VPSHUFLWYmi: + DestName = getRegName(MI->getOperand(0).getReg()); + DecodePSHUFLWMask(MVT::v16i16, + MI->getOperand(MI->getNumOperands()-1).getImm(), ShuffleMask); break; diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index a802333002d..a1f242476ef 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -70,7 +70,7 @@ void DecodePSHUFMask(EVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask) { unsigned NumLanes = VT.getSizeInBits() / 128; unsigned NumLaneElts = NumElts / NumLanes; - int NewImm = Imm; + unsigned NewImm = Imm; for (unsigned l = 0; l != NumElts; l += NumLaneElts) { for (unsigned i = 0; i != NumLaneElts; ++i) { ShuffleMask.push_back(NewImm % NumLaneElts + l); @@ -80,26 +80,38 @@ void DecodePSHUFMask(EVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask) { } } -void DecodePSHUFHWMask(unsigned Imm, SmallVectorImpl &ShuffleMask) { - ShuffleMask.push_back(0); - ShuffleMask.push_back(1); - ShuffleMask.push_back(2); - ShuffleMask.push_back(3); - for (unsigned i = 0; i != 4; ++i) { - ShuffleMask.push_back(4+(Imm & 3)); - Imm >>= 2; +void DecodePSHUFHWMask(EVT VT, unsigned Imm, + SmallVectorImpl &ShuffleMask) { + unsigned NumLanes = VT.getSizeInBits() / 128; + unsigned NumElts = 8 * NumLanes; + + for (unsigned l = 0; l != NumElts; l += 8) { + unsigned NewImm = Imm; + for (unsigned i = 0, e = 4; i != e; ++i) { + ShuffleMask.push_back(l + i); + } + for (unsigned i = 4, e = 8; i != e; ++i) { + ShuffleMask.push_back(l + 4 + (NewImm & 3)); + NewImm >>= 2; + } } } -void DecodePSHUFLWMask(unsigned Imm, SmallVectorImpl &ShuffleMask) { - for (unsigned i = 0; i != 4; ++i) { - ShuffleMask.push_back((Imm & 3)); - Imm >>= 2; +void DecodePSHUFLWMask(EVT VT, unsigned Imm, + SmallVectorImpl &ShuffleMask) { + unsigned NumLanes = VT.getSizeInBits() / 128; + unsigned NumElts = 8 * NumLanes; + + for (unsigned l = 0; l != NumElts; l += 8) { + unsigned NewImm = Imm; + for (unsigned i = 0, e = 4; i != e; ++i) { + ShuffleMask.push_back(l + (NewImm & 3)); + NewImm >>= 2; + } + for (unsigned i = 4, e = 8; i != e; ++i) { + ShuffleMask.push_back(l + i); + } } - ShuffleMask.push_back(4); - ShuffleMask.push_back(5); - ShuffleMask.push_back(6); - ShuffleMask.push_back(7); } /// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates @@ -111,7 +123,7 @@ void DecodeSHUFPMask(EVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask) { unsigned NumLanes = VT.getSizeInBits() / 128; unsigned NumLaneElts = NumElts / NumLanes; - int NewImm = Imm; + unsigned NewImm = Imm; for (unsigned l = 0; l != NumElts; l += NumLaneElts) { // Part that reads from dest. for (unsigned i = 0; i != NumLaneElts/2; ++i) { @@ -176,9 +188,9 @@ void DecodeVPERM2X128Mask(EVT VT, unsigned Imm, unsigned FstHalfBegin = (Imm & 0x3) * HalfSize; unsigned SndHalfBegin = ((Imm >> 4) & 0x3) * HalfSize; - for (int i = FstHalfBegin, e = FstHalfBegin+HalfSize; i != e; ++i) + for (unsigned i = FstHalfBegin, e = FstHalfBegin+HalfSize; i != e; ++i) ShuffleMask.push_back(i); - for (int i = SndHalfBegin, e = SndHalfBegin+HalfSize; i != e; ++i) + for (unsigned i = SndHalfBegin, e = SndHalfBegin+HalfSize; i != e; ++i) ShuffleMask.push_back(i); } diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h index 5b8c6ef62e2..14545e73022 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -37,9 +37,9 @@ void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl &ShuffleMask); void DecodePSHUFMask(EVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask); -void DecodePSHUFHWMask(unsigned Imm, SmallVectorImpl &ShuffleMask); +void DecodePSHUFHWMask(EVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask); -void DecodePSHUFLWMask(unsigned Imm, SmallVectorImpl &ShuffleMask); +void DecodePSHUFLWMask(EVT, unsigned Imm, SmallVectorImpl &ShuffleMask); /// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates /// the type of the vector allowing it to handle different datatypes and vector diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 8005e239ddf..7ab4b26986c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3196,8 +3196,8 @@ static bool isPSHUFDMask(ArrayRef Mask, EVT VT) { /// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that /// is suitable for input to PSHUFHW. -static bool isPSHUFHWMask(ArrayRef Mask, EVT VT) { - if (VT != MVT::v8i16) +static bool isPSHUFHWMask(ArrayRef Mask, EVT VT, bool HasAVX2) { + if (VT != MVT::v8i16 && (!HasAVX2 || VT != MVT::v16i16)) return false; // Lower quadword copied in order or undef. @@ -3206,16 +3206,27 @@ static bool isPSHUFHWMask(ArrayRef Mask, EVT VT) { // Upper quadword shuffled. for (unsigned i = 4; i != 8; ++i) - if (Mask[i] >= 0 && (Mask[i] < 4 || Mask[i] > 7)) + if (!isUndefOrInRange(Mask[i], 4, 8)) return false; + if (VT == MVT::v16i16) { + // Lower quadword copied in order or undef. + if (!isSequentialOrUndefInRange(Mask, 8, 4, 8)) + return false; + + // Upper quadword shuffled. + for (unsigned i = 12; i != 16; ++i) + if (!isUndefOrInRange(Mask[i], 12, 16)) + return false; + } + return true; } /// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that /// is suitable for input to PSHUFLW. -static bool isPSHUFLWMask(ArrayRef Mask, EVT VT) { - if (VT != MVT::v8i16) +static bool isPSHUFLWMask(ArrayRef Mask, EVT VT, bool HasAVX2) { + if (VT != MVT::v8i16 && (!HasAVX2 || VT != MVT::v16i16)) return false; // Upper quadword copied in order. @@ -3224,9 +3235,20 @@ static bool isPSHUFLWMask(ArrayRef Mask, EVT VT) { // Lower quadword shuffled. for (unsigned i = 0; i != 4; ++i) - if (Mask[i] >= 4) + if (!isUndefOrInRange(Mask[i], 0, 4)) return false; + if (VT == MVT::v16i16) { + // Upper quadword copied in order. + if (!isSequentialOrUndefInRange(Mask, 12, 4, 12)) + return false; + + // Lower quadword shuffled. + for (unsigned i = 8; i != 12; ++i) + if (!isUndefOrInRange(Mask[i], 8, 12)) + return false; + } + return true; } @@ -4405,12 +4427,12 @@ static bool getTargetShuffleMask(SDNode *N, EVT VT, break; case X86ISD::PSHUFHW: ImmN = N->getOperand(N->getNumOperands()-1); - DecodePSHUFHWMask(cast(ImmN)->getZExtValue(), Mask); + DecodePSHUFHWMask(VT, cast(ImmN)->getZExtValue(), Mask); IsUnary = true; break; case X86ISD::PSHUFLW: ImmN = N->getOperand(N->getNumOperands()-1); - DecodePSHUFLWMask(cast(ImmN)->getZExtValue(), Mask); + DecodePSHUFLWMask(VT, cast(ImmN)->getZExtValue(), Mask); IsUnary = true; break; case X86ISD::MOVSS: @@ -6581,12 +6603,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG); } - if (isPSHUFHWMask(M, VT)) + if (isPSHUFHWMask(M, VT, HasAVX2)) return getTargetShuffleNode(X86ISD::PSHUFHW, dl, VT, V1, getShufflePSHUFHWImmediate(SVOp), DAG); - if (isPSHUFLWMask(M, VT)) + if (isPSHUFLWMask(M, VT, HasAVX2)) return getTargetShuffleNode(X86ISD::PSHUFLW, dl, VT, V1, getShufflePSHUFLWImmediate(SVOp), DAG); @@ -11376,8 +11398,8 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl &M, isMOVLMask(M, VT) || isSHUFPMask(M, VT, Subtarget->hasAVX()) || isPSHUFDMask(M, VT) || - isPSHUFHWMask(M, VT) || - isPSHUFLWMask(M, VT) || + isPSHUFHWMask(M, VT, Subtarget->hasAVX2()) || + isPSHUFLWMask(M, VT, Subtarget->hasAVX2()) || isPALIGNRMask(M, VT, Subtarget) || isUNPCKLMask(M, VT, Subtarget->hasAVX2()) || isUNPCKHMask(M, VT, Subtarget->hasAVX2()) || diff --git a/test/CodeGen/X86/avx2-shuffle.ll b/test/CodeGen/X86/avx2-shuffle.ll index ad75e93c9fa..bb9f4605570 100644 --- a/test/CodeGen/X86/avx2-shuffle.ll +++ b/test/CodeGen/X86/avx2-shuffle.ll @@ -12,3 +12,17 @@ define <16 x i16> @blendw1(<16 x i16> %a, <16 x i16> %b) nounwind alwaysinline { %t = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %t } + +; CHECK: vpshufhw $27, %ymm +define <16 x i16> @vpshufhw(<16 x i16> %src1) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> + ret <16 x i16> %shuffle.i +} + +; CHECK: vpshuflw $27, %ymm +define <16 x i16> @vpshuflw(<16 x i16> %src1) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> + ret <16 x i16> %shuffle.i +} -- 2.34.1