From: Craig Topper
Date: Tue, 22 Nov 2011 01:57:35 +0000 (+0000)
Subject: Fix shuffle decoding logic to handle UNPCKLPS/UNPCKLPD on 256-bit vectors correctly...
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=f7de577a08a705970f0fd8f3c1bb40f7040e4476;p=oota-llvm.git

Fix shuffle decoding logic to handle UNPCKLPS/UNPCKLPD on 256-bit vectors correctly. Add support for decoding UNPCKHPS/UNPCKHPD for AVX 128-bit and 256-bit forms.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@145055 91177308-0d34-0410-b5e6-96231b3b80d8
---

Review notes (this section is not part of the commit message):
  * The VUNPCKHPDYrr/VUNPCKHPDYrm case in X86InstComments.cpp now calls
    DecodeUNPCKHPDMask(4, ...). The scraped copy of this patch called
    DecodeUNPCKLPDMask(4, ...), i.e. the low-half decoder, which would print
    the wrong shuffle comment for the 256-bit unpack-high instruction.
  * The "<unsigned>" template argument of SmallVectorImpl was missing
    throughout the scraped copy and has been restored.
  * Line breaks and indentation were lost in the scraped copy and have been
    reconstructed to match the hunk line counts; apply with
    --ignore-whitespace if context lines do not match exactly.
  * Because one added line differs from the scraped text, the post-image blob
    hash on the first "index" line no longer describes the resulting file.

diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 8d85b95fe81..3f5a9a9a1dd 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -197,16 +197,44 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
     Src2Name = getRegName(MI->getOperand(2).getReg());
     // FALL THROUGH.
   case X86::UNPCKHPDrm:
-    DecodeUNPCKHPMask(2, ShuffleMask);
+    DecodeUNPCKHPDMask(2, ShuffleMask);
     Src1Name = getRegName(MI->getOperand(0).getReg());
     break;
+  case X86::VUNPCKHPDrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    // FALL THROUGH.
+  case X86::VUNPCKHPDrm:
+    DecodeUNPCKHPDMask(2, ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    break;
+  case X86::VUNPCKHPDYrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    // FALL THROUGH.
+  case X86::VUNPCKHPDYrm:
+    DecodeUNPCKHPDMask(4, ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    break;
   case X86::UNPCKHPSrr:
     Src2Name = getRegName(MI->getOperand(2).getReg());
     // FALL THROUGH.
   case X86::UNPCKHPSrm:
-    DecodeUNPCKHPMask(4, ShuffleMask);
+    DecodeUNPCKHPSMask(4, ShuffleMask);
     Src1Name = getRegName(MI->getOperand(0).getReg());
     break;
+  case X86::VUNPCKHPSrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    // FALL THROUGH.
+  case X86::VUNPCKHPSrm:
+    DecodeUNPCKHPSMask(4, ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    break;
+  case X86::VUNPCKHPSYrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    // FALL THROUGH.
+  case X86::VUNPCKHPSYrm:
+    DecodeUNPCKHPSMask(8, ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    break;
   case X86::VPERMILPSri:
     DecodeVPERMILPSMask(4, MI->getOperand(2).getImm(),
                         ShuffleMask);
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index aeb3309d09a..8acd3c358f8 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -142,11 +142,32 @@ void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
   }
 }
 
-void DecodeUNPCKHPMask(unsigned NElts,
-                       SmallVectorImpl<unsigned> &ShuffleMask) {
-  for (unsigned i = 0; i != NElts/2; ++i) {
-    ShuffleMask.push_back(i+NElts/2);        // Reads from dest
-    ShuffleMask.push_back(i+NElts+NElts/2);  // Reads from src
+void DecodeUNPCKHPSMask(unsigned NElts,
+                        SmallVectorImpl<unsigned> &ShuffleMask) {
+  DecodeUNPCKHPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask);
+}
+
+void DecodeUNPCKHPDMask(unsigned NElts,
+                        SmallVectorImpl<unsigned> &ShuffleMask) {
+  DecodeUNPCKHPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask);
+}
+
+void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
+  unsigned NumElts = VT.getVectorNumElements();
+
+  // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
+  // independently on 128-bit lanes.
+  unsigned NumLanes = VT.getSizeInBits() / 128;
+  if (NumLanes == 0 ) NumLanes = 1;  // Handle MMX
+  unsigned NumLaneElts = NumElts / NumLanes;
+
+  for (unsigned s = 0; s < NumLanes; ++s) {
+    unsigned Start = s * NumLaneElts + NumLaneElts/2;
+    unsigned End   = s * NumLaneElts + NumLaneElts;
+    for (unsigned i = Start; i != End; ++i) {
+      ShuffleMask.push_back(i);          // Reads from dest/src1
+      ShuffleMask.push_back(i+NumElts);  // Reads from src/src2
+    }
   }
 }
 
@@ -163,8 +184,7 @@ void DecodeUNPCKLPDMask(unsigned NElts,
 /// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
 /// etc. VT indicates the type of the vector allowing it to handle different
 /// datatypes and vector widths.
-void DecodeUNPCKLPMask(EVT VT,
-                       SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
   unsigned NumElts = VT.getVectorNumElements();
 
   // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -173,16 +193,13 @@ void DecodeUNPCKLPMask(EVT VT,
   if (NumLanes == 0 ) NumLanes = 1;  // Handle MMX
   unsigned NumLaneElts = NumElts / NumLanes;
 
-  unsigned Start = 0;
-  unsigned End = NumLaneElts / 2;
   for (unsigned s = 0; s < NumLanes; ++s) {
+    unsigned Start = s * NumLaneElts;
+    unsigned End   = s * NumLaneElts + NumLaneElts/2;
     for (unsigned i = Start; i != End; ++i) {
-      ShuffleMask.push_back(i);                 // Reads from dest/src1
-      ShuffleMask.push_back(i+NumLaneElts);     // Reads from src/src2
+      ShuffleMask.push_back(i);          // Reads from dest/src1
+      ShuffleMask.push_back(i+NumElts);  // Reads from src/src2
     }
-    // Process the next 128 bits.
-    Start += NumLaneElts;
-    End += NumLaneElts;
   }
 }
 
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index 58193e6a468..d7150166c0c 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -67,8 +67,16 @@ void DecodePUNPCKHMask(unsigned NElts,
 void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
                       SmallVectorImpl<unsigned> &ShuffleMask);
 
-void DecodeUNPCKHPMask(unsigned NElts,
-                       SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeUNPCKHPSMask(unsigned NElts,
+                        SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeUNPCKHPDMask(unsigned NElts,
+                        SmallVectorImpl<unsigned> &ShuffleMask);
+
+/// DecodeUNPCKHPMask - This decodes the shuffle masks for unpckhps/unpckhpd
+/// etc. VT indicates the type of the vector allowing it to handle different
+/// datatypes and vector widths.
+void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask);
 
 void DecodeUNPCKLPSMask(unsigned NElts,
                         SmallVectorImpl<unsigned> &ShuffleMask);
@@ -79,8 +87,7 @@ void DecodeUNPCKLPDMask(unsigned NElts,
 /// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
 /// etc. VT indicates the type of the vector allowing it to handle different
 /// datatypes and vector widths.
-void DecodeUNPCKLPMask(EVT VT,
-                       SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask);
 
 
 // DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 75928b70ec4..5a03f444bd8 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -4653,7 +4653,7 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
     case X86ISD::UNPCKHPD:
     case X86ISD::VUNPCKHPSY:
     case X86ISD::VUNPCKHPDY:
-      DecodeUNPCKHPMask(NumElems, ShuffleMask);
+      DecodeUNPCKHPMask(VT, ShuffleMask);
       break;
     case X86ISD::PUNPCKLBW:
     case X86ISD::PUNPCKLWD: