Fix shuffle decoding logic to handle UNPCKLPS/UNPCKLPD on 256-bit vectors correctly...
authorCraig Topper <craig.topper@gmail.com>
Tue, 22 Nov 2011 01:57:35 +0000 (01:57 +0000)
committerCraig Topper <craig.topper@gmail.com>
Tue, 22 Nov 2011 01:57:35 +0000 (01:57 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@145055 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/X86/InstPrinter/X86InstComments.cpp
lib/Target/X86/Utils/X86ShuffleDecode.cpp
lib/Target/X86/Utils/X86ShuffleDecode.h
lib/Target/X86/X86ISelLowering.cpp

index 8d85b95fe81d97fe1c883d41fbcfb83949b3fa05..3f5a9a9a1dd43e9e671ded922d4c92d5aa536830 100644 (file)
@@ -197,16 +197,44 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
     Src2Name = getRegName(MI->getOperand(2).getReg());
     // FALL THROUGH.
   case X86::UNPCKHPDrm:
-    DecodeUNPCKHPMask(2, ShuffleMask);
+    DecodeUNPCKHPDMask(2, ShuffleMask);
     Src1Name = getRegName(MI->getOperand(0).getReg());
     break;
+  case X86::VUNPCKHPDrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    // FALL THROUGH.
+  case X86::VUNPCKHPDrm:
+    DecodeUNPCKHPDMask(2, ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    break;
+  case X86::VUNPCKHPDYrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    // FALL THROUGH.
+  case X86::VUNPCKHPDYrm:
+    DecodeUNPCKLPDMask(4, ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    break;
   case X86::UNPCKHPSrr:
     Src2Name = getRegName(MI->getOperand(2).getReg());
     // FALL THROUGH.
   case X86::UNPCKHPSrm:
-    DecodeUNPCKHPMask(4, ShuffleMask);
+    DecodeUNPCKHPSMask(4, ShuffleMask);
     Src1Name = getRegName(MI->getOperand(0).getReg());
     break;
+  case X86::VUNPCKHPSrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    // FALL THROUGH.
+  case X86::VUNPCKHPSrm:
+    DecodeUNPCKHPSMask(4, ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    break;
+  case X86::VUNPCKHPSYrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    // FALL THROUGH.
+  case X86::VUNPCKHPSYrm:
+    DecodeUNPCKHPSMask(8, ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    break;
   case X86::VPERMILPSri:
     DecodeVPERMILPSMask(4, MI->getOperand(2).getImm(),
                         ShuffleMask);
index aeb3309d09aa3a874e1dc0c0c3f99ef88491a56e..8acd3c358f89a2d302beb191ea6dc745eaf6ed83 100644 (file)
@@ -142,11 +142,32 @@ void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
   }
 }
 
-void DecodeUNPCKHPMask(unsigned NElts,
-                       SmallVectorImpl<unsigned> &ShuffleMask) {
-  for (unsigned i = 0; i != NElts/2; ++i) {
-    ShuffleMask.push_back(i+NElts/2);        // Reads from dest
-    ShuffleMask.push_back(i+NElts+NElts/2);  // Reads from src
+void DecodeUNPCKHPSMask(unsigned NElts,
+                        SmallVectorImpl<unsigned> &ShuffleMask) {
+  DecodeUNPCKHPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask);
+}
+
+void DecodeUNPCKHPDMask(unsigned NElts,
+                        SmallVectorImpl<unsigned> &ShuffleMask) {
+  DecodeUNPCKHPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask);
+}
+
+void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
+  unsigned NumElts = VT.getVectorNumElements();
+
+  // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
+  // independently on 128-bit lanes.
+  unsigned NumLanes = VT.getSizeInBits() / 128;
+  if (NumLanes == 0 ) NumLanes = 1;  // Handle MMX
+  unsigned NumLaneElts = NumElts / NumLanes;
+
+  for (unsigned s = 0; s < NumLanes; ++s) {
+    unsigned Start = s * NumLaneElts + NumLaneElts/2;
+    unsigned End   = s * NumLaneElts + NumLaneElts;
+    for (unsigned i = Start; i != End; ++i) {
+      ShuffleMask.push_back(i);          // Reads from dest/src1
+      ShuffleMask.push_back(i+NumElts);  // Reads from src/src2
+    }
   }
 }
 
@@ -163,8 +184,7 @@ void DecodeUNPCKLPDMask(unsigned NElts,
 /// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
 /// etc.  VT indicates the type of the vector allowing it to handle different
 /// datatypes and vector widths.
-void DecodeUNPCKLPMask(EVT VT,
-                       SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
   unsigned NumElts = VT.getVectorNumElements();
 
   // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -173,16 +193,13 @@ void DecodeUNPCKLPMask(EVT VT,
   if (NumLanes == 0 ) NumLanes = 1;  // Handle MMX
   unsigned NumLaneElts = NumElts / NumLanes;
 
-  unsigned Start = 0;
-  unsigned End = NumLaneElts / 2;
   for (unsigned s = 0; s < NumLanes; ++s) {
+    unsigned Start = s * NumLaneElts;
+    unsigned End   = s * NumLaneElts + NumLaneElts/2;
     for (unsigned i = Start; i != End; ++i) {
-      ShuffleMask.push_back(i);                 // Reads from dest/src1
-      ShuffleMask.push_back(i+NumLaneElts);  // Reads from src/src2
+      ShuffleMask.push_back(i);          // Reads from dest/src1
+      ShuffleMask.push_back(i+NumElts);  // Reads from src/src2
     }
-    // Process the next 128 bits.
-    Start += NumLaneElts;
-    End += NumLaneElts;
   }
 }
 
index 58193e6a4688714f2366e67dea9a65688aab0d1e..d7150166c0c52ffb76967d8123617d080a773405 100644 (file)
@@ -67,8 +67,16 @@ void DecodePUNPCKHMask(unsigned NElts,
 void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
                       SmallVectorImpl<unsigned> &ShuffleMask);
 
-void DecodeUNPCKHPMask(unsigned NElts,
-                       SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeUNPCKHPSMask(unsigned NElts,
+                        SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeUNPCKHPDMask(unsigned NElts,
+                        SmallVectorImpl<unsigned> &ShuffleMask);
+
+/// DecodeUNPCKHPMask - This decodes the shuffle masks for unpckhps/unpckhpd
+/// etc.  VT indicates the type of the vector allowing it to handle different
+/// datatypes and vector widths.
+void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask);
 
 void DecodeUNPCKLPSMask(unsigned NElts,
                         SmallVectorImpl<unsigned> &ShuffleMask);
@@ -79,8 +87,7 @@ void DecodeUNPCKLPDMask(unsigned NElts,
 /// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
 /// etc.  VT indicates the type of the vector allowing it to handle different
 /// datatypes and vector widths.
-void DecodeUNPCKLPMask(EVT VT,
-                       SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask);
 
 
 // DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit
index 75928b70ec498c672b65280b590f26e49f702517..5a03f444bd893026e5385f644d81cb07c783a843 100644 (file)
@@ -4653,7 +4653,7 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
     case X86ISD::UNPCKHPD:
     case X86ISD::VUNPCKHPSY:
     case X86ISD::VUNPCKHPDY:
-      DecodeUNPCKHPMask(NumElems, ShuffleMask);
+      DecodeUNPCKHPMask(VT, ShuffleMask);
       break;
     case X86ISD::PUNPCKLBW:
     case X86ISD::PUNPCKLWD: