- Only use pshufd for v4i32 vector shuffles.
authorEvan Cheng <evan.cheng@apple.com>
Wed, 29 Mar 2006 01:30:51 +0000 (01:30 +0000)
committerEvan Cheng <evan.cheng@apple.com>
Wed, 29 Mar 2006 01:30:51 +0000 (01:30 +0000)
- Other shuffle related fixes.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27244 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/X86/X86ISelLowering.cpp
lib/Target/X86/X86InstrSSE.td

index 303b808a8d6569f6c97cb92801c9c7bb3e4c2842..8247167920c25fe597b18b99e0c0208a6ad067c1 100644 (file)
@@ -1583,15 +1583,21 @@ unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
   return Mask;
 }
 
-/// CommuteVectorShuffleIfNeeded - Swap vector_shuffle operands (as well as
-/// values in ther permute mask if needed. Return an empty SDOperand is it is
-/// already well formed.
-static SDOperand CommuteVectorShuffleIfNeeded(SDOperand V1, SDOperand V2,
-                                              SDOperand Mask, MVT::ValueType VT,
-                                              SelectionDAG &DAG) {
+/// NormalizeVectorShuffle - Swap vector_shuffle operands (as well as
+/// values in the permute mask) if needed. Use V1 as second vector if it is
+/// undef. Return an empty SDOperand if it is already well formed.
+static SDOperand NormalizeVectorShuffle(SDOperand V1, SDOperand V2,
+                                        SDOperand Mask, MVT::ValueType VT,
+                                        SelectionDAG &DAG) {
   unsigned NumElems = Mask.getNumOperands();
   SDOperand Half1 = Mask.getOperand(0);
   SDOperand Half2 = Mask.getOperand(NumElems/2);
+  bool V2Undef = false;
+  if (V2.getOpcode() == ISD::UNDEF) {
+    V2Undef = true;
+    V2 = V1;
+  }
+
   if (cast<ConstantSDNode>(Half1)->getValue() >= NumElems &&
       cast<ConstantSDNode>(Half2)->getValue() <  NumElems) {
     // Swap the operands and change mask.
@@ -1604,6 +1610,10 @@ static SDOperand CommuteVectorShuffleIfNeeded(SDOperand V1, SDOperand V2,
       DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), MaskVec);
     return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask);
   }
+
+  if (V2Undef)
+    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
+
   return SDOperand();
 }
 
@@ -2387,8 +2397,26 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
     MVT::ValueType VT = Op.getValueType();
     unsigned NumElems = PermMask.getNumOperands();
 
-    if (NumElems == 2)
-      return CommuteVectorShuffleIfNeeded(V1, V2, PermMask, VT, DAG);
+    if (X86::isUNPCKLMask(PermMask.Val) ||
+        X86::isUNPCKHMask(PermMask.Val))
+      // Leave the VECTOR_SHUFFLE alone. It matches {P}UNPCKL*.
+      return SDOperand();
+
+    // PSHUFD's 2nd vector must be undef.
+    if (MVT::isInteger(VT) && X86::isPSHUFDMask(PermMask.Val))
+      if (V2.getOpcode() == ISD::UNDEF)
+        return SDOperand();
+      else
+        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
+                           DAG.getNode(ISD::UNDEF, V1.getValueType()),
+                           PermMask);
+
+    if (NumElems == 2 ||
+        X86::isSplatMask(PermMask.Val) ||
+        X86::isSHUFPMask(PermMask.Val)) {
+      return NormalizeVectorShuffle(V1, V2, PermMask, VT, DAG);
+    }
+#if 0
     else if (X86::isSplatMask(PermMask.Val)) {
       // Handle splat cases.
       if (V2.getOpcode() == ISD::UNDEF)
@@ -2400,10 +2428,6 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
         return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
                            DAG.getNode(ISD::UNDEF, V1.getValueType()),
                            PermMask);
-    } else if (X86::isUNPCKLMask(PermMask.Val) ||
-               X86::isUNPCKHMask(PermMask.Val)) {
-      // Leave the VECTOR_SHUFFLE alone. It matches {P}UNPCKL*.
-      return SDOperand();
     } else if (X86::isPSHUFDMask(PermMask.Val)) {
       if (V2.getOpcode() == ISD::UNDEF)
         // Leave the VECTOR_SHUFFLE alone. It matches PSHUFD.
@@ -2414,7 +2438,8 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
                            DAG.getNode(ISD::UNDEF, V1.getValueType()),
                            PermMask);
     } else if (X86::isSHUFPMask(PermMask.Val))
-      return CommuteVectorShuffleIfNeeded(V1, V2, PermMask, VT, DAG);
+      return NormalizeVectorShuffle(V1, V2, PermMask, VT, DAG);
+#endif
 
     assert(0 && "Unexpected VECTOR_SHUFFLE to lower");
     abort();
index 82d4c1f874f9ed3d9443c9db88b0a4eb186f2f35..18d889f69fc4cefd153a52e1ac101543d2e6cf61 100644 (file)
@@ -79,9 +79,8 @@ def UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{
   return X86::isUNPCKHMask(N);
 }]>;
 
-// Only use PSHUF if it is not a splat.
 def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
-  return !X86::isSplatMask(N) && X86::isPSHUFDMask(N);
+  return X86::isPSHUFDMask(N);
 }], SHUFFLE_get_shuf_imm>;
 
 def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
@@ -918,86 +917,92 @@ def PSHUFWrm : PSIi8<0x70, MRMSrcMem,
                      "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
 def PSHUFDrr : PDIi8<0x70, MRMDestReg,
                      (ops VR128:$dst, VR128:$src1, i8imm:$src2),
-                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
+                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
+                     [(set VR128:$dst, (v4i32 (vector_shuffle
+                                               VR128:$src1, (undef),
+                                               PSHUFD_shuffle_mask:$src2)))]>;
 def PSHUFDrm : PDIi8<0x70, MRMSrcMem,
                      (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
-                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
+                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
+                     [(set VR128:$dst, (v4i32 (vector_shuffle
+                                               (load addr:$src1), (undef),
+                                               PSHUFD_shuffle_mask:$src2)))]>;
 
 let isTwoAddress = 1 in {
 def SHUFPSrr : PSIi8<0xC6, MRMSrcReg, 
                      (ops VR128:$dst, VR128:$src1, VR128:$src2, i32i8imm:$src3),
                      "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
-                    [(set VR128:$dst, (vector_shuffle
-                                       (v4f32 VR128:$src1), (v4f32 VR128:$src2),
-                                       SHUFP_shuffle_mask:$src3))]>;
+                     [(set VR128:$dst, (v4f32 (vector_shuffle
+                                               VR128:$src1, VR128:$src2,
+                                               SHUFP_shuffle_mask:$src3)))]>;
 def SHUFPSrm : PSIi8<0xC6, MRMSrcMem, 
                    (ops VR128:$dst, VR128:$src1, f128mem:$src2, i32i8imm:$src3),
                      "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
-                    [(set VR128:$dst, (vector_shuffle
-                                       (v4f32 VR128:$src1), (load addr:$src2),
-                                       SHUFP_shuffle_mask:$src3))]>;
+                     [(set VR128:$dst, (v4f32 (vector_shuffle
+                                               VR128:$src1, (load addr:$src2),
+                                               SHUFP_shuffle_mask:$src3)))]>;
 def SHUFPDrr : PDIi8<0xC6, MRMSrcReg, 
                      (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
                      "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
-                    [(set VR128:$dst, (vector_shuffle
-                                       (v2f64 VR128:$src1), (v2f64 VR128:$src2),
-                                       SHUFP_shuffle_mask:$src3))]>;
+                     [(set VR128:$dst, (v2f64 (vector_shuffle
+                                               VR128:$src1, VR128:$src2,
+                                               SHUFP_shuffle_mask:$src3)))]>;
 def SHUFPDrm : PDIi8<0xC6, MRMSrcMem, 
                      (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
                      "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
-                    [(set VR128:$dst, (vector_shuffle
-                                       (v2f64 VR128:$src1), (load addr:$src2),
-                                       SHUFP_shuffle_mask:$src3))]>;
+                     [(set VR128:$dst, (v2f64 (vector_shuffle
+                                               VR128:$src1, (load addr:$src2),
+                                               SHUFP_shuffle_mask:$src3)))]>;
 
 def UNPCKHPSrr : PSI<0x15, MRMSrcReg, 
                     (ops VR128:$dst, VR128:$src1, VR128:$src2),
                     "unpckhps {$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst,
-                      (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
-                              UNPCKH_shuffle_mask)))]>;
+                    [(set VR128:$dst, (v4f32 (vector_shuffle
+                                              VR128:$src1, VR128:$src2,
+                                              UNPCKH_shuffle_mask)))]>;
 def UNPCKHPSrm : PSI<0x15, MRMSrcMem, 
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                     "unpckhps {$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst,
-                      (v4f32 (vector_shuffle VR128:$src1, (load addr:$src2),
-                              UNPCKH_shuffle_mask)))]>;
+                    [(set VR128:$dst, (v4f32 (vector_shuffle
+                                              VR128:$src1, (load addr:$src2),
+                                              UNPCKH_shuffle_mask)))]>;
 def UNPCKHPDrr : PDI<0x15, MRMSrcReg, 
                     (ops VR128:$dst, VR128:$src1, VR128:$src2),
                     "unpckhpd {$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst,
-                      (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
-                              UNPCKH_shuffle_mask)))]>;
+                    [(set VR128:$dst, (v2f64 (vector_shuffle
+                                              VR128:$src1, VR128:$src2,
+                                              UNPCKH_shuffle_mask)))]>;
 def UNPCKHPDrm : PDI<0x15, MRMSrcMem, 
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                     "unpckhpd {$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst,
-                      (v2f64 (vector_shuffle VR128:$src1, (load addr:$src2),
-                              UNPCKH_shuffle_mask)))]>;
+                    [(set VR128:$dst, (v2f64 (vector_shuffle
+                                              VR128:$src1, (load addr:$src2),
+                                              UNPCKH_shuffle_mask)))]>;
 
 def UNPCKLPSrr : PSI<0x14, MRMSrcReg, 
                     (ops VR128:$dst, VR128:$src1, VR128:$src2),
                     "unpcklps {$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst,
-                      (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
-                              UNPCKL_shuffle_mask)))]>;
+                    [(set VR128:$dst, (v4f32 (vector_shuffle
+                                              VR128:$src1, VR128:$src2,
+                                              UNPCKL_shuffle_mask)))]>;
 def UNPCKLPSrm : PSI<0x14, MRMSrcMem, 
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                     "unpcklps {$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst,
-                      (v4f32 (vector_shuffle VR128:$src1, (load addr:$src2),
-                              UNPCKL_shuffle_mask)))]>;
+                    [(set VR128:$dst, (v4f32 (vector_shuffle
+                                              VR128:$src1, (load addr:$src2),
+                                              UNPCKL_shuffle_mask)))]>;
 def UNPCKLPDrr : PDI<0x14, MRMSrcReg, 
                     (ops VR128:$dst, VR128:$src1, VR128:$src2),
                     "unpcklpd {$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst,
-                      (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
-                              UNPCKL_shuffle_mask)))]>;
+                    [(set VR128:$dst, (v2f64 (vector_shuffle
+                                              VR128:$src1, VR128:$src2,
+                                              UNPCKL_shuffle_mask)))]>;
 def UNPCKLPDrm : PDI<0x14, MRMSrcMem, 
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                     "unpcklpd {$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst,
-                      (v2f64 (vector_shuffle VR128:$src1, (load addr:$src2),
-                              UNPCKL_shuffle_mask)))]>;
+                    [(set VR128:$dst, (v2f64 (vector_shuffle
+                                              VR128:$src1, (load addr:$src2),
+                                              UNPCKL_shuffle_mask)))]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -1354,11 +1359,3 @@ def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm),
           (v2f64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;
 def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm),
           (v2i64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;
-
-// Shuffle v4f32 / v4i32, undef. These should only match if splat cases do not.
-def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
-          (v4f32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
-      Requires<[HasSSE2]>;
-def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
-          (v4i32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
-      Requires<[HasSSE2]>;