Add instruction selection for 256-bit VPSHUFD and 128-bit VPERMILPS/VPERMILPD.

author Craig Topper <craig.topper@gmail.com>

Tue, 7 Feb 2012 06:28:42 +0000 (06:28 +0000)

committer Craig Topper <craig.topper@gmail.com>

Tue, 7 Feb 2012 06:28:42 +0000 (06:28 +0000)
author Craig Topper <craig.topper@gmail.com>
Tue, 7 Feb 2012 06:28:42 +0000 (06:28 +0000)
committer Craig Topper <craig.topper@gmail.com>
Tue, 7 Feb 2012 06:28:42 +0000 (06:28 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 8bf72d15e8c0bd83940005acbf3a175c362487b3..b2eb0432e4cfb29d9ec75ab84b4339804fbcd38b 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -3713,7 +3713,7 @@ static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) {
  /// type is 32 or 64. In the VPERMILPS the high half of the mask should point
  /// to the same elements of the low, but to the higher half of the source.
  /// In VPERMILPD the two lanes could be shuffled independently of each other
-/// with the same restriction that lanes can't be crossed.
+/// with the same restriction that lanes can't be crossed. Also handles PSHUFDY.
  static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
    if (!HasAVX)
      return false;
@@ -6467,6 +6467,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
  
      unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp);
  
+    if (HasAVX && (VT == MVT::v4f32 || VT == MVT::v2f64))
+      return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1, TargetMask, DAG);
+
      if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32))
        return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG);
  
@@ -6636,9 +6639,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
      return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG);
  
    // Handle VPERMILPS/D* permutations
-  if (isVPERMILPMask(M, VT, HasAVX))
+  if (isVPERMILPMask(M, VT, HasAVX)) {
+    if (HasAVX2 && VT == MVT::v8i32)
+      return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1,
+                                  X86::getShuffleSHUFImmediate(SVOp), DAG);
      return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1,
                                  X86::getShuffleSHUFImmediate(SVOp), DAG);
+  }
  
    // Handle VPERM2F128/VPERM2I128 permutations
    if (isVPERM2X128Mask(M, VT, HasAVX))
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td

index d9a599c1bda16417c11bacbb18a7445fe8fa5fd3..fb70b9cf0af30f392436aad50e206638a437614a 100644 (file)
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3993,21 +3993,19 @@ def mi : Ii8<0x70, MRMSrcMem,
                                        (undef))))]>;
  }
  
-multiclass sse2_pshuffle_y<string OpcodeStr, ValueType vt, PatFrag pshuf_frag,
-                           PatFrag bc_frag> {
+multiclass sse2_pshuffle_y<string OpcodeStr, ValueType vt, SDNode OpNode> {
  def Yri : Ii8<0x70, MRMSrcReg,
                (outs VR256:$dst), (ins VR256:$src1, i8imm:$src2),
                !strconcat(OpcodeStr,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-              [(set VR256:$dst, (vt (pshuf_frag:$src2 VR256:$src1,
-                                                      (undef))))]>;
+              [(set VR256:$dst, (vt (OpNode VR256:$src1, (i8 imm:$src2))))]>;
  def Ymi : Ii8<0x70, MRMSrcMem,
                (outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2),
                !strconcat(OpcodeStr,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-              [(set VR256:$dst, (vt (pshuf_frag:$src2
-                                      (bc_frag (memopv4i64 addr:$src1)),
-                                      (undef))))]>;
+              [(set VR256:$dst,
+                (vt (OpNode (bitconvert (memopv4i64 addr:$src1)),
+                             (i8 imm:$src2))))]>;
  }
  } // ExeDomain = SSEPackedInt
  
@@ -4053,17 +4051,9 @@ let Predicates = [HasAVX] in {
  }
  
  let Predicates = [HasAVX2] in {
-  let AddedComplexity = 5 in
-  defm VPSHUFD : sse2_pshuffle_y<"vpshufd", v8i32, pshufd, bc_v8i32>, TB,
-                                 OpSize, VEX;
-
-  // SSE2 with ImmT == Imm8 and XS prefix.
-  defm VPSHUFHW : sse2_pshuffle_y<"vpshufhw", v16i16, pshufhw, bc_v16i16>, XS,
-                                  VEX;
-
-  // SSE2 with ImmT == Imm8 and XD prefix.
-  defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, pshuflw, bc_v16i16>, XD,
-                                  VEX;
+  defm VPSHUFD : sse2_pshuffle_y<"vpshufd", v8i32, X86PShufd>, TB, OpSize, VEX;
+  defm VPSHUFHW : sse2_pshuffle_y<"vpshufhw", v16i16, X86PShufhw>, XS, VEX;
+  defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, X86PShuflw>, XD, VEX;
  }
  
  let Predicates = [HasSSE2] in {
@@ -4225,10 +4215,10 @@ let Predicates = [HasAVX] in {
  
  // Splat v2f64 / v2i64
  let AddedComplexity = 10 in {
-  def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
-            (PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
    def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
              (VPUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasAVX]>;
+  def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
+            (PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
  }
  
  //===---------------------------------------------------------------------===//
@@ -7200,6 +7190,19 @@ def : Pat<(v8i32 (X86VPermilp (bc_v8i32 (memopv4i64 addr:$src1)),
            (VPERMILPSYmi addr:$src1, imm:$imm)>;
  def : Pat<(v4i64 (X86VPermilp (memopv4i64 addr:$src1), (i8 imm:$imm))),
            (VPERMILPDYmi addr:$src1, imm:$imm)>;
+
+def : Pat<(v4f32 (X86VPermilp VR128:$src1, (i8 imm:$imm))),
+          (VPERMILPSri VR128:$src1, imm:$imm)>;
+def : Pat<(v2f64 (X86VPermilp VR128:$src1, (i8 imm:$imm))),
+          (VPERMILPDri VR128:$src1, imm:$imm)>;
+def : Pat<(v2i64 (X86VPermilp VR128:$src1, (i8 imm:$imm))),
+          (VPERMILPDri VR128:$src1, imm:$imm)>;
+def : Pat<(v4f32 (X86VPermilp (memopv4f32 addr:$src1), (i8 imm:$imm))),
+          (VPERMILPSmi addr:$src1, imm:$imm)>;
+def : Pat<(v2f64 (X86VPermilp (memopv2f64 addr:$src1), (i8 imm:$imm))),
+          (VPERMILPDmi addr:$src1, imm:$imm)>;
+def : Pat<(v2i64 (X86VPermilp (memopv2i64 addr:$src1), (i8 imm:$imm))),
+          (VPERMILPDmi addr:$src1, imm:$imm)>;
  }
  
  //===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll

index 5a5c35333f2c60c5422f53020ed76960f5c74b04..947d79f9e4c066e172b761d79afbb872241a1f9d 100644 (file)
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -6,7 +6,7 @@ define <4 x float> @test1(<4 x float> %a) nounwind {
    ret <4 x float> %b
  ; CHECK: test1:
  ; CHECK: vshufps
-; CHECK: vpshufd
+; CHECK: vpermilps
  }
  
  ; rdar://10538417
@@ -98,22 +98,40 @@ define i32 @test10(<4 x i32> %a) nounwind {
  }
  
  define <4 x float> @test11(<4 x float> %a) nounwind  {
-; CHECK: pshufd $27
+; CHECK: test11
+; CHECK: vpermilps $27
    %tmp1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    ret <4 x float> %tmp1
  }
  
  define <4 x float> @test12(<4 x float>* %a) nounwind  {
-; CHECK: pshufd $27, (
+; CHECK: test12
+; CHECK: vpermilps $27, (
    %tmp0 = load <4 x float>* %a
    %tmp1 = shufflevector <4 x float> %tmp0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    ret <4 x float> %tmp1
  }
  
-;CHECK: test13
-;CHECK: shufd
-;CHECK: ret
-define <4 x i32> @test13(<2 x i32>%x) nounwind readnone {
+define <4 x i32> @test13(<4 x i32> %a) nounwind  {
+; CHECK: test13
+; CHECK: vpshufd $27
+  %tmp1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i32> %tmp1
+}
+
+define <4 x i32> @test14(<4 x i32>* %a) nounwind  {
+; CHECK: test14
+; CHECK: vpshufd $27, (
+  %tmp0 = load <4 x i32>* %a
+  %tmp1 = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i32> %tmp1
+}
+
+; CHECK: test15
+; CHECK: vpshufd $8
+; CHECK: ret
+define <4 x i32> @test15(<2 x i32>%x) nounwind readnone {
    %x1 = shufflevector <2 x i32> %x, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
    ret <4 x i32>%x1
  }
+
diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll

index f8522c269515d3ebb3746f811cefb12a6e37241a..94bcddd9759284e320c87fb321ffeafa6bd6fb28 100644 (file)
--- a/test/CodeGen/X86/avx-splat.ll
+++ b/test/CodeGen/X86/avx-splat.ll
@@ -32,7 +32,7 @@ entry:
    ret <4 x i64> %vecinit6.i
  }
  
-; CHECK: vshufpd $0
+; CHECK: vpermilpd $0
  ; CHECK-NEXT: vinsertf128 $1
  define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
  entry:
diff --git a/test/CodeGen/X86/avx-vpermil.ll b/test/CodeGen/X86/avx-vpermil.ll

index 3d521e7cea11ba4e2890ac0ac4e92ab816aaa823..9707cd9b549d3b69d2cee90ad094969f16aacf6a 100644 (file)
--- a/test/CodeGen/X86/avx-vpermil.ll
+++ b/test/CodeGen/X86/avx-vpermil.ll
@@ -45,7 +45,7 @@ entry:
    ret <8 x float> %shuffle
  }
  
-; CHECK-NOT: vpermilps
+; CHECK: vpermilps
  define <8 x float> @funcF(<8 x float> %a) nounwind uwtable readnone ssp {
  entry:
    %shuffle = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
author	Craig Topper <craig.topper@gmail.com>
	Tue, 7 Feb 2012 06:28:42 +0000 (06:28 +0000)
committer	Craig Topper <craig.topper@gmail.com>
	Tue, 7 Feb 2012 06:28:42 +0000 (06:28 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
lib/Target/X86/X86InstrSSE.td		patch \| blob \| history
test/CodeGen/X86/avx-shuffle.ll		patch \| blob \| history
test/CodeGen/X86/avx-splat.ll		patch \| blob \| history
test/CodeGen/X86/avx-vpermil.ll		patch \| blob \| history