[X86][AVX] Add support for shuffle decoding of vperm2f128/vperm2i128 with zero'd...

author Simon Pilgrim <llvm-dev@redking.me.uk>

Mon, 6 Jul 2015 22:46:46 +0000 (22:46 +0000)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Mon, 6 Jul 2015 22:46:46 +0000 (22:46 +0000)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Mon, 6 Jul 2015 22:46:46 +0000 (22:46 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Mon, 6 Jul 2015 22:46:46 +0000 (22:46 +0000)
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp

index 9777c0d85e9bd3a27ffdff355456c3110695b6c3..b1c69e779f3fdf8c224c8fd0ff2b9c966ac1ab3d 100644 (file)
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -255,15 +255,13 @@ void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
  
  void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
                            SmallVectorImpl<int> &ShuffleMask) {
-  if (Imm & 0x88)
-    return; // Not a shuffle
-
    unsigned HalfSize = VT.getVectorNumElements() / 2;
  
    for (unsigned l = 0; l != 2; ++l) {
-    unsigned HalfBegin = ((Imm >> (l * 4)) & 0x3) * HalfSize;
+    unsigned HalfMask = Imm >> (l * 4);
+    unsigned HalfBegin = (HalfMask & 0x3) * HalfSize;
      for (unsigned i = HalfBegin, e = HalfBegin + HalfSize; i != e; ++i)
-      ShuffleMask.push_back(i);
+      ShuffleMask.push_back(HalfMask & 8 ? SM_SentinelZero : i);
    }
  }
  
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index a92ab5ae2a07e9a9fa97717b555682d6e66c451d..5ad31b742a6d7eb10254f199dd4d570dd4119ae8 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -4390,6 +4390,7 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
  /// IsUnary to true if only uses one source. Note that this will set IsUnary for
  /// shuffles which use a single input multiple times, and in those cases it will
  /// adjust the mask to only have indices within that single input.
+/// FIXME: Add support for Decode*Mask functions that return SM_SentinelZero.
  static bool getTargetShuffleMask(SDNode *N, MVT VT,
                                   SmallVectorImpl<int> &Mask, bool &IsUnary) {
    unsigned NumElems = VT.getVectorNumElements();
@@ -4519,6 +4520,10 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
      ImmN = N->getOperand(N->getNumOperands()-1);
      DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
      if (Mask.empty()) return false;
+    // Mask only contains negative index if an element is zero.
+    if (std::any_of(Mask.begin(), Mask.end(), 
+                    [](int M){ return M == SM_SentinelZero; }))
+      return false;
      break;
    case X86ISD::MOVSLDUP:
      DecodeMOVSLDUPMask(VT, Mask);
diff --git a/test/CodeGen/X86/avx-vperm2x128.ll b/test/CodeGen/X86/avx-vperm2x128.ll

index 74d20f348b5298c9c79cc33ab07ec4062db33b95..4e43f6f5192105d3fbea76b72aa594f4a249326b 100644 (file)
--- a/test/CodeGen/X86/avx-vperm2x128.ll
+++ b/test/CodeGen/X86/avx-vperm2x128.ll
@@ -269,7 +269,7 @@ entry:
  define <4 x double> @vperm2z_0x08(<4 x double> %a) {
  ; ALL-LABEL: vperm2z_0x08:
  ; ALL:       # BB#0:
-; ALL-NEXT:    vperm2f128 $40, %ymm0, %ymm0, %ymm0
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
  ; ALL-NEXT:    retq
    %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
    ret <4 x double> %s
@@ -279,7 +279,7 @@ define <4 x double> @vperm2z_0x18(<4 x double> %a) {
  ; ALL-LABEL: vperm2z_0x18:
  ; ALL:       # BB#0:
  ; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
-; ALL-NEXT:    vblendpd $12, %ymm0, %ymm1, %ymm0
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
  ; ALL-NEXT:    retq
    %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
    ret <4 x double> %s
@@ -288,7 +288,7 @@ define <4 x double> @vperm2z_0x18(<4 x double> %a) {
  define <4 x double> @vperm2z_0x28(<4 x double> %a) {
  ; ALL-LABEL: vperm2z_0x28:
  ; ALL:       # BB#0:
-; ALL-NEXT:    vperm2f128 $40, %ymm0, %ymm0, %ymm0
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
  ; ALL-NEXT:    retq
    %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
    ret <4 x double> %s
@@ -298,7 +298,7 @@ define <4 x double> @vperm2z_0x38(<4 x double> %a) {
  ; ALL-LABEL: vperm2z_0x38:
  ; ALL:       # BB#0:
  ; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
-; ALL-NEXT:    vblendpd $12, %ymm0, %ymm1, %ymm0
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
  ; ALL-NEXT:    retq
    %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
    ret <4 x double> %s
@@ -307,7 +307,7 @@ define <4 x double> @vperm2z_0x38(<4 x double> %a) {
  define <4 x double> @vperm2z_0x80(<4 x double> %a) {
  ; ALL-LABEL: vperm2z_0x80:
  ; ALL:       # BB#0:
-; ALL-NEXT:    vperm2f128 $128, %ymm0, %ymm0, %ymm0
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[0,1],zero,zero
  ; ALL-NEXT:    retq
    %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
    ret <4 x double> %s
@@ -316,7 +316,7 @@ define <4 x double> @vperm2z_0x80(<4 x double> %a) {
  define <4 x double> @vperm2z_0x81(<4 x double> %a) {
  ; ALL-LABEL: vperm2z_0x81:
  ; ALL:       # BB#0:
-; ALL-NEXT:    vperm2f128 $129, %ymm0, %ymm0, %ymm0
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
  ; ALL-NEXT:    retq
    %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
    ret <4 x double> %s
@@ -325,7 +325,7 @@ define <4 x double> @vperm2z_0x81(<4 x double> %a) {
  define <4 x double> @vperm2z_0x82(<4 x double> %a) {
  ; ALL-LABEL: vperm2z_0x82:
  ; ALL:       # BB#0:
-; ALL-NEXT:    vperm2f128 $128, %ymm0, %ymm0, %ymm0
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[0,1],zero,zero
  ; ALL-NEXT:    retq
    %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
    ret <4 x double> %s
@@ -334,7 +334,7 @@ define <4 x double> @vperm2z_0x82(<4 x double> %a) {
  define <4 x double> @vperm2z_0x83(<4 x double> %a) {
  ; ALL-LABEL: vperm2z_0x83:
  ; ALL:       # BB#0:
-; ALL-NEXT:    vperm2f128 $129, %ymm0, %ymm0, %ymm0
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
  ; ALL-NEXT:    retq
    %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
    ret <4 x double> %s
@@ -345,8 +345,8 @@ define <4 x double> @vperm2z_0x83(<4 x double> %a) {
  define <4 x i64> @vperm2z_int_0x83(<4 x i64> %a, <4 x i64> %b) {
  ; ALL-LABEL: vperm2z_int_0x83:
  ; ALL:       # BB#0:
-; AVX1:    vperm2f128 $129, %ymm0, %ymm0, %ymm0
-; AVX2:    vperm2i128 $129, %ymm0, %ymm0, %ymm0
+; AVX1:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
+; AVX2:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
    %s = shufflevector <4 x i64> <i64 0, i64 0, i64 undef, i64 undef>, <4 x i64> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
    %c = add <4 x i64> %b, %s
    ret <4 x i64> %c
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Mon, 6 Jul 2015 22:46:46 +0000 (22:46 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Mon, 6 Jul 2015 22:46:46 +0000 (22:46 +0000)
lib/Target/X86/Utils/X86ShuffleDecode.cpp		patch \| blob \| history
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
test/CodeGen/X86/avx-vperm2x128.ll		patch \| blob \| history