Add AVX2 support for vselect of v32i8

author Nadav Rotem <nadav.rotem@intel.com>

Wed, 9 Nov 2011 13:21:28 +0000 (13:21 +0000)

committer Nadav Rotem <nadav.rotem@intel.com>

Wed, 9 Nov 2011 13:21:28 +0000 (13:21 +0000)
author Nadav Rotem <nadav.rotem@intel.com>
Wed, 9 Nov 2011 13:21:28 +0000 (13:21 +0000)
committer Nadav Rotem <nadav.rotem@intel.com>
Wed, 9 Nov 2011 13:21:28 +0000 (13:21 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index e5d3c91fd0056e9b2742bc5e1e292b078b23aa88..c34f225dc53c83580f6cb824edfcf3087733f993 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1050,6 +1050,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
        setOperationAction(ISD::MUL,             MVT::v4i64, Custom);
        setOperationAction(ISD::MUL,             MVT::v8i32, Legal);
        setOperationAction(ISD::MUL,             MVT::v16i16, Legal);
+
+      setOperationAction(ISD::VSELECT,         MVT::v32i8, Legal);
        // Don't lower v32i8 because there is no 128-bit byte mul
      } else {
        setOperationAction(ISD::ADD,             MVT::v4i64, Custom);
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td

index 8648d48cdb991672e849076be2f195d8f51163a2..068e223e59e96fcdc6f204da38dc3e7ac8c22849 100644 (file)
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -6568,6 +6568,12 @@ let Predicates = [HasAVX] in {
              (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
  }
  
+let Predicates = [HasAVX2] in {  // Pattern below applies only under HasAVX2.
+  def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),  // Match a v32i8 vselect on VR256 operands.
+                            (v32i8 VR256:$src2))),
+            (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>;  // $src2 is passed before $src1, as in the VBLENDVPDYrr pattern above.
+}
+
  /// SS41I_ternary_int - SSE 4.1 ternary operator
  let Uses = [XMM0], Constraints = "$src1 = $dst" in {
    multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
diff --git a/test/CodeGen/X86/avx2-logic.ll b/test/CodeGen/X86/avx2-logic.ll

index a763bc0010cc68e4cae80005586d24eedd000344..944849cf4ff461b35169767a01d72a0b1bcab781 100644 (file)
--- a/test/CodeGen/X86/avx2-logic.ll
+++ b/test/CodeGen/X86/avx2-logic.ll
@@ -1,6 +1,8 @@
  ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
  
+; CHECK: vpandn
  ; CHECK: vpandn  %ymm
+; CHECK: ret
  define <4 x i64> @vpandn(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  entry:
    ; Force the execution domain with an add.
@@ -10,7 +12,9 @@ entry:
    ret <4 x i64> %x
  }
  
+; CHECK: vpand
  ; CHECK: vpand %ymm
+; CHECK: ret
  define <4 x i64> @vpand(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  entry:
    ; Force the execution domain with an add.
@@ -19,7 +23,9 @@ entry:
    ret <4 x i64> %x
  }
  
+; CHECK: vpor
  ; CHECK: vpor %ymm
+; CHECK: ret
  define <4 x i64> @vpor(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  entry:
    ; Force the execution domain with an add.
@@ -28,7 +34,9 @@ entry:
    ret <4 x i64> %x
  }
  
+; CHECK: vpxor
  ; CHECK: vpxor %ymm
+; CHECK: ret
  define <4 x i64> @vpxor(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  entry:
    ; Force the execution domain with an add.
@@ -36,3 +44,14 @@ entry:
    %x = xor <4 x i64> %a2, %b
    ret <4 x i64> %x
  }
+
+
+
+; CHECK: vpblendvb
+; CHECK: vpblendvb %ymm
+; CHECK: ret
+define <32 x i8> @vpblendvb(<32 x i8> %x, <32 x i8> %y) {  ; v32i8 vector-select test; the lines above expect a ymm vpblendvb
+  %min_is_x = icmp ult <32 x i8> %x, %y  ; per-lane unsigned %x < %y, yielding a <32 x i1> mask
+  %min = select <32 x i1> %min_is_x, <32 x i8> %x, <32 x i8> %y  ; take %x where the mask is set, else %y
+  ret <32 x i8> %min
+}
author	Nadav Rotem <nadav.rotem@intel.com>
	Wed, 9 Nov 2011 13:21:28 +0000 (13:21 +0000)
committer	Nadav Rotem <nadav.rotem@intel.com>
	Wed, 9 Nov 2011 13:21:28 +0000 (13:21 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
lib/Target/X86/X86InstrSSE.td		patch \| blob \| history
test/CodeGen/X86/avx2-logic.ll		patch \| blob \| history