-; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s
; Splat patterns below
define <4 x i32> @shl4(<4 x i32> %A) nounwind {
entry:
; CHECK: shl4
-; CHECK: padd
; CHECK: pslld
+; CHECK: padd
; CHECK: ret
%B = shl <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
%C = shl <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
define <8 x i16> @shl8(<8 x i16> %A) nounwind {
entry:
; CHECK: shl8
-; CHECK: padd
; CHECK: psllw
+; CHECK: padd
; CHECK: ret
%B = shl <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
%C = shl <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %K
}
-; non splat test
+; non-splat test
define <8 x i16> @sll8_nosplat(<8 x i16> %A) nounwind {
define <2 x i64> @shr2_nosplat(<2 x i64> %A) nounwind {
entry:
-; CHECK: shr2_nosplat
-; CHECK-NOT: psrlq
-; CHECK-NOT: psrlq
-; CHECK: ret
+; CHECK-LABEL: shr2_nosplat
+; CHECK: movdqa %xmm0, %xmm1
+; CHECK-NEXT: psrlq $1, %xmm1
+; CHECK-NEXT: movdqa %xmm0, %xmm2
+; CHECK-NEXT: psrlq $8, %xmm2
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
+; CHECK-NEXT: xorpd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
%B = lshr <2 x i64> %A, < i64 8, i64 1>
%C = lshr <2 x i64> %A, < i64 1, i64 0>
%K = xor <2 x i64> %B, %C
define <16 x i8> @shl9(<16 x i8> %A) nounwind {
%B = shl <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <16 x i8> %B
-; CHECK: shl9:
+; CHECK-LABEL: shl9:
; CHECK: psllw $3
; CHECK: pand
; CHECK: ret
define <16 x i8> @shr9(<16 x i8> %A) nounwind {
%B = lshr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <16 x i8> %B
-; CHECK: shr9:
+; CHECK-LABEL: shr9:
+; CHECK: psrlw $3
+; CHECK: pand
+; CHECK: ret
+}
+
+define <16 x i8> @sra_v16i8_7(<16 x i8> %A) nounwind {
+ %B = ashr <16 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ ret <16 x i8> %B
+; CHECK-LABEL: sra_v16i8_7:
+; CHECK: pxor
+; CHECK: pcmpgtb
+; CHECK: ret
+}
+
+define <16 x i8> @sra_v16i8(<16 x i8> %A) nounwind {
+ %B = ashr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ ret <16 x i8> %B
+; CHECK-LABEL: sra_v16i8:
; CHECK: psrlw $3
; CHECK: pand
+; CHECK: pxor
+; CHECK: psubb
; CHECK: ret
}