ret void
}
-; shift1b can't use a packed shift
+; shift1b can't use a packed shift but can shift lanes separately and shuffle back together
define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, <2 x i64> %sh) nounwind {
entry:
; CHECK-LABEL: shift1b:
-; CHECK: shll
+; CHECK: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
+; CHECK-NEXT: movdqa %xmm0, %xmm3
+; CHECK-NEXT: psllq %xmm2, %xmm3
+; CHECK-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero
+; CHECK-NEXT: psllq %xmm1, %xmm0
+; CHECK-NEXT: movsd {{.*#+}} xmm3 = xmm0[0],xmm3[1]
%shamt = shufflevector <2 x i64> %sh, <2 x i64> undef, <2 x i32> <i32 0, i32 1>
%shl = shl <2 x i64> %val, %shamt
store <2 x i64> %shl, <2 x i64>* %dst
define void @shift3a(<8 x i16> %val, <8 x i16>* %dst, <8 x i16> %amt) nounwind {
entry:
; CHECK-LABEL: shift3a:
-; CHECK: movzwl
+; CHECK: pextrw $6
; CHECK: psllw
%shamt = shufflevector <8 x i16> %amt, <8 x i16> undef, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
%shl = shl <8 x i16> %val, %shamt