1 ; RUN: llc < %s -march=x86 -mcpu=core2 -mattr=+ssse3 | FileCheck %s
2 ; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck --check-prefix=CHECK-YONAH %s
; test1: single-input shuffle of %A (second shuffle operand is undef) with
; mask <1,2,3,0>, i.e. the four i32 lanes rotated down by one position.
; %B is not referenced by the visible body.
; Both llc runs expect one pshufd with the same [1,2,3,0] lane order.
; NOTE(review): in this view the SSSE3 expectations have no function-label
; directive and no ret/closing brace is visible -- confirm nothing was dropped.
4 define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
7 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,2,3,0]
10 ; CHECK-YONAH-LABEL: test1:
11 ; CHECK-YONAH: # BB#0:
12 ; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,2,3,0]
13 ; CHECK-YONAH-NEXT: retl
; Result lanes: A[1], A[2], A[3], A[0].
14 %C = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> < i32 1, i32 2, i32 3, i32 0 >
; test2: two-input shuffle with mask <1,2,3,4>: the upper three i32 lanes of
; %A followed by the lowest lane of %B.
; The SSSE3 run expects a single palignr (bytes 4..15 of xmm0, then bytes
; 0..3 of xmm1) whose result is copied back into xmm0; the yonah run expects
; the same permutation built from two shufps instructions.
; NOTE(review): in this view the SSSE3 expectations have no function-label
; directive and no ret/closing brace is visible -- confirm nothing was dropped.
18 define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind {
21 ; CHECK-NEXT: palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
22 ; CHECK-NEXT: movdqa %xmm1, %xmm0
25 ; CHECK-YONAH-LABEL: test2:
26 ; CHECK-YONAH: # BB#0:
27 ; CHECK-YONAH-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
28 ; CHECK-YONAH-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
29 ; CHECK-YONAH-NEXT: retl
; Result lanes: A[1], A[2], A[3], B[0] (mask indices >= 4 select from %B).
30 %C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 1, i32 2, i32 3, i32 4 >
; test3: same shape as a <1,2,3,4> concat-and-shift, but lane 2 of the mask is
; undef.
; The SSSE3 run expects palignr with the 4-byte offset (bytes 4..15 of xmm0,
; then bytes 0..3 of xmm1) plus a copy into xmm0; the yonah run expects a
; single shufps, with the undef lane filled from xmm1.
; NOTE(review): in this view the SSSE3 expectations have no function-label
; directive and no ret/closing brace is visible -- confirm nothing was dropped.
34 define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind {
37 ; CHECK-NEXT: palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
38 ; CHECK-NEXT: movdqa %xmm1, %xmm0
41 ; CHECK-YONAH-LABEL: test3:
42 ; CHECK-YONAH: # BB#0:
43 ; CHECK-YONAH-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[0,0]
44 ; CHECK-YONAH-NEXT: retl
; Result lanes: A[1], A[2], undef, B[0].
45 %C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 1, i32 2, i32 undef, i32 4 >
; test4: mask <6,7,undef,1> takes the low result lanes from %B and the top
; lane from %A, so the shift runs from %B into %A.
; The SSSE3 run expects palignr writing xmm0 directly (bytes 8..15 of xmm1,
; then bytes 0..7 of xmm0), with no extra register copy; the yonah run expects
; shufps into xmm1 followed by movaps to xmm0.
; NOTE(review): in this view the SSSE3 expectations have no function-label
; directive and no ret/closing brace is visible -- confirm nothing was dropped.
49 define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
52 ; CHECK-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
55 ; CHECK-YONAH-LABEL: test4:
56 ; CHECK-YONAH: # BB#0:
57 ; CHECK-YONAH-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1]
58 ; CHECK-YONAH-NEXT: movaps %xmm1, %xmm0
59 ; CHECK-YONAH-NEXT: retl
; Result lanes: B[2], B[3], undef, A[1].
60 %C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 6, i32 7, i32 undef, i32 1 >
; test5: the <6,7,undef,1> mask applied to <4 x float> operands instead of
; integer ones.
; The SSSE3 run still expects palignr (bytes 8..15 of xmm1, then bytes 0..7
; of xmm0) for the float vector; the yonah run expects shufps + movaps.
; NOTE(review): in this view the SSSE3 expectations have no function-label
; directive and no ret/closing brace is visible -- confirm nothing was dropped.
64 define <4 x float> @test5(<4 x float> %A, <4 x float> %B) nounwind {
67 ; CHECK-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
70 ; CHECK-YONAH-LABEL: test5:
71 ; CHECK-YONAH: # BB#0:
72 ; CHECK-YONAH-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1]
73 ; CHECK-YONAH-NEXT: movaps %xmm1, %xmm0
74 ; CHECK-YONAH-NEXT: retl
; Result lanes: B[2], B[3], undef, A[1].
75 %C = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> < i32 6, i32 7, i32 undef, i32 1 >
; test6: <8 x i16> shuffle with mask <3,4,undef,6,7,8,9,10>: the upper i16
; lanes of %A starting at lane 3, followed by the lowest three lanes of %B,
; with one undef lane in the middle.
; The SSSE3 run expects one palignr with a 6-byte offset (bytes 6..15 of xmm0,
; then bytes 0..5 of xmm1) plus a copy into xmm0. The yonah run expects a
; multi-instruction sequence of shufpd, pshuflw/pshufhw and pextrw/pinsrw.
; NOTE(review): in this view the SSSE3 expectations have no function-label
; directive and no ret/closing brace is visible -- confirm nothing was dropped.
79 define <8 x i16> @test6(<8 x i16> %A, <8 x i16> %B) nounwind {
82 ; CHECK-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
83 ; CHECK-NEXT: movdqa %xmm1, %xmm0
86 ; CHECK-YONAH-LABEL: test6:
87 ; CHECK-YONAH: # BB#0:
88 ; CHECK-YONAH-NEXT: movapd %xmm0, %xmm2
89 ; CHECK-YONAH-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
90 ; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,2,4,5,6,7]
91 ; CHECK-YONAH-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,5,6]
92 ; CHECK-YONAH-NEXT: pextrw $3, %xmm0, %eax
93 ; CHECK-YONAH-NEXT: pinsrw $0, %eax, %xmm1
94 ; CHECK-YONAH-NEXT: pextrw $7, %xmm0, %eax
95 ; CHECK-YONAH-NEXT: pinsrw $4, %eax, %xmm1
96 ; CHECK-YONAH-NEXT: movdqa %xmm1, %xmm0
97 ; CHECK-YONAH-NEXT: retl
; Result lanes: A[3], A[4], undef, A[6], A[7], B[0], B[1], B[2].
98 %C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 3, i32 4, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10 >
; test7: <8 x i16> shuffle with mask <undef,6,undef,8,9,10,11,12>: two undef
; lanes among the elements taken from the top of %A, then the lowest five
; lanes of %B.
; The SSSE3 run expects one palignr with a 10-byte offset (bytes 10..15 of
; xmm0, then bytes 0..9 of xmm1) plus a copy into xmm0. The yonah run expects
; shufpd, pshuflw/pshufhw and two element inserts.
; NOTE(review): no ret/closing brace for this function is visible in this
; view -- confirm nothing was dropped.
102 define <8 x i16> @test7(<8 x i16> %A, <8 x i16> %B) nounwind {
103 ; CHECK-LABEL: test7:
105 ; CHECK-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
106 ; CHECK-NEXT: movdqa %xmm1, %xmm0
109 ; CHECK-YONAH-LABEL: test7:
110 ; CHECK-YONAH: # BB#0:
111 ; CHECK-YONAH-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
112 ; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,0,0,4,5,6,7]
113 ; CHECK-YONAH-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4]
114 ; CHECK-YONAH-NEXT: movd %xmm1, %eax
115 ; CHECK-YONAH-NEXT: pinsrw $3, %eax, %xmm0
116 ; CHECK-YONAH-NEXT: pextrw $4, %xmm1, %eax
117 ; CHECK-YONAH-NEXT: pinsrw $7, %eax, %xmm0
118 ; CHECK-YONAH-NEXT: retl
; Result lanes: undef, A[6], undef, B[0], B[1], B[2], B[3], B[4].
119 %C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 undef, i32 6, i32 undef, i32 8, i32 9, i32 10, i32 11, i32 12 >
; test8: <16 x i8> shuffle with mask <5,6,7,undef,9,...,20>: bytes of %A from
; index 5 upward (one undef lane), followed by the lowest five bytes of %B.
; The SSSE3 run expects one palignr with an odd 5-byte offset (bytes 5..15 of
; xmm0, then bytes 0..4 of xmm1) plus a copy into xmm0. The yonah run expects
; a long scalar sequence: 16-bit elements are extracted with pextrw, combined
; with shrdw/shldw/shrl by 8 bits, and re-inserted with pinsrw; %esi is
; pushed on entry and popped before the return.
; NOTE(review): no ret/closing brace for this function is visible in this
; view -- confirm nothing was dropped.
123 define <16 x i8> @test8(<16 x i8> %A, <16 x i8> %B) nounwind {
124 ; CHECK-LABEL: test8:
126 ; CHECK-NEXT: palignr {{.*#+}} xmm1 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
127 ; CHECK-NEXT: movdqa %xmm1, %xmm0
130 ; CHECK-YONAH-LABEL: test8:
131 ; CHECK-YONAH: # BB#0:
132 ; CHECK-YONAH-NEXT: pushl %esi
133 ; CHECK-YONAH-NEXT: movdqa %xmm0, %xmm2
134 ; CHECK-YONAH-NEXT: pextrw $4, %xmm2, %eax
135 ; CHECK-YONAH-NEXT: pextrw $5, %xmm2, %ecx
136 ; CHECK-YONAH-NEXT: shrdw $8, %cx, %ax
137 ; CHECK-YONAH-NEXT: pextrw $2, %xmm2, %edx
138 ; CHECK-YONAH-NEXT: pextrw $3, %xmm2, %esi
139 ; CHECK-YONAH-NEXT: shrdw $8, %si, %dx
140 ; CHECK-YONAH-NEXT: # kill: XMM0<def> XMM2<kill>
141 ; CHECK-YONAH-NEXT: pinsrw $0, %edx, %xmm0
142 ; CHECK-YONAH-NEXT: shrl $8, %esi
143 ; CHECK-YONAH-NEXT: pinsrw $1, %esi, %xmm0
144 ; CHECK-YONAH-NEXT: pinsrw $2, %eax, %xmm0
145 ; CHECK-YONAH-NEXT: pextrw $6, %xmm2, %eax
146 ; CHECK-YONAH-NEXT: shrdw $8, %ax, %cx
147 ; CHECK-YONAH-NEXT: pinsrw $3, %ecx, %xmm0
148 ; CHECK-YONAH-NEXT: pextrw $7, %xmm2, %ecx
149 ; CHECK-YONAH-NEXT: shrdw $8, %cx, %ax
150 ; CHECK-YONAH-NEXT: pinsrw $4, %eax, %xmm0
151 ; CHECK-YONAH-NEXT: pextrw $8, %xmm1, %eax
152 ; CHECK-YONAH-NEXT: shrdw $8, %ax, %cx
153 ; CHECK-YONAH-NEXT: pinsrw $5, %ecx, %xmm0
154 ; CHECK-YONAH-NEXT: pextrw $9, %xmm1, %ecx
155 ; CHECK-YONAH-NEXT: shrdw $8, %cx, %ax
156 ; CHECK-YONAH-NEXT: pinsrw $6, %eax, %xmm0
157 ; CHECK-YONAH-NEXT: pextrw $10, %xmm1, %eax
158 ; CHECK-YONAH-NEXT: shldw $8, %cx, %ax
159 ; CHECK-YONAH-NEXT: pinsrw $7, %eax, %xmm0
160 ; CHECK-YONAH-NEXT: popl %esi
161 ; CHECK-YONAH-NEXT: retl
; Result bytes: A[5], A[6], A[7], undef, A[9]..A[15], B[0]..B[4].
162 %C = shufflevector <16 x i8> %A, <16 x i8> %B, <16 x i32> < i32 5, i32 6, i32 7, i32 undef, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20 >
166 ; Check that we don't do unary (circular on single operand) palignr incorrectly.
167 ; (It is possible, but before this testcase was committed, it was being done
168 ; incorrectly. In particular, one of the operands of the palignr node
; was an UNDEF.)
; test9: the shuffle operands are passed as (%B, %A) and every defined mask
; index is below 8, so only %B contributes: lanes 2..7 of %B followed by lane
; 0 of %B, with result lane 0 undef (a rotation of a single input).
; The SSSE3 run expects a pshufb that reads only xmm1 and zeroes the two bytes
; of the undef lane, then a copy into xmm0. The yonah run expects
; pshuflw/pshufhw on xmm1 plus two element inserts.
; NOTE(review): no ret/closing brace for this function is visible in this
; view -- confirm nothing was dropped.
170 define <8 x i16> @test9(<8 x i16> %A, <8 x i16> %B) nounwind {
171 ; CHECK-LABEL: test9:
173 ; CHECK-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,xmm1[4,5,6,7,8,9,10,11,12,13,14,15,0,1]
174 ; CHECK-NEXT: movdqa %xmm1, %xmm0
177 ; CHECK-YONAH-LABEL: test9:
178 ; CHECK-YONAH: # BB#0:
179 ; CHECK-YONAH-NEXT: pextrw $4, %xmm1, %eax
180 ; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,3,0,4,5,6,7]
181 ; CHECK-YONAH-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4]
182 ; CHECK-YONAH-NEXT: pinsrw $3, %eax, %xmm0
183 ; CHECK-YONAH-NEXT: movd %xmm1, %eax
184 ; CHECK-YONAH-NEXT: pinsrw $7, %eax, %xmm0
185 ; CHECK-YONAH-NEXT: retl
; Result lanes: undef, B[2], B[3], B[4], B[5], B[6], B[7], B[0].
186 %C = shufflevector <8 x i16> %B, <8 x i16> %A, <8 x i32> < i32 undef, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0 >