1 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
4 ; Verify that each of the following test cases is folded into a single
5 ; instruction which performs a blend operation (in some cases followed by a
; register copy that moves the result into the return register).
; test1: OR of two <2 x i64> shuffles against zero.
;   %shuf1 keeps element 0 of %a and zeroes element 1  -> <a0, 0>
;   %shuf2 zeroes element 0 and keeps element 1 of %b  -> <0, b1>
; The OR therefore selects <a0, b1>; the expected output is a movsd
; followed by a register copy.
7 define <2 x i64> @test1(<2 x i64> %a, <2 x i64> %b) {
10 ; CHECK-NEXT: movsd %xmm0, %xmm1
11 ; CHECK-NEXT: movaps %xmm1, %xmm0
13 %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
14 %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1>
15 %or = or <2 x i64> %shuf1, %shuf2
; test2: OR of two <4 x i32> shuffles against zero.
;   %shuf1 -> <0, 0, a2, a3>   (mask indices >= 4 pick the zero vector)
;   %shuf2 -> <b0, b1, 0, 0>
; The OR selects <b0, b1, a2, a3>; the expected output is a single movsd.
20 define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b) {
23 ; CHECK-NEXT: movsd %xmm1, %xmm0
25 %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
26 %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
27 %or = or <4 x i32> %shuf1, %shuf2
; test3: mirror of the <2 x i64> low/high selection with operands swapped.
;   %shuf1 -> <0, a1>
;   %shuf2 -> <b0, 0>
; The OR selects <b0, a1>; the expected output is a single movsd.
32 define <2 x i64> @test3(<2 x i64> %a, <2 x i64> %b) {
35 ; CHECK-NEXT: movsd %xmm1, %xmm0
37 %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1>
38 %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
39 %or = or <2 x i64> %shuf1, %shuf2
; test4: OR of two <4 x i32> shuffles against zero.
;   %shuf1 -> <a0, 0, 0, 0>
;   %shuf2 -> <0, b1, b2, b3>
; The OR selects <a0, b1, b2, b3>; the expected output is a movss
; followed by a register copy.
44 define <4 x i32> @test4(<4 x i32> %a, <4 x i32> %b) {
47 ; CHECK-NEXT: movss %xmm0, %xmm1
48 ; CHECK-NEXT: movaps %xmm1, %xmm0
50 %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
51 %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>
52 %or = or <4 x i32> %shuf1, %shuf2
; test5: OR of two <4 x i32> shuffles against zero.
;   %shuf1 -> <0, a1, a2, a3>
;   %shuf2 -> <b0, 0, 0, 0>
; The OR selects <b0, a1, a2, a3>; the expected output is a single movss.
57 define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b) {
60 ; CHECK-NEXT: movss %xmm1, %xmm0
62 %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>
63 %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
64 %or = or <4 x i32> %shuf1, %shuf2
; test6: OR of two <4 x i32> shuffles against zero.
;   %shuf1 -> <a0, a1, 0, 0>
;   %shuf2 -> <0, 0, b2, b3>
; The OR selects <a0, a1, b2, b3>; the expected output is a single blendps.
69 define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b) {
72 ; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
74 %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
75 %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
76 %or = or <4 x i32> %shuf1, %shuf2
; test7: same lane selection as <a0, a1, b2, b3>, but expressed with AND
; masks (all-ones keeps a lane, zero clears it) instead of shuffles.
; The two masks are complementary, so the OR picks each lane from exactly
; one input; the expected output is a single blendps.
81 define <4 x i32> @test7(<4 x i32> %a, <4 x i32> %b) {
84 ; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
86 %and1 = and <4 x i32> %a, <i32 -1, i32 -1, i32 0, i32 0>
87 %and2 = and <4 x i32> %b, <i32 0, i32 0, i32 -1, i32 -1>
88 %or = or <4 x i32> %and1, %and2
; test8: AND-mask form on <2 x i64>.
;   %and1 -> <a0, 0>
;   %and2 -> <0, b1>
; The OR selects <a0, b1>; the expected output is a movsd followed by a
; register copy.
93 define <2 x i64> @test8(<2 x i64> %a, <2 x i64> %b) {
96 ; CHECK-NEXT: movsd %xmm0, %xmm1
97 ; CHECK-NEXT: movaps %xmm1, %xmm0
99 %and1 = and <2 x i64> %a, <i64 -1, i64 0>
100 %and2 = and <2 x i64> %b, <i64 0, i64 -1>
101 %or = or <2 x i64> %and1, %and2
; test9: AND-mask form on <4 x i32>.
;   %and1 -> <0, 0, a2, a3>
;   %and2 -> <b0, b1, 0, 0>
; The OR selects <b0, b1, a2, a3>; the expected output is a single movsd.
106 define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) {
107 ; CHECK-LABEL: test9:
109 ; CHECK-NEXT: movsd %xmm1, %xmm0
111 %and1 = and <4 x i32> %a, <i32 0, i32 0, i32 -1, i32 -1>
112 %and2 = and <4 x i32> %b, <i32 -1, i32 -1, i32 0, i32 0>
113 %or = or <4 x i32> %and1, %and2
; test10: AND-mask form on <2 x i64>.
;   %and1 -> <0, a1>
;   %and2 -> <b0, 0>
; The OR selects <b0, a1>; the expected output is a single movsd.
118 define <2 x i64> @test10(<2 x i64> %a, <2 x i64> %b) {
119 ; CHECK-LABEL: test10:
121 ; CHECK-NEXT: movsd %xmm1, %xmm0
123 %and1 = and <2 x i64> %a, <i64 0, i64 -1>
124 %and2 = and <2 x i64> %b, <i64 -1, i64 0>
125 %or = or <2 x i64> %and1, %and2
; test11: AND-mask form on <4 x i32>.
;   %and1 -> <a0, 0, 0, 0>
;   %and2 -> <0, b1, b2, b3>
; The OR selects <a0, b1, b2, b3>; the expected output is a movss
; followed by a register copy.
130 define <4 x i32> @test11(<4 x i32> %a, <4 x i32> %b) {
131 ; CHECK-LABEL: test11:
133 ; CHECK-NEXT: movss %xmm0, %xmm1
134 ; CHECK-NEXT: movaps %xmm1, %xmm0
136 %and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 0>
137 %and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 -1>
138 %or = or <4 x i32> %and1, %and2
; test12: AND-mask form on <4 x i32>.
;   %and1 -> <0, a1, a2, a3>
;   %and2 -> <b0, 0, 0, 0>
; The OR selects <b0, a1, a2, a3>; the expected output is a single movss.
143 define <4 x i32> @test12(<4 x i32> %a, <4 x i32> %b) {
144 ; CHECK-LABEL: test12:
146 ; CHECK-NEXT: movss %xmm1, %xmm0
148 %and1 = and <4 x i32> %a, <i32 0, i32 -1, i32 -1, i32 -1>
149 %and2 = and <4 x i32> %b, <i32 -1, i32 0, i32 0, i32 0>
150 %or = or <4 x i32> %and1, %and2
155 ; Verify that the following test cases are folded into single shuffles.
; test13: the non-zero lanes of the two shuffles do not overlap, but %a's
; lanes are permuted rather than kept in place.
;   %shuf1 -> <a1, a1, 0, 0>
;   %shuf2 -> <0, 0, b2, b3>
; The OR yields <a1, a1, b2, b3>; the expected output is a single shufps.
157 define <4 x i32> @test13(<4 x i32> %a, <4 x i32> %b) {
158 ; CHECK-LABEL: test13:
160 ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
162 %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 1, i32 1, i32 4, i32 4>
163 %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
164 %or = or <4 x i32> %shuf1, %shuf2
; test14: combines the low elements of both inputs.
;   %shuf1 -> <a0, 0>
;   %shuf2 -> <0, b0>
; The OR yields <a0, b0>; the expected output is a single punpcklqdq.
169 define <2 x i64> @test14(<2 x i64> %a, <2 x i64> %b) {
170 ; CHECK-LABEL: test14:
172 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
174 %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
175 %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
176 %or = or <2 x i64> %shuf1, %shuf2
; test15: both inputs contribute permuted lanes to disjoint halves.
;   %shuf1 -> <0, 0, a2, a1>
;   %shuf2 -> <b2, b1, 0, 0>
; The OR yields <b2, b1, a2, a1>; the expected output is one shufps
; followed by a register copy.
181 define <4 x i32> @test15(<4 x i32> %a, <4 x i32> %b) {
182 ; CHECK-LABEL: test15:
184 ; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,1],xmm0[2,1]
185 ; CHECK-NEXT: movaps %xmm1, %xmm0
187 %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 1>
188 %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 2, i32 1, i32 4, i32 4>
189 %or = or <4 x i32> %shuf1, %shuf2
; test16: combines the low elements of both inputs, with %b first.
;   %shuf1 -> <0, a0>
;   %shuf2 -> <b0, 0>
; The OR yields <b0, a0>; the expected output is one punpcklqdq followed
; by a register copy.
194 define <2 x i64> @test16(<2 x i64> %a, <2 x i64> %b) {
195 ; CHECK-LABEL: test16:
197 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
198 ; CHECK-NEXT: movdqa %xmm1, %xmm0
200 %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
201 %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
202 %or = or <2 x i64> %shuf1, %shuf2
207 ; Verify that the dag-combiner does not fold an OR of two shuffles into a single
208 ; shuffle instruction when the shuffle indices are not compatible.
; test17: negative test, the OR must survive in the output.
;   %shuf1 -> <0, a0, 0, a2>
;   %shuf2 -> <b0, b1, 0, 0>
; Lane 1 is non-zero in both operands (a0 | b1), so the result cannot be
; expressed as a lane selection from %a and %b; the expected output keeps
; both shuffles and a por.
210 define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) {
211 ; CHECK-LABEL: test17:
213 ; CHECK-NEXT: xorps %xmm2, %xmm2
214 ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,0]
215 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,3,1]
216 ; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,0]
217 ; CHECK-NEXT: por %xmm1, %xmm0
219 %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 2>
220 %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
221 %or = or <4 x i32> %shuf1, %shuf2
; test18: negative test, the OR must survive in the output.
;   %shuf1 -> <0, a0, 0, 0>
;   %shuf2 -> <b0, 0, 0, 0>
; The OR yields <b0, a0, 0, 0>. The expected output materializes each
; shuffle separately and combines them with orps rather than emitting a
; single shuffle.
; NOTE(review): no lane is non-zero in both operands here, so the reason the
; fold is rejected is not visible in this file; confirm against the combiner.
226 define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) {
227 ; CHECK-LABEL: test18:
229 ; CHECK-NEXT: xorps %xmm2, %xmm2
230 ; CHECK-NEXT: xorps %xmm3, %xmm3
231 ; CHECK-NEXT: blendps $1, %xmm0, %xmm3
232 ; CHECK-NEXT: shufps {{.*#+}} xmm3 = xmm3[2,0],xmm2[0,0]
233 ; CHECK-NEXT: blendps $1, %xmm1, %xmm2
234 ; CHECK-NEXT: orps %xmm3, %xmm2
235 ; CHECK-NEXT: movaps %xmm2, %xmm0
237 %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 4>
238 %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
239 %or = or <4 x i32> %shuf1, %shuf2
; test19: negative test, the OR must survive in the output.
;   %shuf1 -> <0, a0, 0, a3>
;   %shuf2 -> <b0, 0, b2, b2>
; Lane 3 is non-zero in both operands (a3 | b2), so the result cannot be
; expressed as a lane selection from %a and %b; the expected output keeps
; both shuffles and a por.
244 define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
245 ; CHECK-LABEL: test19:
247 ; CHECK-NEXT: xorps %xmm2, %xmm2
248 ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],xmm2[0,0]
249 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,3,1]
250 ; CHECK-NEXT: movdqa %xmm1, %xmm2
251 ; CHECK-NEXT: pslldq $8, %xmm2
252 ; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[2,2]
253 ; CHECK-NEXT: por %xmm2, %xmm0
255 %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 3>
256 %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 2, i32 2>
257 %or = or <4 x i32> %shuf1, %shuf2
; test20: both shuffles use the same mask.
;   %shuf1 -> <a0, 0>
;   %shuf2 -> <b0, 0>
; The OR yields <a0 | b0, 0>; the expected output ORs the inputs first
; and then applies one movq instead of shuffling each input separately.
262 define <2 x i64> @test20(<2 x i64> %a, <2 x i64> %b) {
263 ; CHECK-LABEL: test20:
265 ; CHECK-NEXT: orps %xmm1, %xmm0
266 ; CHECK-NEXT: movq %xmm0, %xmm0
268 %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
269 %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
270 %or = or <2 x i64> %shuf1, %shuf2
; test21: both shuffles use the same mask.
;   %shuf1 -> <0, a0>
;   %shuf2 -> <0, b0>
; The OR yields <0, a0 | b0>; the expected output ORs the inputs first
; and then applies one pslldq by 8 bytes.
275 define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) {
276 ; CHECK-LABEL: test21:
278 ; CHECK-NEXT: por %xmm1, %xmm0
279 ; CHECK-NEXT: pslldq $8, %xmm0
281 %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
282 %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
283 %or = or <2 x i64> %shuf1, %shuf2
287 ; Verify that the DAGCombiner doesn't crash while checking whether a shuffle
288 ; with an illegal type has a legal mask. Method 'isShuffleMaskLegal' only knows
289 ; how to handle legal vector value types.
; test_crash: OR of two shuffles on <4 x i8> vectors.
;   %shuf1 -> <0, 0, a2, a3>
;   %shuf2 -> <b0, b1, 0, 0>
; The OR selects <b0, b1, a2, a3>; the expected output is a single movsd.
; NOTE(review): presumably <4 x i8> is not a legal vector type for this
; target, which is what makes this a crash regression test; confirm.
290 define <4 x i8> @test_crash(<4 x i8> %a, <4 x i8> %b) {
291 ; CHECK-LABEL: test_crash:
293 ; CHECK-NEXT: movsd %xmm1, %xmm0
295 %shuf1 = shufflevector <4 x i8> %a, <4 x i8> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
296 %shuf2 = shufflevector <4 x i8> %b, <4 x i8> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
297 %or = or <4 x i8> %shuf1, %shuf2