1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
5 ; Verify that each of the following test cases is folded into a single
6 ; instruction which performs a blend operation.
; test1: OR of two shuffles-with-zero on <2 x i64>. Mask index 2 selects from
; the zeroinitializer operand, so %shuf1 = <a[0], 0> and %shuf2 = <0, b[1]>.
; The non-zero lanes do not overlap, so the OR equals <a[0], b[1]>.
8 define <2 x i64> @test1(<2 x i64> %a, <2 x i64> %b) {
11 ; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
13 %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
14 %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1>
15 %or = or <2 x i64> %shuf1, %shuf2
; test2: OR of two shuffles-with-zero on <4 x i32>. Mask indices >= 4 select
; from the zeroinitializer operand, so %shuf1 = <0, 0, a[2], a[3]> and
; %shuf2 = <b[0], b[1], 0, 0>. The OR equals <b[0], b[1], a[2], a[3]>.
20 define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b) {
23 ; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
25 %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
26 %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
27 %or = or <4 x i32> %shuf1, %shuf2
; test3: <2 x i64> variant with the operand roles swapped relative to test1.
; %shuf1 = <0, a[1]> and %shuf2 = <b[0], 0>, so the OR equals <b[0], a[1]>.
32 define <2 x i64> @test3(<2 x i64> %a, <2 x i64> %b) {
35 ; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
37 %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1>
38 %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
39 %or = or <2 x i64> %shuf1, %shuf2
; test4: uneven 1/3 lane split. %shuf1 = <a[0], 0, 0, 0> and
; %shuf2 = <0, b[1], b[2], b[3]>, so the OR equals <a[0], b[1], b[2], b[3]>.
44 define <4 x i32> @test4(<4 x i32> %a, <4 x i32> %b) {
47 ; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
49 %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
50 %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>
51 %or = or <4 x i32> %shuf1, %shuf2
; test5: mirror of test4. %shuf1 = <0, a[1], a[2], a[3]> and
; %shuf2 = <b[0], 0, 0, 0>, so the OR equals <b[0], a[1], a[2], a[3]>.
56 define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b) {
59 ; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
61 %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>
62 %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
63 %or = or <4 x i32> %shuf1, %shuf2
; test6: mirror of test2. %shuf1 = <a[0], a[1], 0, 0> and
; %shuf2 = <0, 0, b[2], b[3]>, so the OR equals <a[0], a[1], b[2], b[3]>.
68 define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b) {
71 ; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
73 %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
74 %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
75 %or = or <4 x i32> %shuf1, %shuf2
; test7: same lane selection as test6, expressed with AND masks instead of
; shuffles. All-ones lanes keep the input and zero lanes clear it, so the OR of
; the two complementary masks equals <a[0], a[1], b[2], b[3]>.
80 define <4 x i32> @test7(<4 x i32> %a, <4 x i32> %b) {
83 ; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
85 %and1 = and <4 x i32> %a, <i32 -1, i32 -1, i32 0, i32 0>
86 %and2 = and <4 x i32> %b, <i32 0, i32 0, i32 -1, i32 -1>
87 %or = or <4 x i32> %and1, %and2
; test8: <2 x i64> AND-mask form of test1. %and1 keeps a[0], %and2 keeps b[1],
; so the OR equals <a[0], b[1]>.
92 define <2 x i64> @test8(<2 x i64> %a, <2 x i64> %b) {
95 ; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
97 %and1 = and <2 x i64> %a, <i64 -1, i64 0>
98 %and2 = and <2 x i64> %b, <i64 0, i64 -1>
99 %or = or <2 x i64> %and1, %and2
; test9: AND-mask form of test2. %and1 keeps a[2], a[3] and %and2 keeps
; b[0], b[1], so the OR equals <b[0], b[1], a[2], a[3]>.
104 define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) {
105 ; CHECK-LABEL: test9:
107 ; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
109 %and1 = and <4 x i32> %a, <i32 0, i32 0, i32 -1, i32 -1>
110 %and2 = and <4 x i32> %b, <i32 -1, i32 -1, i32 0, i32 0>
111 %or = or <4 x i32> %and1, %and2
; test10: AND-mask form of test3. %and1 keeps a[1] and %and2 keeps b[0], so the
; OR equals <b[0], a[1]>.
116 define <2 x i64> @test10(<2 x i64> %a, <2 x i64> %b) {
117 ; CHECK-LABEL: test10:
119 ; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
121 %and1 = and <2 x i64> %a, <i64 0, i64 -1>
122 %and2 = and <2 x i64> %b, <i64 -1, i64 0>
123 %or = or <2 x i64> %and1, %and2
; test11: AND-mask form of test4. %and1 keeps a[0] and %and2 keeps
; b[1], b[2], b[3], so the OR equals <a[0], b[1], b[2], b[3]>.
128 define <4 x i32> @test11(<4 x i32> %a, <4 x i32> %b) {
129 ; CHECK-LABEL: test11:
131 ; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
133 %and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 0>
134 %and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 -1>
135 %or = or <4 x i32> %and1, %and2
; test12: AND-mask form of test5. %and1 keeps a[1], a[2], a[3] and %and2 keeps
; b[0], so the OR equals <b[0], a[1], a[2], a[3]>.
140 define <4 x i32> @test12(<4 x i32> %a, <4 x i32> %b) {
141 ; CHECK-LABEL: test12:
143 ; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
145 %and1 = and <4 x i32> %a, <i32 0, i32 -1, i32 -1, i32 -1>
146 %and2 = and <4 x i32> %b, <i32 -1, i32 0, i32 0, i32 0>
147 %or = or <4 x i32> %and1, %and2
152 ; Verify that the following test cases are folded into single shuffles.
; test13: %shuf1 = <a[1], a[1], 0, 0> (element 1 of %a is replicated) and
; %shuf2 = <0, 0, b[2], b[3]>, so the OR equals <a[1], a[1], b[2], b[3]>.
; The expected code is a pshufd that rearranges %a followed by a pblendw.
154 define <4 x i32> @test13(<4 x i32> %a, <4 x i32> %b) {
155 ; CHECK-LABEL: test13:
157 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
158 ; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
160 %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 1, i32 1, i32 4, i32 4>
161 %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
162 %or = or <4 x i32> %shuf1, %shuf2
; test14: %shuf1 = <a[0], 0> and %shuf2 = <0, b[0]>, so the OR equals
; <a[0], b[0]>, i.e. the low elements of both inputs interleaved (punpcklqdq).
167 define <2 x i64> @test14(<2 x i64> %a, <2 x i64> %b) {
168 ; CHECK-LABEL: test14:
170 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
172 %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
173 %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
174 %or = or <2 x i64> %shuf1, %shuf2
; test15: %shuf1 = <0, 0, a[2], a[1]> and %shuf2 = <b[2], b[1], 0, 0>, so the OR
; equals <b[2], b[1], a[2], a[1]>. The expected code rearranges each input with
; a pshufd and then combines the two halves with a pblendw.
179 define <4 x i32> @test15(<4 x i32> %a, <4 x i32> %b) {
180 ; CHECK-LABEL: test15:
182 ; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,2,1]
183 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,1,2,3]
184 ; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
186 %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 1>
187 %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 2, i32 1, i32 4, i32 4>
188 %or = or <4 x i32> %shuf1, %shuf2
; test16: mirror of test14. %shuf1 = <0, a[0]> and %shuf2 = <b[0], 0>, so the OR
; equals <b[0], a[0]>. The expected punpcklqdq writes xmm1, hence the trailing
; movdqa that copies the result into xmm0.
193 define <2 x i64> @test16(<2 x i64> %a, <2 x i64> %b) {
194 ; CHECK-LABEL: test16:
196 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
197 ; CHECK-NEXT: movdqa %xmm1, %xmm0
199 %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
200 %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
201 %or = or <2 x i64> %shuf1, %shuf2
206 ; Verify that the dag-combiner does not fold an OR of two shuffles into a single
207 ; shuffle instruction when the shuffle indexes are not compatible.
; test17: %shuf1 = <0, a[0], 0, a[2]> and %shuf2 = <b[0], b[1], 0, 0>. Lane 1 is
; non-zero in both operands (a[0] and b[1]), so the OR equals
; <b[0], a[0] | b[1], 0, a[2]> and a real por is expected in the output.
209 define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) {
210 ; CHECK-LABEL: test17:
212 ; CHECK-NEXT: psllq $32, %xmm0
213 ; CHECK-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero
214 ; CHECK-NEXT: por %xmm1, %xmm0
216 %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 2>
217 %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
218 %or = or <4 x i32> %shuf1, %shuf2
; test18: %shuf1 = <0, a[0], 0, 0> (a[0] is moved to lane 1) and
; %shuf2 = <b[0], 0, 0, 0>, so the OR equals <b[0], a[0], 0, 0>. The expected
; output zeroes and rearranges each input separately and then joins them with
; a por.
223 define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) {
224 ; CHECK-LABEL: test18:
226 ; CHECK-NEXT: pxor %xmm2, %xmm2
227 ; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
228 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
229 ; CHECK-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
230 ; CHECK-NEXT: por %xmm1, %xmm0
232 %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 4>
233 %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
234 %or = or <4 x i32> %shuf1, %shuf2
; test19: %shuf1 = <0, a[0], 0, a[3]> and %shuf2 = <b[0], 0, b[2], b[2]>. Lane 3
; is non-zero in both operands (a[3] and b[2]), so the OR equals
; <b[0], a[0], b[2], a[3] | b[2]> and a real por is expected in the output.
239 define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
240 ; CHECK-LABEL: test19:
242 ; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,3]
243 ; CHECK-NEXT: pxor %xmm3, %xmm3
244 ; CHECK-NEXT: pblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
245 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,2,2]
246 ; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5,6,7]
247 ; CHECK-NEXT: por %xmm2, %xmm0
249 %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 3>
250 %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 2, i32 2>
251 %or = or <4 x i32> %shuf1, %shuf2
; test20: both shuffles use the same mask, giving <a[0], 0> and <b[0], 0>, so
; the OR equals <a[0] | b[0], 0>. The expected code ORs the full registers and
; then uses movq to clear the upper element.
256 define <2 x i64> @test20(<2 x i64> %a, <2 x i64> %b) {
257 ; CHECK-LABEL: test20:
259 ; CHECK-NEXT: por %xmm1, %xmm0
260 ; CHECK-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
262 %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
263 %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
264 %or = or <2 x i64> %shuf1, %shuf2
; test21: both shuffles use the same mask, giving <0, a[0]> and <0, b[0]>, so
; the OR equals <0, a[0] | b[0]>. The expected code ORs the full registers and
; then uses pslldq to move the low 8 bytes into the upper half, zero-filling
; the low half.
269 define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) {
270 ; CHECK-LABEL: test21:
272 ; CHECK-NEXT: por %xmm1, %xmm0
273 ; CHECK-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
275 %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
276 %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
277 %or = or <2 x i64> %shuf1, %shuf2
282 ; Verify that the dag-combiner keeps the floating-point domain when float/double
283 ; vectors are bitcast to integer vectors so that the mask-or blend combine applies.
; test22: <2 x double> inputs are bitcast to <2 x i64>, masked with
; complementary AND masks, ORed, and bitcast back. The result is element 0 of
; %a1 and element 1 of %a0; the expected instruction is blendpd.
285 define <2 x double> @test22(<2 x double> %a0, <2 x double> %a1) {
286 ; CHECK-LABEL: test22:
288 ; CHECK-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
290 %bc1 = bitcast <2 x double> %a0 to <2 x i64>
291 %bc2 = bitcast <2 x double> %a1 to <2 x i64>
292 %and1 = and <2 x i64> %bc1, <i64 0, i64 -1>
293 %and2 = and <2 x i64> %bc2, <i64 -1, i64 0>
294 %or = or <2 x i64> %and1, %and2
295 %bc3 = bitcast <2 x i64> %or to <2 x double>
296 ret <2 x double> %bc3
; test23: <4 x float> inputs are bitcast to <4 x i32>, masked with
; complementary AND masks, ORed, and bitcast back. The result is
; <a1[0], a0[1], a0[2], a1[3]>; the expected instruction is blendps.
300 define <4 x float> @test23(<4 x float> %a0, <4 x float> %a1) {
301 ; CHECK-LABEL: test23:
303 ; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
305 %bc1 = bitcast <4 x float> %a0 to <4 x i32>
306 %bc2 = bitcast <4 x float> %a1 to <4 x i32>
307 %and1 = and <4 x i32> %bc1, <i32 0, i32 -1, i32 -1, i32 0>
308 %and2 = and <4 x i32> %bc2, <i32 -1, i32 0, i32 0, i32 -1>
309 %or = or <4 x i32> %and1, %and2
310 %bc3 = bitcast <4 x i32> %or to <4 x float>
; test24: <4 x float> inputs are bitcast to <2 x i64>, so the masks operate on
; 64-bit halves: the low half comes from %a1 and the high half from %a0. The
; expected instruction is blendpd, which blends at 64-bit granularity.
315 define <4 x float> @test24(<4 x float> %a0, <4 x float> %a1) {
316 ; CHECK-LABEL: test24:
318 ; CHECK-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
320 %bc1 = bitcast <4 x float> %a0 to <2 x i64>
321 %bc2 = bitcast <4 x float> %a1 to <2 x i64>
322 %and1 = and <2 x i64> %bc1, <i64 0, i64 -1>
323 %and2 = and <2 x i64> %bc2, <i64 -1, i64 0>
324 %or = or <2 x i64> %and1, %and2
325 %bc3 = bitcast <2 x i64> %or to <4 x float>
; test25: same masks as test23, but the second operand is a constant vector of
; four 1.0 values. The result is <1.0, a0[1], a0[2], 1.0>; the expected blendps
; takes the constant lanes from a memory operand.
330 define <4 x float> @test25(<4 x float> %a0) {
331 ; CHECK-LABEL: test25:
333 ; CHECK-NEXT: blendps {{.*#+}} xmm0 = mem[0],xmm0[1,2],mem[3]
335 %bc1 = bitcast <4 x float> %a0 to <4 x i32>
336 %bc2 = bitcast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0> to <4 x i32>
337 %and1 = and <4 x i32> %bc1, <i32 0, i32 -1, i32 -1, i32 0>
338 %and2 = and <4 x i32> %bc2, <i32 -1, i32 0, i32 0, i32 -1>
339 %or = or <4 x i32> %and1, %and2
340 %bc3 = bitcast <4 x i32> %or to <4 x float>
345 ; Verify that the DAGCombiner does not crash when checking whether a shuffle
346 ; with an illegal type has a legal mask. Method 'isShuffleMaskLegal' only knows
347 ; how to handle legal vector value types.
; test_crash: same masks as test2, applied to <4 x i8>. %shuf1 =
; <0, 0, a[2], a[3]> and %shuf2 = <b[0], b[1], 0, 0>, so the OR equals
; <b[0], b[1], a[2], a[3]>.
; NOTE(review): the expected pblendw blends 32-bit-wide lanes, which suggests
; the <4 x i8> operands are widened during type legalization; confirm.
348 define <4 x i8> @test_crash(<4 x i8> %a, <4 x i8> %b) {
349 ; CHECK-LABEL: test_crash:
351 ; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
353 %shuf1 = shufflevector <4 x i8> %a, <4 x i8> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
354 %shuf2 = shufflevector <4 x i8> %b, <4 x i8> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
355 %or = or <4 x i8> %shuf1, %shuf2