; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 < %s | FileCheck %s
;
; Verify that the DAGCombiner is able to fold a vector AND into a blend
; if one of the operands to the AND is a vector of all constants, and each
; constant element is either zero or all-ones.
;
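; As an illustrative sketch (not part of the checked output), the combine
; conceptually rewrites a masked AND such as
;   %r = and <4 x i32> %x, <i32 -1, i32 0, i32 0, i32 0>
; into a select between %x and zero with an all-constant condition:
;   %r = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>,
;               <4 x i32> %x, <4 x i32> zeroinitializer
; which the backend then matches as a single blend against a zeroed register.
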
define <4 x i32> @test1(<4 x i32> %A) {
; CHECK-LABEL: test1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pxor %xmm1, %xmm1
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 0, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test2(<4 x i32> %A) {
; CHECK-LABEL: test2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pxor %xmm1, %xmm1
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 -1, i32 0, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test3(<4 x i32> %A) {
; CHECK-LABEL: test3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pxor %xmm1, %xmm1
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 0, i32 -1, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test4(<4 x i32> %A) {
; CHECK-LABEL: test4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pxor %xmm1, %xmm1
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 0, i32 0, i32 -1>
  ret <4 x i32> %1
}

define <4 x i32> @test5(<4 x i32> %A) {
; CHECK-LABEL: test5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pxor %xmm1, %xmm1
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test6(<4 x i32> %A) {
; CHECK-LABEL: test6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pxor %xmm1, %xmm1
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 -1, i32 0, i32 -1>
  ret <4 x i32> %1
}

define <4 x i32> @test7(<4 x i32> %A) {
; CHECK-LABEL: test7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pxor %xmm1, %xmm1
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 0, i32 -1, i32 -1>
  ret <4 x i32> %1
}

define <4 x i32> @test8(<4 x i32> %A) {
; CHECK-LABEL: test8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pxor %xmm1, %xmm1
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 0, i32 -1>
  ret <4 x i32> %1
}

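; When the constant mask keeps only the two low elements, the combine is
; matched as a zero-extending 'movq' (clearing the upper 64 bits) instead
; of a blend against a zeroed register.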
define <4 x i32> @test9(<4 x i32> %A) {
; CHECK-LABEL: test9:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 -1, i32 0, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test10(<4 x i32> %A) {
; CHECK-LABEL: test10:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pxor %xmm1, %xmm1
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 -1, i32 -1, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test11(<4 x i32> %A) {
; CHECK-LABEL: test11:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pxor %xmm1, %xmm1
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %1
}

define <4 x i32> @test12(<4 x i32> %A) {
; CHECK-LABEL: test12:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pxor %xmm1, %xmm1
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 -1, i32 -1, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test13(<4 x i32> %A) {
; CHECK-LABEL: test13:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pxor %xmm1, %xmm1
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 -1, i32 0, i32 -1>
  ret <4 x i32> %1
}

define <4 x i32> @test14(<4 x i32> %A) {
; CHECK-LABEL: test14:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pxor %xmm1, %xmm1
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 -1>
  ret <4 x i32> %1
}

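; In the tests below, (or (and %A, M), (and %B, M')) uses complementary
; 0/-1 constant masks, so the whole expression folds into a single blend
; of the two input registers and no zero vector is needed.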
define <4 x i32> @test15(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test15:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 -1>
  %2 = and <4 x i32> %B, <i32 0, i32 -1, i32 0, i32 0>
  %3 = or <4 x i32> %1, %2
  ret <4 x i32> %3
}

define <4 x i32> @test16(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 0>
  %2 = and <4 x i32> %B, <i32 0, i32 -1, i32 0, i32 -1>
  %3 = or <4 x i32> %1, %2
  ret <4 x i32> %3
}

define <4 x i32> @test17(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test17:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 -1, i32 0, i32 -1>
  %2 = and <4 x i32> %B, <i32 -1, i32 0, i32 -1, i32 0>
  %3 = or <4 x i32> %1, %2
  ret <4 x i32> %3
}