; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding | FileCheck %s
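
; Tests for the 128-bit and 256-bit AVX-512VL intrinsics: masked integer
; compares into k-registers, compress/expand, masked blends, and
; pmuldq/pmuludq, with instruction encodings checked where relevant.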
define i8 @test_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_pcmpeq_d_256
; CHECK: vpcmpeqd %ymm1, %ymm0, %k0 ##
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_d_256
; CHECK: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ##
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32>, <8 x i32>, i8)

define i8 @test_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_pcmpeq_q_256
; CHECK: vpcmpeqq %ymm1, %ymm0, %k0 ##
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_q_256
; CHECK: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ##
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64>, <4 x i64>, i8)

define i8 @test_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_pcmpgt_d_256
; CHECK: vpcmpgtd %ymm1, %ymm0, %k0 ##
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_d_256
; CHECK: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} ##
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32>, <8 x i32>, i8)

define i8 @test_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_pcmpgt_q_256
; CHECK: vpcmpgtq %ymm1, %ymm0, %k0 ##
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_q_256
; CHECK: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} ##
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64>, <4 x i64>, i8)

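; The cmp/ucmp intrinsics take the comparison predicate as an immediate.
; The CHECK lines below expect the printed aliases for immediates 0-7, in
; order: eq, lt, le, unord, neq, nlt, nle, ord (with unsigned "u" forms
; for the ucmp tests).
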
define <8 x i8> @test_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_cmp_d_256
; CHECK: vpcmpeqd %ymm1, %ymm0, %k0 ##
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
; CHECK: vpcmpltd %ymm1, %ymm0, %k0 ##
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
; CHECK: vpcmpled %ymm1, %ymm0, %k0 ##
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
; CHECK: vpcmpunordd %ymm1, %ymm0, %k0 ##
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
; CHECK: vpcmpneqd %ymm1, %ymm0, %k0 ##
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
; CHECK: vpcmpnltd %ymm1, %ymm0, %k0 ##
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
; CHECK: vpcmpnled %ymm1, %ymm0, %k0 ##
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
; CHECK: vpcmpordd %ymm1, %ymm0, %k0 ##
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

define <8 x i8> @test_mask_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
; CHECK-LABEL: test_mask_cmp_d_256
; CHECK: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ##
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
; CHECK: vpcmpltd %ymm1, %ymm0, %k0 {%k1} ##
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
; CHECK: vpcmpled %ymm1, %ymm0, %k0 {%k1} ##
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
; CHECK: vpcmpunordd %ymm1, %ymm0, %k0 {%k1} ##
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
; CHECK: vpcmpneqd %ymm1, %ymm0, %k0 {%k1} ##
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
; CHECK: vpcmpnltd %ymm1, %ymm0, %k0 {%k1} ##
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
; CHECK: vpcmpnled %ymm1, %ymm0, %k0 {%k1} ##
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
; CHECK: vpcmpordd %ymm1, %ymm0, %k0 {%k1} ##
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32>, <8 x i32>, i32, i8) nounwind readnone

define <8 x i8> @test_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_ucmp_d_256
; CHECK: vpcmpequd %ymm1, %ymm0, %k0 ##
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
; CHECK: vpcmpltud %ymm1, %ymm0, %k0 ##
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
; CHECK: vpcmpleud %ymm1, %ymm0, %k0 ##
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
; CHECK: vpcmpunordud %ymm1, %ymm0, %k0 ##
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
; CHECK: vpcmpnequd %ymm1, %ymm0, %k0 ##
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
; CHECK: vpcmpnltud %ymm1, %ymm0, %k0 ##
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
; CHECK: vpcmpnleud %ymm1, %ymm0, %k0 ##
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
; CHECK: vpcmpordud %ymm1, %ymm0, %k0 ##
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

define <8 x i8> @test_mask_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
; CHECK-LABEL: test_mask_ucmp_d_256
; CHECK: vpcmpequd %ymm1, %ymm0, %k0 {%k1} ##
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
; CHECK: vpcmpltud %ymm1, %ymm0, %k0 {%k1} ##
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
; CHECK: vpcmpleud %ymm1, %ymm0, %k0 {%k1} ##
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
; CHECK: vpcmpunordud %ymm1, %ymm0, %k0 {%k1} ##
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
; CHECK: vpcmpnequd %ymm1, %ymm0, %k0 {%k1} ##
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
; CHECK: vpcmpnltud %ymm1, %ymm0, %k0 {%k1} ##
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
; CHECK: vpcmpnleud %ymm1, %ymm0, %k0 {%k1} ##
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
; CHECK: vpcmpordud %ymm1, %ymm0, %k0 {%k1} ##
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32>, <8 x i32>, i32, i8) nounwind readnone

define <8 x i8> @test_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_cmp_q_256
; CHECK: vpcmpeqq %ymm1, %ymm0, %k0 ##
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
; CHECK: vpcmpltq %ymm1, %ymm0, %k0 ##
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
; CHECK: vpcmpleq %ymm1, %ymm0, %k0 ##
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
; CHECK: vpcmpunordq %ymm1, %ymm0, %k0 ##
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
; CHECK: vpcmpneqq %ymm1, %ymm0, %k0 ##
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
; CHECK: vpcmpnltq %ymm1, %ymm0, %k0 ##
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
; CHECK: vpcmpnleq %ymm1, %ymm0, %k0 ##
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
; CHECK: vpcmpordq %ymm1, %ymm0, %k0 ##
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

define <8 x i8> @test_mask_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_mask_cmp_q_256
; CHECK: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ##
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
; CHECK: vpcmpltq %ymm1, %ymm0, %k0 {%k1} ##
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
; CHECK: vpcmpleq %ymm1, %ymm0, %k0 {%k1} ##
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
; CHECK: vpcmpunordq %ymm1, %ymm0, %k0 {%k1} ##
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
; CHECK: vpcmpneqq %ymm1, %ymm0, %k0 {%k1} ##
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
; CHECK: vpcmpnltq %ymm1, %ymm0, %k0 {%k1} ##
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
; CHECK: vpcmpnleq %ymm1, %ymm0, %k0 {%k1} ##
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
; CHECK: vpcmpordq %ymm1, %ymm0, %k0 {%k1} ##
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounwind readnone

define <8 x i8> @test_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_ucmp_q_256
; CHECK: vpcmpequq %ymm1, %ymm0, %k0 ##
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
; CHECK: vpcmpltuq %ymm1, %ymm0, %k0 ##
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
; CHECK: vpcmpleuq %ymm1, %ymm0, %k0 ##
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
; CHECK: vpcmpunorduq %ymm1, %ymm0, %k0 ##
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
; CHECK: vpcmpnequq %ymm1, %ymm0, %k0 ##
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
; CHECK: vpcmpnltuq %ymm1, %ymm0, %k0 ##
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
; CHECK: vpcmpnleuq %ymm1, %ymm0, %k0 ##
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
; CHECK: vpcmporduq %ymm1, %ymm0, %k0 ##
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

define <8 x i8> @test_mask_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_mask_ucmp_q_256
; CHECK: vpcmpequq %ymm1, %ymm0, %k0 {%k1} ##
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
; CHECK: vpcmpltuq %ymm1, %ymm0, %k0 {%k1} ##
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
; CHECK: vpcmpleuq %ymm1, %ymm0, %k0 {%k1} ##
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
; CHECK: vpcmpunorduq %ymm1, %ymm0, %k0 {%k1} ##
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
; CHECK: vpcmpnequq %ymm1, %ymm0, %k0 {%k1} ##
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
; CHECK: vpcmpnltuq %ymm1, %ymm0, %k0 {%k1} ##
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
; CHECK: vpcmpnleuq %ymm1, %ymm0, %k0 {%k1} ##
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
; CHECK: vpcmporduq %ymm1, %ymm0, %k0 {%k1} ##
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounwind readnone

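; The same compare intrinsics at 128-bit vector width; these mirror the
; 256-bit tests above with xmm operands.
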
define i8 @test_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_pcmpeq_d_128
; CHECK: vpcmpeqd %xmm1, %xmm0, %k0 ##
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_d_128
; CHECK: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ##
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32>, <4 x i32>, i8)

define i8 @test_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_pcmpeq_q_128
; CHECK: vpcmpeqq %xmm1, %xmm0, %k0 ##
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_q_128
; CHECK: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ##
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64>, <2 x i64>, i8)

define i8 @test_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_pcmpgt_d_128
; CHECK: vpcmpgtd %xmm1, %xmm0, %k0 ##
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_d_128
; CHECK: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} ##
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32>, <4 x i32>, i8)

define i8 @test_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_pcmpgt_q_128
; CHECK: vpcmpgtq %xmm1, %xmm0, %k0 ##
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_q_128
; CHECK: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} ##
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64>, <2 x i64>, i8)

define <8 x i8> @test_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_cmp_d_128
; CHECK: vpcmpeqd %xmm1, %xmm0, %k0 ##
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
; CHECK: vpcmpltd %xmm1, %xmm0, %k0 ##
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
; CHECK: vpcmpled %xmm1, %xmm0, %k0 ##
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
; CHECK: vpcmpunordd %xmm1, %xmm0, %k0 ##
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
; CHECK: vpcmpneqd %xmm1, %xmm0, %k0 ##
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
; CHECK: vpcmpnltd %xmm1, %xmm0, %k0 ##
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
; CHECK: vpcmpnled %xmm1, %xmm0, %k0 ##
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
; CHECK: vpcmpordd %xmm1, %xmm0, %k0 ##
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

define <8 x i8> @test_mask_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
; CHECK-LABEL: test_mask_cmp_d_128
; CHECK: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ##
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
; CHECK: vpcmpltd %xmm1, %xmm0, %k0 {%k1} ##
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
; CHECK: vpcmpled %xmm1, %xmm0, %k0 {%k1} ##
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
; CHECK: vpcmpunordd %xmm1, %xmm0, %k0 {%k1} ##
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
; CHECK: vpcmpneqd %xmm1, %xmm0, %k0 {%k1} ##
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
; CHECK: vpcmpnltd %xmm1, %xmm0, %k0 {%k1} ##
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
; CHECK: vpcmpnled %xmm1, %xmm0, %k0 {%k1} ##
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
; CHECK: vpcmpordd %xmm1, %xmm0, %k0 {%k1} ##
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32>, <4 x i32>, i32, i8) nounwind readnone

define <8 x i8> @test_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_ucmp_d_128
; CHECK: vpcmpequd %xmm1, %xmm0, %k0 ##
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
; CHECK: vpcmpltud %xmm1, %xmm0, %k0 ##
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
; CHECK: vpcmpleud %xmm1, %xmm0, %k0 ##
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
; CHECK: vpcmpunordud %xmm1, %xmm0, %k0 ##
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
; CHECK: vpcmpnequd %xmm1, %xmm0, %k0 ##
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
; CHECK: vpcmpnltud %xmm1, %xmm0, %k0 ##
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
; CHECK: vpcmpnleud %xmm1, %xmm0, %k0 ##
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
; CHECK: vpcmpordud %xmm1, %xmm0, %k0 ##
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

define <8 x i8> @test_mask_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
; CHECK-LABEL: test_mask_ucmp_d_128
; CHECK: vpcmpequd %xmm1, %xmm0, %k0 {%k1} ##
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
; CHECK: vpcmpltud %xmm1, %xmm0, %k0 {%k1} ##
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
; CHECK: vpcmpleud %xmm1, %xmm0, %k0 {%k1} ##
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
; CHECK: vpcmpunordud %xmm1, %xmm0, %k0 {%k1} ##
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
; CHECK: vpcmpnequd %xmm1, %xmm0, %k0 {%k1} ##
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
; CHECK: vpcmpnltud %xmm1, %xmm0, %k0 {%k1} ##
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
; CHECK: vpcmpnleud %xmm1, %xmm0, %k0 {%k1} ##
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
; CHECK: vpcmpordud %xmm1, %xmm0, %k0 {%k1} ##
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32>, <4 x i32>, i32, i8) nounwind readnone

define <8 x i8> @test_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_cmp_q_128
; CHECK: vpcmpeqq %xmm1, %xmm0, %k0 ##
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
; CHECK: vpcmpltq %xmm1, %xmm0, %k0 ##
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
; CHECK: vpcmpleq %xmm1, %xmm0, %k0 ##
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
; CHECK: vpcmpunordq %xmm1, %xmm0, %k0 ##
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
; CHECK: vpcmpneqq %xmm1, %xmm0, %k0 ##
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
; CHECK: vpcmpnltq %xmm1, %xmm0, %k0 ##
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
; CHECK: vpcmpnleq %xmm1, %xmm0, %k0 ##
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
; CHECK: vpcmpordq %xmm1, %xmm0, %k0 ##
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

define <8 x i8> @test_mask_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_mask_cmp_q_128
; CHECK: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ##
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
; CHECK: vpcmpltq %xmm1, %xmm0, %k0 {%k1} ##
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
; CHECK: vpcmpleq %xmm1, %xmm0, %k0 {%k1} ##
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
; CHECK: vpcmpunordq %xmm1, %xmm0, %k0 {%k1} ##
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
; CHECK: vpcmpneqq %xmm1, %xmm0, %k0 {%k1} ##
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
; CHECK: vpcmpnltq %xmm1, %xmm0, %k0 {%k1} ##
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
; CHECK: vpcmpnleq %xmm1, %xmm0, %k0 {%k1} ##
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
; CHECK: vpcmpordq %xmm1, %xmm0, %k0 {%k1} ##
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64>, <2 x i64>, i32, i8) nounwind readnone

define <8 x i8> @test_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_ucmp_q_128
; CHECK: vpcmpequq %xmm1, %xmm0, %k0 ##
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
; CHECK: vpcmpltuq %xmm1, %xmm0, %k0 ##
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
; CHECK: vpcmpleuq %xmm1, %xmm0, %k0 ##
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
; CHECK: vpcmpunorduq %xmm1, %xmm0, %k0 ##
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
; CHECK: vpcmpnequq %xmm1, %xmm0, %k0 ##
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
; CHECK: vpcmpnltuq %xmm1, %xmm0, %k0 ##
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
; CHECK: vpcmpnleuq %xmm1, %xmm0, %k0 ##
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
; CHECK: vpcmporduq %xmm1, %xmm0, %k0 ##
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

define <8 x i8> @test_mask_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_mask_ucmp_q_128
; CHECK: vpcmpequq %xmm1, %xmm0, %k0 {%k1} ##
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
; CHECK: vpcmpltuq %xmm1, %xmm0, %k0 {%k1} ##
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
; CHECK: vpcmpleuq %xmm1, %xmm0, %k0 {%k1} ##
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
; CHECK: vpcmpunorduq %xmm1, %xmm0, %k0 {%k1} ##
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
; CHECK: vpcmpnequq %xmm1, %xmm0, %k0 {%k1} ##
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
; CHECK: vpcmpnltuq %xmm1, %xmm0, %k0 {%k1} ##
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
; CHECK: vpcmpnleuq %xmm1, %xmm0, %k0 {%k1} ##
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
; CHECK: vpcmporduq %xmm1, %xmm0, %k0 {%k1} ##
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64>, <2 x i64>, i32, i8) nounwind readnone

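; Compress tests. compr7/compr8 (and expand7/expand8 further down) pass an
; all-ones mask, so no compress/expand instruction should be emitted; the
; CHECK-NOT lines verify that.
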
; CHECK-LABEL: compr1
; CHECK: vcompresspd %zmm0
define void @compr1(i8* %addr, <8 x double> %data, i8 %mask) {
  call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)

; CHECK-LABEL: compr2
; CHECK: vcompresspd %ymm0
define void @compr2(i8* %addr, <4 x double> %data, i8 %mask) {
  call void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask)

; CHECK-LABEL: compr3
; CHECK: vcompressps %xmm0
define void @compr3(i8* %addr, <4 x float> %data, i8 %mask) {
  call void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask)

; CHECK-LABEL: compr4
; CHECK: vcompresspd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x8a,0xc0]
define <8 x double> @compr4(i8* %addr, <8 x double> %data, i8 %mask) {
  %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
  ret <8 x double> %res
}

declare <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)

; CHECK-LABEL: compr5
; CHECK: vcompresspd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x8a,0xc1]
define <4 x double> @compr5(<4 x double> %data, <4 x double> %src0, i8 %mask) {
  %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)
  ret <4 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)

; CHECK-LABEL: compr6
; CHECK: vcompressps %xmm0
define <4 x float> @compr6(<4 x float> %data, i8 %mask) {
  %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask)

; CHECK-LABEL: compr7
; CHECK-NOT: vcompress
define void @compr7(i8* %addr, <8 x double> %data) {
  call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 -1)
  ret void
}

; CHECK-LABEL: compr8
; CHECK-NOT: vcompressps %xmm0
define <4 x float> @compr8(<4 x float> %data) {
  %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

; CHECK-LABEL: compr9
; CHECK: vpcompressq %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x07]
define void @compr9(i8* %addr, <8 x i64> %data, i8 %mask) {
  call void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask)

; CHECK-LABEL: compr10
; CHECK: vpcompressd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0]
define <4 x i32> @compr10(<4 x i32> %data, i8 %mask) {
  %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)

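; Expand is the inverse of compress: elements are read contiguously and
; scattered to the positions selected by the mask.
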
; CHECK-LABEL: expand1
; CHECK: vexpandpd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x07]
define <8 x double> @expand1(i8* %addr, <8 x double> %data, i8 %mask) {
  %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
  ret <8 x double> %res
}

declare <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask)

; CHECK-LABEL: expand2
; CHECK: vexpandpd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x88,0x07]
define <4 x double> @expand2(i8* %addr, <4 x double> %data, i8 %mask) {
  %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
  ret <4 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask)

; CHECK-LABEL: expand3
; CHECK: vexpandps (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x88,0x07]
define <4 x float> @expand3(i8* %addr, <4 x float> %data, i8 %mask) {
  %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask)

; CHECK-LABEL: expand4
; CHECK: vexpandpd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0xc0]
define <8 x double> @expand4(i8* %addr, <8 x double> %data, i8 %mask) {
  %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
  ret <8 x double> %res
}

declare <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)

; CHECK-LABEL: expand5
; CHECK: vexpandpd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x88,0xc8]
define <4 x double> @expand5(<4 x double> %data, <4 x double> %src0, i8 %mask) {
  %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)
  ret <4 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)

; CHECK-LABEL: expand6
; CHECK: vexpandps %xmm0
define <4 x float> @expand6(<4 x float> %data, i8 %mask) {
  %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask)

; CHECK-LABEL: expand7
define <8 x double> @expand7(i8* %addr, <8 x double> %data) {
  %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 -1)
  ret <8 x double> %res
}

; CHECK-LABEL: expand8
; CHECK-NOT: vexpandps %xmm0
define <4 x float> @expand8(<4 x float> %data) {
  %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

; CHECK-LABEL: expand9
; CHECK: vpexpandq (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x07]
define <8 x i64> @expand9(i8* %addr, <8 x i64> %data, i8 %mask) {
  %res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)

; CHECK-LABEL: expand10
; CHECK: vpexpandd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x89,0xc0]
define <4 x i32> @expand10(<4 x i32> %data, i8 %mask) {
  %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)

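; Masked blend tests: the intrinsic selects per-element between the two
; vector operands under the i8 mask; the floating-point forms checked here
; lower to vblendmps/vblendmpd.
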
define <8 x float> @test_x86_mask_blend_ps_256(i8 %a0, <8 x float> %a1, <8 x float> %a2) {
; CHECK: vblendmps %ymm1, %ymm0
  %res = call <8 x float> @llvm.x86.avx512.mask.blend.ps.256(<8 x float> %a1, <8 x float> %a2, i8 %a0)
  ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.blend.ps.256(<8 x float>, <8 x float>, i8) nounwind readonly

define <4 x double> @test_x86_mask_blend_pd_256(i8 %a0, <4 x double> %a1, <4 x double> %a2) {
; CHECK: vblendmpd %ymm1, %ymm0
  %res = call <4 x double> @llvm.x86.avx512.mask.blend.pd.256(<4 x double> %a1, <4 x double> %a2, i8 %a0)
  ret <4 x double> %res
}

define <4 x double> @test_x86_mask_blend_pd_256_memop(<4 x double> %a, <4 x double>* %ptr, i8 %mask) {
; CHECK-LABEL: test_x86_mask_blend_pd_256_memop
; CHECK: vblendmpd (%
  %b = load <4 x double>, <4 x double>* %ptr
  %res = call <4 x double> @llvm.x86.avx512.mask.blend.pd.256(<4 x double> %a, <4 x double> %b, i8 %mask)
  ret <4 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.blend.pd.256(<4 x double>, <4 x double>, i8) nounwind readonly

; CHECK-LABEL: test_x86_mask_blend_d_256
define <8 x i32> @test_x86_mask_blend_d_256(i8 %a0, <8 x i32> %a1, <8 x i32> %a2) {
  %res = call <8 x i32> @llvm.x86.avx512.mask.blend.d.256(<8 x i32> %a1, <8 x i32> %a2, i8 %a0)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.blend.d.256(<8 x i32>, <8 x i32>, i8) nounwind readonly

define <4 x i64> @test_x86_mask_blend_q_256(i8 %a0, <4 x i64> %a1, <4 x i64> %a2) {
  %res = call <4 x i64> @llvm.x86.avx512.mask.blend.q.256(<4 x i64> %a1, <4 x i64> %a2, i8 %a0)
  ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.blend.q.256(<4 x i64>, <4 x i64>, i8) nounwind readonly

define <4 x float> @test_x86_mask_blend_ps_128(i8 %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK: vblendmps %xmm1, %xmm0
  %res = call <4 x float> @llvm.x86.avx512.mask.blend.ps.128(<4 x float> %a1, <4 x float> %a2, i8 %a0)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.blend.ps.128(<4 x float>, <4 x float>, i8) nounwind readonly

define <2 x double> @test_x86_mask_blend_pd_128(i8 %a0, <2 x double> %a1, <2 x double> %a2) {
; CHECK: vblendmpd %xmm1, %xmm0
  %res = call <2 x double> @llvm.x86.avx512.mask.blend.pd.128(<2 x double> %a1, <2 x double> %a2, i8 %a0)
  ret <2 x double> %res
}

define <2 x double> @test_x86_mask_blend_pd_128_memop(<2 x double> %a, <2 x double>* %ptr, i8 %mask) {
; CHECK-LABEL: test_x86_mask_blend_pd_128_memop
; CHECK: vblendmpd (%
  %b = load <2 x double>, <2 x double>* %ptr
  %res = call <2 x double> @llvm.x86.avx512.mask.blend.pd.128(<2 x double> %a, <2 x double> %b, i8 %mask)
  ret <2 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask.blend.pd.128(<2 x double>, <2 x double>, i8) nounwind readonly

define <4 x i32> @test_x86_mask_blend_d_128(i8 %a0, <4 x i32> %a1, <4 x i32> %a2) {
  %res = call <4 x i32> @llvm.x86.avx512.mask.blend.d.128(<4 x i32> %a1, <4 x i32> %a2, i8 %a0)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.blend.d.128(<4 x i32>, <4 x i32>, i8) nounwind readonly

define <2 x i64> @test_x86_mask_blend_q_128(i8 %a0, <2 x i64> %a1, <2 x i64> %a2) {
  %res = call <2 x i64> @llvm.x86.avx512.mask.blend.q.128(<2 x i64> %a1, <2 x i64> %a2, i8 %a0)
  ret <2 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.blend.q.128(<2 x i64>, <2 x i64>, i8) nounwind readonly

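; pmul.dq/pmulu.dq tests: signed/unsigned multiply of the even 32-bit
; elements producing 64-bit results. The test-name suffix encodes the
; operand and masking form: rr/rm/rmb = register, memory, and
; broadcast-from-scalar sources; k = merge masking into the passthru
; operand; kz = zero masking.
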
define <2 x i64> @test_mask_mul_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_mul_epi32_rr_128
; CHECK: vpmuldq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x28,0xc1]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <2 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epi32_rrk_128
; CHECK: vpmuldq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x28,0xd1]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epi32_rrkz_128
; CHECK: vpmuldq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x28,0xc1]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; CHECK-LABEL: test_mask_mul_epi32_rm_128
; CHECK: vpmuldq (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x28,0x07]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <2 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epi32_rmk_128
; CHECK: vpmuldq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x28,0x0f]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epi32_rmkz_128
; CHECK: vpmuldq (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x28,0x07]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epi32_rmb_128(<4 x i32> %a, i64* %ptr_b) {
; CHECK-LABEL: test_mask_mul_epi32_rmb_128
; CHECK: vpmuldq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x18,0x28,0x07]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %b = bitcast <2 x i64> %b64 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epi32_rmbk_128(<4 x i32> %a, i64* %ptr_b, <2 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epi32_rmbk_128
; CHECK: vpmuldq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0x28,0x0f]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %b = bitcast <2 x i64> %b64 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epi32_rmbkz_128(<4 x i32> %a, i64* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epi32_rmbkz_128
; CHECK: vpmuldq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x99,0x28,0x07]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %b = bitcast <2 x i64> %b64 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(<4 x i32>, <4 x i32>, <2 x i64>, i8)

define <4 x i64> @test_mask_mul_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_mul_epi32_rr_256
; CHECK: vpmuldq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x28,0xc1]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <4 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epi32_rrk_256
; CHECK: vpmuldq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x28,0xd1]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epi32_rrkz_256
; CHECK: vpmuldq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x28,0xc1]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; CHECK-LABEL: test_mask_mul_epi32_rm_256
; CHECK: vpmuldq (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x28,0x07]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <4 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epi32_rmk_256
; CHECK: vpmuldq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x28,0x0f]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epi32_rmkz_256
; CHECK: vpmuldq (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x28,0x07]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epi32_rmb_256(<8 x i32> %a, i64* %ptr_b) {
; CHECK-LABEL: test_mask_mul_epi32_rmb_256
; CHECK: vpmuldq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x38,0x28,0x07]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %b = bitcast <4 x i64> %b64 to <8 x i32>
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epi32_rmbk_256(<8 x i32> %a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epi32_rmbk_256
; CHECK: vpmuldq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x39,0x28,0x0f]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %b = bitcast <4 x i64> %b64 to <8 x i32>
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epi32_rmbkz_256(<8 x i32> %a, i64* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epi32_rmbkz_256
; CHECK: vpmuldq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xb9,0x28,0x07]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %b = bitcast <4 x i64> %b64 to <8 x i32>
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(<8 x i32>, <8 x i32>, <4 x i64>, i8)

define <2 x i64> @test_mask_mul_epu32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_mul_epu32_rr_128
; CHECK: vpmuludq %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf4,0xc1]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epu32_rrk_128(<4 x i32> %a, <4 x i32> %b, <2 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epu32_rrk_128
; CHECK: vpmuludq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xf4,0xd1]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epu32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epu32_rrkz_128
; CHECK: vpmuludq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xf4,0xc1]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epu32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; CHECK-LABEL: test_mask_mul_epu32_rm_128
; CHECK: vpmuludq (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf4,0x07]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epu32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <2 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epu32_rmk_128
; CHECK: vpmuludq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xf4,0x0f]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epu32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epu32_rmkz_128
; CHECK: vpmuludq (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xf4,0x07]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epu32_rmb_128(<4 x i32> %a, i64* %ptr_b) {
; CHECK-LABEL: test_mask_mul_epu32_rmb_128
; CHECK: vpmuludq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x18,0xf4,0x07]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %b = bitcast <2 x i64> %b64 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epu32_rmbk_128(<4 x i32> %a, i64* %ptr_b, <2 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epu32_rmbk_128
; CHECK: vpmuludq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x19,0xf4,0x0f]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %b = bitcast <2 x i64> %b64 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epu32_rmbkz_128(<4 x i32> %a, i64* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epu32_rmbkz_128
; CHECK: vpmuludq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0xf4,0x07]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %b = bitcast <2 x i64> %b64 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(<4 x i32>, <4 x i32>, <2 x i64>, i8)

define <4 x i64> @test_mask_mul_epu32_rr_256(<8 x i32> %a, <8 x i32> %b) {
;CHECK-LABEL: test_mask_mul_epu32_rr_256
;CHECK: vpmuludq %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf4,0xc1]
%res = call <4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 -1)
ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epu32_rrk_256(<8 x i32> %a, <8 x i32> %b, <4 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_mul_epu32_rrk_256
;CHECK: vpmuludq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xf4,0xd1]
%res = call <4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> %passThru, i8 %mask)
ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epu32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
;CHECK-LABEL: test_mask_mul_epu32_rrkz_256
;CHECK: vpmuludq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0xc1]
%res = call <4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 %mask)
ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epu32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_mul_epu32_rm_256
;CHECK: vpmuludq (%rdi), %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf4,0x07]
%b = load <8 x i32>, <8 x i32>* %ptr_b
%res = call <4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 -1)
ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epu32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <4 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_mul_epu32_rmk_256
;CHECK: vpmuludq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xf4,0x0f]
%b = load <8 x i32>, <8 x i32>* %ptr_b
%res = call <4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> %passThru, i8 %mask)
ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epu32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_mul_epu32_rmkz_256
;CHECK: vpmuludq (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0x07]
%b = load <8 x i32>, <8 x i32>* %ptr_b
%res = call <4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 %mask)
ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epu32_rmb_256(<8 x i32> %a, i64* %ptr_b) {
;CHECK-LABEL: test_mask_mul_epu32_rmb_256
;CHECK: vpmuludq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x38,0xf4,0x07]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
%b64 = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
%b = bitcast <4 x i64> %b64 to <8 x i32>
%res = call <4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 -1)
ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epu32_rmbk_256(<8 x i32> %a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_mul_epu32_rmbk_256
;CHECK: vpmuludq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x39,0xf4,0x0f]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
%b64 = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
%b = bitcast <4 x i64> %b64 to <8 x i32>
%res = call <4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> %passThru, i8 %mask)
ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epu32_rmbkz_256(<8 x i32> %a, i64* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_mul_epu32_rmbkz_256
;CHECK: vpmuludq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0xf4,0x07]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
%b64 = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
%b = bitcast <4 x i64> %b64 to <8 x i32>
%res = call <4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 %mask)
ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(<8 x i32>, <8 x i32>, <4 x i64>, i8)
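
; Note the encodings: some unmasked forms are emitted with the shorter VEX
; prefix (e.g. the vpmuludq reg/reg and reg/mem cases above, which begin
; 0xc5), while any use of a mask register, zeroing, or embedded broadcast
; requires the 4-byte EVEX prefix beginning 0x62.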
define <4 x i32> @test_mask_add_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
;CHECK-LABEL: test_mask_add_epi32_rr_128
;CHECK: vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1]
%res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_add_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_add_epi32_rrk_128
;CHECK: vpaddd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfe,0xd1]
%res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_add_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
;CHECK-LABEL: test_mask_add_epi32_rrkz_128
;CHECK: vpaddd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfe,0xc1]
%res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_add_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_add_epi32_rm_128
;CHECK: vpaddd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0x07]
%b = load <4 x i32>, <4 x i32>* %ptr_b
%res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_add_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_add_epi32_rmk_128
;CHECK: vpaddd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfe,0x0f]
%b = load <4 x i32>, <4 x i32>* %ptr_b
%res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_add_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_add_epi32_rmkz_128
;CHECK: vpaddd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfe,0x07]
%b = load <4 x i32>, <4 x i32>* %ptr_b
%res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_add_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
;CHECK-LABEL: test_mask_add_epi32_rmb_128
;CHECK: vpaddd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
%res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_add_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_add_epi32_rmbk_128
;CHECK: vpaddd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xfe,0x0f]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
%res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_add_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_add_epi32_rmbkz_128
;CHECK: vpaddd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xfe,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
%res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
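
; The fourth encoding byte is the EVEX P2 byte, which accounts for the
; patterns in the checks above: bits 2:0 select the mask register
; (0x09 = {%k1}), bit 4 is the embedded-broadcast bit (0x18/0x19), bits 6:5
; give the vector length (0x08 = 128-bit, 0x28 = 256-bit), and bit 7 requests
; zeroing (0x89). For example, 0x99 in test_mask_add_epi32_rmbkz_128 above is
; zeroing + broadcast + %k1.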
define <4 x i32> @test_mask_sub_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
;CHECK-LABEL: test_mask_sub_epi32_rr_128
;CHECK: vpsubd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfa,0xc1]
%res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_sub_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_sub_epi32_rrk_128
;CHECK: vpsubd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfa,0xd1]
%res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_sub_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
;CHECK-LABEL: test_mask_sub_epi32_rrkz_128
;CHECK: vpsubd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfa,0xc1]
%res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_sub_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_sub_epi32_rm_128
;CHECK: vpsubd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfa,0x07]
%b = load <4 x i32>, <4 x i32>* %ptr_b
%res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_sub_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_sub_epi32_rmk_128
;CHECK: vpsubd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfa,0x0f]
%b = load <4 x i32>, <4 x i32>* %ptr_b
%res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_sub_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_sub_epi32_rmkz_128
;CHECK: vpsubd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfa,0x07]
%b = load <4 x i32>, <4 x i32>* %ptr_b
%res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_sub_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
;CHECK-LABEL: test_mask_sub_epi32_rmb_128
;CHECK: vpsubd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xfa,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
%res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_sub_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_sub_epi32_rmbk_128
;CHECK: vpsubd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xfa,0x0f]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
%res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_sub_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_sub_epi32_rmbkz_128
;CHECK: vpsubd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xfa,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
%res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
define <8 x i32> @test_mask_sub_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
;CHECK-LABEL: test_mask_sub_epi32_rr_256
;CHECK: vpsubd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfa,0xc1]
%res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_sub_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_sub_epi32_rrk_256
;CHECK: vpsubd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfa,0xd1]
%res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_sub_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
;CHECK-LABEL: test_mask_sub_epi32_rrkz_256
;CHECK: vpsubd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0xc1]
%res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_sub_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_sub_epi32_rm_256
;CHECK: vpsubd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfa,0x07]
%b = load <8 x i32>, <8 x i32>* %ptr_b
%res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_sub_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_sub_epi32_rmk_256
;CHECK: vpsubd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfa,0x0f]
%b = load <8 x i32>, <8 x i32>* %ptr_b
%res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_sub_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_sub_epi32_rmkz_256
;CHECK: vpsubd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0x07]
%b = load <8 x i32>, <8 x i32>* %ptr_b
%res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_sub_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
;CHECK-LABEL: test_mask_sub_epi32_rmb_256
;CHECK: vpsubd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xfa,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
%res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_sub_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_sub_epi32_rmbk_256
;CHECK: vpsubd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xfa,0x0f]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
%res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_sub_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_sub_epi32_rmbkz_256
;CHECK: vpsubd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xfa,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
%res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
define <8 x i32> @test_mask_add_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
;CHECK-LABEL: test_mask_add_epi32_rr_256
;CHECK: vpaddd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc1]
%res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_add_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_add_epi32_rrk_256
;CHECK: vpaddd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0xd1]
%res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_add_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
;CHECK-LABEL: test_mask_add_epi32_rrkz_256
;CHECK: vpaddd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0xc1]
%res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_add_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_add_epi32_rm_256
;CHECK: vpaddd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0x07]
%b = load <8 x i32>, <8 x i32>* %ptr_b
%res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_add_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_add_epi32_rmk_256
;CHECK: vpaddd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0x0f]
%b = load <8 x i32>, <8 x i32>* %ptr_b
%res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_add_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_add_epi32_rmkz_256
;CHECK: vpaddd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0x07]
%b = load <8 x i32>, <8 x i32>* %ptr_b
%res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_add_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
;CHECK-LABEL: test_mask_add_epi32_rmb_256
;CHECK: vpaddd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
%res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_add_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_add_epi32_rmbk_256
;CHECK: vpaddd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xfe,0x0f]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
%res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_add_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_add_epi32_rmbkz_256
;CHECK: vpaddd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xfe,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
%res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
define <4 x i32> @test_mask_and_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
;CHECK-LABEL: test_mask_and_epi32_rr_128
;CHECK: vpandd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdb,0xc1]
%res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_and_epi32_rrk_128
;CHECK: vpandd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdb,0xd1]
%res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
;CHECK-LABEL: test_mask_and_epi32_rrkz_128
;CHECK: vpandd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdb,0xc1]
%res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_and_epi32_rm_128
;CHECK: vpandd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdb,0x07]
%b = load <4 x i32>, <4 x i32>* %ptr_b
%res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_and_epi32_rmk_128
;CHECK: vpandd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdb,0x0f]
%b = load <4 x i32>, <4 x i32>* %ptr_b
%res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_and_epi32_rmkz_128
;CHECK: vpandd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdb,0x07]
%b = load <4 x i32>, <4 x i32>* %ptr_b
%res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
;CHECK-LABEL: test_mask_and_epi32_rmb_128
;CHECK: vpandd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xdb,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
%res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_and_epi32_rmbk_128
;CHECK: vpandd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xdb,0x0f]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
%res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_and_epi32_rmbkz_128
;CHECK: vpandd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xdb,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
%res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
define <8 x i32> @test_mask_and_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
;CHECK-LABEL: test_mask_and_epi32_rr_256
;CHECK: vpandd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdb,0xc1]
%res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_and_epi32_rrk_256
;CHECK: vpandd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdb,0xd1]
%res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
;CHECK-LABEL: test_mask_and_epi32_rrkz_256
;CHECK: vpandd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0xc1]
%res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_and_epi32_rm_256
;CHECK: vpandd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdb,0x07]
%b = load <8 x i32>, <8 x i32>* %ptr_b
%res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_and_epi32_rmk_256
;CHECK: vpandd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdb,0x0f]
%b = load <8 x i32>, <8 x i32>* %ptr_b
%res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_and_epi32_rmkz_256
;CHECK: vpandd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0x07]
%b = load <8 x i32>, <8 x i32>* %ptr_b
%res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
;CHECK-LABEL: test_mask_and_epi32_rmb_256
;CHECK: vpandd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xdb,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
%res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_and_epi32_rmbk_256
;CHECK: vpandd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xdb,0x0f]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
%res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_and_epi32_rmbkz_256
;CHECK: vpandd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xdb,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
%res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
define <4 x i32> @test_mask_or_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
;CHECK-LABEL: test_mask_or_epi32_rr_128
;CHECK: vpord %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xeb,0xc1]
%res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_or_epi32_rrk_128
;CHECK: vpord %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xeb,0xd1]
%res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
;CHECK-LABEL: test_mask_or_epi32_rrkz_128
;CHECK: vpord %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xeb,0xc1]
%res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_or_epi32_rm_128
;CHECK: vpord (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xeb,0x07]
%b = load <4 x i32>, <4 x i32>* %ptr_b
%res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_or_epi32_rmk_128
;CHECK: vpord (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xeb,0x0f]
%b = load <4 x i32>, <4 x i32>* %ptr_b
%res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_or_epi32_rmkz_128
;CHECK: vpord (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xeb,0x07]
%b = load <4 x i32>, <4 x i32>* %ptr_b
%res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
;CHECK-LABEL: test_mask_or_epi32_rmb_128
;CHECK: vpord (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xeb,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
%res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_or_epi32_rmbk_128
;CHECK: vpord (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xeb,0x0f]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
%res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_or_epi32_rmbkz_128
;CHECK: vpord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xeb,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
%res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
define <8 x i32> @test_mask_or_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
;CHECK-LABEL: test_mask_or_epi32_rr_256
;CHECK: vpord %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xeb,0xc1]
%res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_or_epi32_rrk_256
;CHECK: vpord %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xeb,0xd1]
%res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
;CHECK-LABEL: test_mask_or_epi32_rrkz_256
;CHECK: vpord %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0xc1]
%res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_or_epi32_rm_256
;CHECK: vpord (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xeb,0x07]
%b = load <8 x i32>, <8 x i32>* %ptr_b
%res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_or_epi32_rmk_256
;CHECK: vpord (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xeb,0x0f]
%b = load <8 x i32>, <8 x i32>* %ptr_b
%res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_or_epi32_rmkz_256
;CHECK: vpord (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0x07]
%b = load <8 x i32>, <8 x i32>* %ptr_b
%res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
;CHECK-LABEL: test_mask_or_epi32_rmb_256
;CHECK: vpord (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xeb,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
%res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_or_epi32_rmbk_256
;CHECK: vpord (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xeb,0x0f]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
%res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_or_epi32_rmbkz_256
;CHECK: vpord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xeb,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
%res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
define <4 x i32> @test_mask_xor_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
;CHECK-LABEL: test_mask_xor_epi32_rr_128
;CHECK: vpxord %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xef,0xc1]
%res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_xor_epi32_rrk_128
;CHECK: vpxord %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xef,0xd1]
%res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
;CHECK-LABEL: test_mask_xor_epi32_rrkz_128
;CHECK: vpxord %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xef,0xc1]
%res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_xor_epi32_rm_128
;CHECK: vpxord (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xef,0x07]
%b = load <4 x i32>, <4 x i32>* %ptr_b
%res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_xor_epi32_rmk_128
;CHECK: vpxord (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xef,0x0f]
%b = load <4 x i32>, <4 x i32>* %ptr_b
%res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_xor_epi32_rmkz_128
;CHECK: vpxord (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xef,0x07]
%b = load <4 x i32>, <4 x i32>* %ptr_b
%res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
;CHECK-LABEL: test_mask_xor_epi32_rmb_128
;CHECK: vpxord (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xef,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
%res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_xor_epi32_rmbk_128
;CHECK: vpxord (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xef,0x0f]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
%res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_xor_epi32_rmbkz_128
;CHECK: vpxord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xef,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
%res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
define <8 x i32> @test_mask_xor_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
;CHECK-LABEL: test_mask_xor_epi32_rr_256
;CHECK: vpxord %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xef,0xc1]
%res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_xor_epi32_rrk_256
;CHECK: vpxord %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xef,0xd1]
%res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
;CHECK-LABEL: test_mask_xor_epi32_rrkz_256
;CHECK: vpxord %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xef,0xc1]
%res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_xor_epi32_rm_256
;CHECK: vpxord (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xef,0x07]
%b = load <8 x i32>, <8 x i32>* %ptr_b
%res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_xor_epi32_rmk_256
;CHECK: vpxord (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xef,0x0f]
%b = load <8 x i32>, <8 x i32>* %ptr_b
%res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_xor_epi32_rmkz_256
;CHECK: vpxord (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xef,0x07]
%b = load <8 x i32>, <8 x i32>* %ptr_b
%res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
;CHECK-LABEL: test_mask_xor_epi32_rmb_256
;CHECK: vpxord (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xef,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
%res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_xor_epi32_rmbk_256
;CHECK: vpxord (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xef,0x0f]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
%res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_xor_epi32_rmbkz_256
;CHECK: vpxord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xef,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
%res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
define <4 x i32> @test_mask_andnot_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
;CHECK-LABEL: test_mask_andnot_epi32_rr_128
;CHECK: vpandnd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdf,0xc1]
%res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_andnot_epi32_rrk_128
;CHECK: vpandnd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdf,0xd1]
%res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
;CHECK-LABEL: test_mask_andnot_epi32_rrkz_128
;CHECK: vpandnd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdf,0xc1]
%res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_andnot_epi32_rm_128
;CHECK: vpandnd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdf,0x07]
%b = load <4 x i32>, <4 x i32>* %ptr_b
%res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_andnot_epi32_rmk_128
;CHECK: vpandnd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdf,0x0f]
%b = load <4 x i32>, <4 x i32>* %ptr_b
%res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_andnot_epi32_rmkz_128
;CHECK: vpandnd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdf,0x07]
%b = load <4 x i32>, <4 x i32>* %ptr_b
%res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
;CHECK-LABEL: test_mask_andnot_epi32_rmb_128
;CHECK: vpandnd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xdf,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
%res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_andnot_epi32_rmbk_128
;CHECK: vpandnd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xdf,0x0f]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
%res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_andnot_epi32_rmbkz_128
;CHECK: vpandnd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xdf,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
%res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
define <8 x i32> @test_mask_andnot_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
;CHECK-LABEL: test_mask_andnot_epi32_rr_256
;CHECK: vpandnd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdf,0xc1]
%res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_andnot_epi32_rrk_256
;CHECK: vpandnd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdf,0xd1]
%res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
;CHECK-LABEL: test_mask_andnot_epi32_rrkz_256
;CHECK: vpandnd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0xc1]
%res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_andnot_epi32_rm_256
;CHECK: vpandnd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdf,0x07]
%b = load <8 x i32>, <8 x i32>* %ptr_b
%res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_andnot_epi32_rmk_256
;CHECK: vpandnd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdf,0x0f]
%b = load <8 x i32>, <8 x i32>* %ptr_b
%res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_andnot_epi32_rmkz_256
;CHECK: vpandnd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0x07]
%b = load <8 x i32>, <8 x i32>* %ptr_b
%res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
;CHECK-LABEL: test_mask_andnot_epi32_rmb_256
;CHECK: vpandnd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xdf,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
%res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_andnot_epi32_rmbk_256
;CHECK: vpandnd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xdf,0x0f]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
%res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_andnot_epi32_rmbkz_256
;CHECK: vpandnd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xdf,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
%res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
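
; AVX-512 splits the bitwise ops by element size (vpandd/vpandq, vpord/vporq,
; vpxord/vpxorq, vpandnd/vpandnq) so that masking and embedded broadcast apply
; per 32- or 64-bit lane. The .q variants below repeat the same test matrix
; with EVEX.W set, visible as the third encoding byte changing from 0x7d to
; 0xfd.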
define <2 x i64> @test_mask_andnot_epi64_rr_128(<2 x i64> %a, <2 x i64> %b) {
;CHECK-LABEL: test_mask_andnot_epi64_rr_128
;CHECK: vpandnq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xdf,0xc1]
%res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rrk_128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_andnot_epi64_rrk_128
;CHECK: vpandnq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xdf,0xd1]
%res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rrkz_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
;CHECK-LABEL: test_mask_andnot_epi64_rrkz_128
;CHECK: vpandnq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xdf,0xc1]
%res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rm_128(<2 x i64> %a, <2 x i64>* %ptr_b) {
;CHECK-LABEL: test_mask_andnot_epi64_rm_128
;CHECK: vpandnq (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xdf,0x07]
%b = load <2 x i64>, <2 x i64>* %ptr_b
%res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rmk_128(<2 x i64> %a, <2 x i64>* %ptr_b, <2 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_andnot_epi64_rmk_128
;CHECK: vpandnq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xdf,0x0f]
%b = load <2 x i64>, <2 x i64>* %ptr_b
%res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rmkz_128(<2 x i64> %a, <2 x i64>* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_andnot_epi64_rmkz_128
;CHECK: vpandnq (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xdf,0x07]
%b = load <2 x i64>, <2 x i64>* %ptr_b
%res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rmb_128(<2 x i64> %a, i64* %ptr_b) {
;CHECK-LABEL: test_mask_andnot_epi64_rmb_128
;CHECK: vpandnq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x18,0xdf,0x07]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
%b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
%res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rmbk_128(<2 x i64> %a, i64* %ptr_b, <2 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_andnot_epi64_rmbk_128
;CHECK: vpandnq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x19,0xdf,0x0f]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
%b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
%res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rmbkz_128(<2 x i64> %a, i64* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_andnot_epi64_rmbkz_128
;CHECK: vpandnq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0xdf,0x07]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
%b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
%res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
ret <2 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
define <4 x i64> @test_mask_andnot_epi64_rr_256(<4 x i64> %a, <4 x i64> %b) {
;CHECK-LABEL: test_mask_andnot_epi64_rr_256
;CHECK: vpandnq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xdf,0xc1]
%res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rrk_256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_andnot_epi64_rrk_256
;CHECK: vpandnq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xdf,0xd1]
%res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rrkz_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
;CHECK-LABEL: test_mask_andnot_epi64_rrkz_256
;CHECK: vpandnq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0xc1]
%res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rm_256(<4 x i64> %a, <4 x i64>* %ptr_b) {
;CHECK-LABEL: test_mask_andnot_epi64_rm_256
;CHECK: vpandnq (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xdf,0x07]
%b = load <4 x i64>, <4 x i64>* %ptr_b
%res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rmk_256(<4 x i64> %a, <4 x i64>* %ptr_b, <4 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_andnot_epi64_rmk_256
;CHECK: vpandnq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xdf,0x0f]
%b = load <4 x i64>, <4 x i64>* %ptr_b
%res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rmkz_256(<4 x i64> %a, <4 x i64>* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_andnot_epi64_rmkz_256
;CHECK: vpandnq (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0x07]
%b = load <4 x i64>, <4 x i64>* %ptr_b
%res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rmb_256(<4 x i64> %a, i64* %ptr_b) {
;CHECK-LABEL: test_mask_andnot_epi64_rmb_256
;CHECK: vpandnq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x38,0xdf,0x07]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
%b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
%res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rmbk_256(<4 x i64> %a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_andnot_epi64_rmbk_256
;CHECK: vpandnq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x39,0xdf,0x0f]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
%b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
%res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rmbkz_256(<4 x i64> %a, i64* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_andnot_epi64_rmbkz_256
;CHECK: vpandnq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0xdf,0x07]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
%b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
%res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

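; Packed FP compares into a mask register: immediate 2 selects the LE
; predicate, so these should assemble to vcmpleps/vcmplepd.
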
define i8 @test_cmpps_256(<8 x float> %a, <8 x float> %b) {
;CHECK-LABEL: test_cmpps_256
;CHECK: vcmpleps %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc1,0x02]
%res = call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 2, i8 -1)
ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float>, <8 x float>, i32, i8)

define i8 @test_cmpps_128(<4 x float> %a, <4 x float> %b) {
;CHECK-LABEL: test_cmpps_128
;CHECK: vcmpleps %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x02]
%res = call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 2, i8 -1)
ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float>, <4 x float>, i32, i8)

define i8 @test_cmppd_256(<4 x double> %a, <4 x double> %b) {
;CHECK-LABEL: test_cmppd_256
;CHECK: vcmplepd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xc1,0x02]
%res = call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %a, <4 x double> %b, i32 2, i8 -1)
ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double>, <4 x double>, i32, i8)

define i8 @test_cmppd_128(<2 x double> %a, <2 x double> %b) {
;CHECK-LABEL: test_cmppd_128
;CHECK: vcmplepd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x02]
%res = call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %a, <2 x double> %b, i32 2, i8 -1)
ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double>, <2 x double>, i32, i8)

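; Masked packed FP arithmetic. Each op is checked in zeroing form ({%k1} {z}),
; merge form into %src ({%k1}), and with an all-ones mask, where no mask
; annotation should be printed.
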
define <8 x float> @test_mm512_maskz_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_maskz_add_ps_256
;CHECK: vaddps %ymm1, %ymm0, %ymm0 {%k1} {z}
%res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 %mask)
ret <8 x float> %res
}

define <8 x float> @test_mm512_mask_add_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_add_ps_256
;CHECK: vaddps %ymm1, %ymm0, %ymm2 {%k1}
%res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
ret <8 x float> %res
}

define <8 x float> @test_mm512_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_add_ps_256
;CHECK: vaddps %ymm1, %ymm0, %ymm0
%res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 -1)
ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <4 x float> @test_mm512_maskz_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_maskz_add_ps_128
;CHECK: vaddps %xmm1, %xmm0, %xmm0 {%k1} {z}
%res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask)
ret <4 x float> %res
}

define <4 x float> @test_mm512_mask_add_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_add_ps_128
;CHECK: vaddps %xmm1, %xmm0, %xmm2 {%k1}
%res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
ret <4 x float> %res
}

define <4 x float> @test_mm512_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_add_ps_128
;CHECK: vaddps %xmm1, %xmm0, %xmm0
%res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1)
ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <8 x float> @test_mm512_maskz_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_maskz_sub_ps_256
;CHECK: vsubps %ymm1, %ymm0, %ymm0 {%k1} {z}
%res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 %mask)
ret <8 x float> %res
}

define <8 x float> @test_mm512_mask_sub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_sub_ps_256
;CHECK: vsubps %ymm1, %ymm0, %ymm2 {%k1}
%res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
ret <8 x float> %res
}

define <8 x float> @test_mm512_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_sub_ps_256
;CHECK: vsubps %ymm1, %ymm0, %ymm0
%res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 -1)
ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <4 x float> @test_mm512_maskz_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_maskz_sub_ps_128
;CHECK: vsubps %xmm1, %xmm0, %xmm0 {%k1} {z}
%res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask)
ret <4 x float> %res
}

define <4 x float> @test_mm512_mask_sub_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_sub_ps_128
;CHECK: vsubps %xmm1, %xmm0, %xmm2 {%k1}
%res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
ret <4 x float> %res
}

define <4 x float> @test_mm512_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_sub_ps_128
;CHECK: vsubps %xmm1, %xmm0, %xmm0
%res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1)
ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <8 x float> @test_mm512_maskz_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_maskz_mul_ps_256
;CHECK: vmulps %ymm1, %ymm0, %ymm0 {%k1} {z}
%res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 %mask)
ret <8 x float> %res
}

define <8 x float> @test_mm512_mask_mul_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_mul_ps_256
;CHECK: vmulps %ymm1, %ymm0, %ymm2 {%k1}
%res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
ret <8 x float> %res
}

define <8 x float> @test_mm512_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_mul_ps_256
;CHECK: vmulps %ymm1, %ymm0, %ymm0
%res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 -1)
ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <4 x float> @test_mm512_maskz_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_maskz_mul_ps_128
;CHECK: vmulps %xmm1, %xmm0, %xmm0 {%k1} {z}
%res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask)
ret <4 x float> %res
}

define <4 x float> @test_mm512_mask_mul_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_mul_ps_128
;CHECK: vmulps %xmm1, %xmm0, %xmm2 {%k1}
%res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
ret <4 x float> %res
}

define <4 x float> @test_mm512_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_mul_ps_128
;CHECK: vmulps %xmm1, %xmm0, %xmm0
%res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1)
ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <8 x float> @test_mm512_maskz_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_maskz_div_ps_256
;CHECK: vdivps %ymm1, %ymm0, %ymm0 {%k1} {z}
%res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 %mask)
ret <8 x float> %res
}

define <8 x float> @test_mm512_mask_div_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_div_ps_256
;CHECK: vdivps %ymm1, %ymm0, %ymm2 {%k1}
%res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
ret <8 x float> %res
}

define <8 x float> @test_mm512_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_div_ps_256
;CHECK: vdivps %ymm1, %ymm0, %ymm0
%res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 -1)
ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <4 x float> @test_mm512_maskz_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_maskz_div_ps_128
;CHECK: vdivps %xmm1, %xmm0, %xmm0 {%k1} {z}
%res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask)
ret <4 x float> %res
}

define <4 x float> @test_mm512_mask_div_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_div_ps_128
;CHECK: vdivps %xmm1, %xmm0, %xmm2 {%k1}
%res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
ret <4 x float> %res
}

define <4 x float> @test_mm512_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_div_ps_128
;CHECK: vdivps %xmm1, %xmm0, %xmm0
%res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1)
ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <8 x float> @test_mm512_maskz_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_maskz_max_ps_256
;CHECK: vmaxps %ymm1, %ymm0, %ymm0 {%k1} {z}
%res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 %mask)
ret <8 x float> %res
}

define <8 x float> @test_mm512_mask_max_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_max_ps_256
;CHECK: vmaxps %ymm1, %ymm0, %ymm2 {%k1}
%res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
ret <8 x float> %res
}

define <8 x float> @test_mm512_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_max_ps_256
;CHECK: vmaxps %ymm1, %ymm0, %ymm0
%res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 -1)
ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <4 x float> @test_mm512_maskz_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_maskz_max_ps_128
;CHECK: vmaxps %xmm1, %xmm0, %xmm0 {%k1} {z}
%res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask)
ret <4 x float> %res
}

define <4 x float> @test_mm512_mask_max_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_max_ps_128
;CHECK: vmaxps %xmm1, %xmm0, %xmm2 {%k1}
%res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
ret <4 x float> %res
}

define <4 x float> @test_mm512_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_max_ps_128
;CHECK: vmaxps %xmm1, %xmm0, %xmm0
%res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1)
ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <8 x float> @test_mm512_maskz_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_maskz_min_ps_256
;CHECK: vminps %ymm1, %ymm0, %ymm0 {%k1} {z}
%res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 %mask)
ret <8 x float> %res
}

define <8 x float> @test_mm512_mask_min_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_min_ps_256
;CHECK: vminps %ymm1, %ymm0, %ymm2 {%k1}
%res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
ret <8 x float> %res
}

define <8 x float> @test_mm512_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_min_ps_256
;CHECK: vminps %ymm1, %ymm0, %ymm0
%res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 -1)
ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <4 x float> @test_mm512_maskz_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_maskz_min_ps_128
;CHECK: vminps %xmm1, %xmm0, %xmm0 {%k1} {z}
%res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask)
ret <4 x float> %res
}

define <4 x float> @test_mm512_mask_min_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_min_ps_128
;CHECK: vminps %xmm1, %xmm0, %xmm2 {%k1}
%res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
ret <4 x float> %res
}

define <4 x float> @test_mm512_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
;CHECK-LABEL: test_mm512_min_ps_128
;CHECK: vminps %xmm1, %xmm0, %xmm0
%res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1)
ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

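; vsqrt and vgetexp through their masked intrinsics; getexp returns the
; exponent of each element as a floating-point value.
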
define <4 x double> @test_sqrt_pd_256(<4 x double> %a0, i8 %mask) {
; CHECK-LABEL: test_sqrt_pd_256
; CHECK: vsqrtpd
%res = call <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
ret <4 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone

define <8 x float> @test_sqrt_ps_256(<8 x float> %a0, i8 %mask) {
; CHECK-LABEL: test_sqrt_ps_256
; CHECK: vsqrtps
%res = call <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone

define <4 x double> @test_getexp_pd_256(<4 x double> %a0) {
; CHECK-LABEL: test_getexp_pd_256
; CHECK: vgetexppd
%res = call <4 x double> @llvm.x86.avx512.mask.getexp.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 -1)
ret <4 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.getexp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone

define <8 x float> @test_getexp_ps_256(<8 x float> %a0) {
; CHECK-LABEL: test_getexp_ps_256
; CHECK: vgetexpps
%res = call <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 -1)
ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone

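; Integer max/min. Each test calls the intrinsic twice (masked, then either
; unmasked or with a zero passthrough) and adds the results so neither call
; can be dropped.
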
declare <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_d_128
; CHECK: vpmaxsd %xmm
define <4 x i32>@test_int_x86_avx512_mask_pmaxs_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
%res = call <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_d_256
; CHECK: vpmaxsd %ymm
define <8 x i32>@test_int_x86_avx512_mask_pmaxs_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
%res = call <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_q_128
; CHECK: vpmaxsq %xmm
define <2 x i64>@test_int_x86_avx512_mask_pmaxs_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
%res = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
%res2 = add <2 x i64> %res, %res1
ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_q_256
; CHECK: vpmaxsq %ymm
define <4 x i64>@test_int_x86_avx512_mask_pmaxs_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
%res = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
%res2 = add <4 x i64> %res, %res1
ret <4 x i64> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_d_128
; CHECK: vpmaxud %xmm
define <4 x i32>@test_int_x86_avx512_mask_pmaxu_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
%res = call <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_d_256
; CHECK: vpmaxud %ymm
define <8 x i32>@test_int_x86_avx512_mask_pmaxu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
%res = call <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_q_128
; CHECK: vpmaxuq %xmm
define <2 x i64>@test_int_x86_avx512_mask_pmaxu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
%res = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
%res2 = add <2 x i64> %res, %res1
ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_q_256
; CHECK: vpmaxuq %ymm
define <4 x i64>@test_int_x86_avx512_mask_pmaxu_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
%res = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
%res2 = add <4 x i64> %res, %res1
ret <4 x i64> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_d_128
; CHECK: vpminsd %xmm
define <4 x i32>@test_int_x86_avx512_mask_pmins_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
%res = call <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_d_256
; CHECK: vpminsd %ymm
define <8 x i32>@test_int_x86_avx512_mask_pmins_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
%res = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_q_128
; CHECK: vpminsq %xmm
define <2 x i64>@test_int_x86_avx512_mask_pmins_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
%res = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
%res2 = add <2 x i64> %res, %res1
ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_q_256
; CHECK: vpminsq %ymm
define <4 x i64>@test_int_x86_avx512_mask_pmins_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
%res = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
%res2 = add <4 x i64> %res, %res1
ret <4 x i64> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_d_128
; CHECK: vpminud %xmm
define <4 x i32>@test_int_x86_avx512_mask_pminu_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
%res = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_d_256
; CHECK: vpminud %ymm
define <8 x i32>@test_int_x86_avx512_mask_pminu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
%res = call <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_q_128
; CHECK: vpminuq %xmm
define <2 x i64>@test_int_x86_avx512_mask_pminu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
%res = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
%res2 = add <2 x i64> %res, %res1
ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_q_256
; CHECK: vpminuq %ymm
define <4 x i64>@test_int_x86_avx512_mask_pminu_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
%res = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
%res2 = add <4 x i64> %res, %res1
ret <4 x i64> %res2
}

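; vpermt2d two-source variable shuffles: the mask.* intrinsics check merge
; masking ({%k1}); the maskz.* intrinsics check zero masking ({%k1} {z}).
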
declare <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_128
; CHECK: vpermt2d %xmm{{.*}}{%k1}
define <4 x i32>@test_int_x86_avx512_mask_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
%res = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}

declare <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_128
; CHECK: vpermt2d %xmm{{.*}}{%k1} {z}
define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
%res = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
%res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_256
; CHECK: vpermt2d %ymm{{.*}}{%k1}
define <8 x i32>@test_int_x86_avx512_mask_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
%res = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_256
; CHECK: vpermt2d {{.*}}{%k1} {z}
define <8 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
%res = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2
}

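; vpermi2pd/vpermi2ps: the FP flavors of the two-source shuffle, with the
; masked and unmasked results combined by fadd instead of add.
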
declare <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_128
; CHECK: vpermi2pd %xmm{{.*}}{%k1}
define <2 x double>@test_int_x86_avx512_mask_vpermi2var_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) {
%res = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3)
%res1 = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1)
%res2 = fadd <2 x double> %res, %res1
ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_256
; CHECK: vpermi2pd %ymm{{.*}}{%k1}
define <4 x double>@test_int_x86_avx512_mask_vpermi2var_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
%res = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
%res1 = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
%res2 = fadd <4 x double> %res, %res1
ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_128
; CHECK: vpermi2ps %xmm{{.*}}{%k1}
define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) {
%res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3)
%res1 = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1)
%res2 = fadd <4 x float> %res, %res1
ret <4 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_256
; CHECK: vpermi2ps %ymm{{.*}}{%k1}
define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
%res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
%res1 = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
%res2 = fadd <8 x float> %res, %res1
ret <8 x float> %res2
}

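; Masked absolute value (vpabsq/vpabsd), 128- and 256-bit.
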
declare <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64>, <2 x i64>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_q_128
; CHECK: vpabsq{{.*}}{%k1}
define <2 x i64>@test_int_x86_avx512_mask_pabs_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
%res = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
%res2 = add <2 x i64> %res, %res1
ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64>, <4 x i64>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_q_256
; CHECK: vpabsq{{.*}}{%k1}
define <4 x i64>@test_int_x86_avx512_mask_pabs_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
%res = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
%res2 = add <4 x i64> %res, %res1
ret <4 x i64> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32>, <4 x i32>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_d_128
; CHECK: vpabsd{{.*}}{%k1}
define <4 x i32>@test_int_x86_avx512_mask_pabs_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
%res = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32>, <8 x i32>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_d_256
; CHECK: vpabsd{{.*}}{%k1}
define <8 x i32>@test_int_x86_avx512_mask_pabs_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
%res = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2
}

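; vscalefpd/vscalefps scale the first operand by a power of two derived from
; the second.
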
declare <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_pd_128
; CHECK: vscalefpd{{.*}}{%k1}
define <2 x double>@test_int_x86_avx512_mask_scalef_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
%res = call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
%res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
%res2 = fadd <2 x double> %res, %res1
ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_pd_256
; CHECK: vscalefpd{{.*}}{%k1}
define <4 x double>@test_int_x86_avx512_mask_scalef_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
%res = call <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
%res1 = call <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
%res2 = fadd <4 x double> %res, %res1
ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ps_128
; CHECK: vscalefps{{.*}}{%k1}
define <4 x float>@test_int_x86_avx512_mask_scalef_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
%res = call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
%res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
%res2 = fadd <4 x float> %res, %res1
ret <4 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ps_256
; CHECK: vscalefps{{.*}}{%k1}
define <8 x float>@test_int_x86_avx512_mask_scalef_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
%res = call <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
%res1 = call <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
%res2 = fadd <8 x float> %res, %res1
ret <8 x float> %res2
}

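; FP unpack high/low. The CHECK-NEXT lines match the assembler's shuffle
; decode comments as well as the EVEX encodings of the unmasked forms.
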
declare <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_unpckh_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_128:
; CHECK: vunpckhpd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: ## xmm2 = xmm2[1],k1[1]
; CHECK-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x15,0xc1]
; CHECK-NEXT: ## xmm0 = xmm0[1],xmm1[1]
%res = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
%res1 = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
%res2 = fadd <2 x double> %res, %res1
ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_unpckh_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_256:
; CHECK: vunpckhpd %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: ## ymm2 = ymm2[1],k1[1],ymm2[3],k1[3]
; CHECK-NEXT: vunpckhpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x15,0xc1]
; CHECK-NEXT: ## ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
%res = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
%res1 = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
%res2 = fadd <4 x double> %res, %res1
ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_unpckh_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_128:
; CHECK: vunpckhps %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: ## xmm2 = xmm2[2],k1[2],xmm2[3],k1[3]
; CHECK-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x15,0xc1]
; CHECK-NEXT: ## xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
%res = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
%res1 = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
%res2 = fadd <4 x float> %res, %res1
ret <4 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_unpckh_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_256:
; CHECK: vunpckhps %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: ## ymm2 = ymm2[2],k1[2],ymm2[3],k1[3],ymm2[6],k1[6],ymm2[7],k1[7]
; CHECK-NEXT: vunpckhps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x15,0xc1]
; CHECK-NEXT: ## ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
%res = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
%res1 = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
%res2 = fadd <8 x float> %res, %res1
ret <8 x float> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_unpckl_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_128:
; CHECK: vunpcklpd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: ## xmm2 = xmm2[0],k1[0]
; CHECK-NEXT: vunpcklpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x14,0xc1]
; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0]
%res = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
%res1 = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
%res2 = fadd <2 x double> %res, %res1
ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_unpckl_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_256:
; CHECK: vunpcklpd %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: ## ymm2 = ymm2[0],k1[0],ymm2[2],k1[2]
; CHECK-NEXT: vunpcklpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x14,0xc1]
; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
%res = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
%res1 = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
%res2 = fadd <4 x double> %res, %res1
ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_unpckl_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_128:
; CHECK: vunpcklps %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: ## xmm2 = xmm2[0],k1[0],xmm2[1],k1[1]
; CHECK-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x14,0xc1]
; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
%res = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
%res1 = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
%res2 = fadd <4 x float> %res, %res1
ret <4 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_unpckl_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_256:
; CHECK: vunpcklps %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: ## ymm2 = ymm2[0],k1[0],ymm2[1],k1[1],ymm2[4],k1[4],ymm2[5],k1[5]
; CHECK-NEXT: vunpcklps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x14,0xc1]
; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
%res = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
%res1 = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
%res2 = fadd <8 x float> %res, %res1
ret <8 x float> %res2
}

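; Integer unpack high/low (vpunpckhdq/vpunpckldq/vpunpckhqdq/vpunpcklqdq),
; same merge-masked plus unmasked pattern, combined with add.
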
declare <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_punpckhd_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_128:
; CHECK: vpunpckhdq %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: ## xmm2 = xmm2[2],k1[2],xmm2[3],k1[3]
; CHECK-NEXT: vpunpckhdq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6a,0xc1]
; CHECK-NEXT: ## xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
%res = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_punpckld_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_128:
; CHECK: vpunpckldq %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: ## xmm2 = xmm2[0],k1[0],xmm2[1],k1[1]
; CHECK-NEXT: vpunpckldq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x62,0xc1]
; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
%res = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_punpckhd_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_256:
; CHECK: vpunpckhdq %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: ## ymm2 = ymm2[2],k1[2],ymm2[3],k1[3],ymm2[6],k1[6],ymm2[7],k1[7]
; CHECK-NEXT: vpunpckhdq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6a,0xc1]
; CHECK-NEXT: ## ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
%res = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_punpckld_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_256:
; CHECK: vpunpckldq %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: ## ymm2 = ymm2[0],k1[0],ymm2[1],k1[1],ymm2[4],k1[4],ymm2[5],k1[5]
; CHECK-NEXT: vpunpckldq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x62,0xc1]
; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
%res = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_punpckhqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_128:
; CHECK: vpunpckhqdq %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: ## xmm2 = xmm2[1],k1[1]
; CHECK-NEXT: vpunpckhqdq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6d,0xc1]
; CHECK-NEXT: ## xmm0 = xmm0[1],xmm1[1]
%res = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
%res2 = add <2 x i64> %res, %res1
ret <2 x i64> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_punpcklqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_128:
; CHECK: vpunpcklqdq %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: ## xmm2 = xmm2[0],k1[0]
; CHECK-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6c,0xc1]
; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0]
%res = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
%res2 = add <2 x i64> %res, %res1
ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_punpcklqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_256:
; CHECK: vpunpcklqdq %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: ## ymm2 = ymm2[0],k1[0],ymm2[2],k1[2]
; CHECK-NEXT: vpunpcklqdq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6c,0xc1]
; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
%res = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
%res2 = add <4 x i64> %res, %res1
ret <4 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_punpckhqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_256:
; CHECK: vpunpckhqdq %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: ## ymm2 = ymm2[1],k1[1],ymm2[3],k1[3]
; CHECK-NEXT: vpunpckhqdq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6d,0xc1]
; CHECK-NEXT: ## ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
%res = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
%res2 = add <4 x i64> %res, %res1
ret <4 x i64> %res2
}

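; Narrowing moves from qword elements: vpmovqb truncates, vpmovsqb saturates
; signed, vpmovusqb saturates unsigned. Each is tested in register form with
; all three masking modes and in memory form with and without a mask.
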
declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_128:
; CHECK: vpmovqb %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovqb %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovqb %xmm0, %xmm0
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2)
%res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
%res3 = add <16 x i8> %res0, %res1
%res4 = add <16 x i8> %res3, %res2
ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64>, i8)

define void @test_int_x86_avx512_mask_pmov_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_128:
; CHECK: vpmovqb %xmm0, (%rdi)
; CHECK: vpmovqb %xmm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_128:
; CHECK: vpmovsqb %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovsqb %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsqb %xmm0, %xmm0
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2)
%res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
%res3 = add <16 x i8> %res0, %res1
%res4 = add <16 x i8> %res3, %res2
ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovs_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_128:
; CHECK: vpmovsqb %xmm0, (%rdi)
; CHECK: vpmovsqb %xmm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_128:
; CHECK: vpmovusqb %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovusqb %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovusqb %xmm0, %xmm0
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2)
%res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
%res3 = add <16 x i8> %res0, %res1
%res4 = add <16 x i8> %res3, %res2
ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovus_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_128:
; CHECK: vpmovusqb %xmm0, (%rdi)
; CHECK: vpmovusqb %xmm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_256:
; CHECK: vpmovqb %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovqb %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovqb %ymm0, %xmm0
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2)
%res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
%res3 = add <16 x i8> %res0, %res1
%res4 = add <16 x i8> %res3, %res2
ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64>, i8)

define void @test_int_x86_avx512_mask_pmov_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_256:
; CHECK: vpmovqb %ymm0, (%rdi)
; CHECK: vpmovqb %ymm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_256:
; CHECK: vpmovsqb %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovsqb %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsqb %ymm0, %xmm0
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2)
%res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
%res3 = add <16 x i8> %res0, %res1
%res4 = add <16 x i8> %res3, %res2
ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovs_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_256:
; CHECK: vpmovsqb %ymm0, (%rdi)
; CHECK: vpmovsqb %ymm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_256:
; CHECK: vpmovusqb %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovusqb %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovusqb %ymm0, %xmm0
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2)
%res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
%res3 = add <16 x i8> %res0, %res1
%res4 = add <16 x i8> %res3, %res2
ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovus_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_256:
; CHECK: vpmovusqb %ymm0, (%rdi)
; CHECK: vpmovusqb %ymm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
ret void
}


declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_128:
; CHECK: vpmovqw %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovqw %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovqw %xmm0, %xmm0
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2)
%res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
%res3 = add <8 x i16> %res0, %res1
%res4 = add <8 x i16> %res3, %res2
ret <8 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64>, i8)

define void @test_int_x86_avx512_mask_pmov_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_128:
; CHECK: vpmovqw %xmm0, (%rdi)
; CHECK: vpmovqw %xmm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
ret void
}

declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_128:
; CHECK: vpmovsqw %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovsqw %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsqw %xmm0, %xmm0
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2)
%res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
%res3 = add <8 x i16> %res0, %res1
%res4 = add <8 x i16> %res3, %res2
ret <8 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovs_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_128:
; CHECK: vpmovsqw %xmm0, (%rdi)
; CHECK: vpmovsqw %xmm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
ret void
}

declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_128:
; CHECK: vpmovusqw %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovusqw %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovusqw %xmm0, %xmm0
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2)
%res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
%res3 = add <8 x i16> %res0, %res1
%res4 = add <8 x i16> %res3, %res2
ret <8 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovus_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_128:
; CHECK: vpmovusqw %xmm0, (%rdi)
; CHECK: vpmovusqw %xmm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
ret void
}

declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_256:
; CHECK: vpmovqw %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovqw %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovqw %ymm0, %xmm0
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2)
%res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
%res3 = add <8 x i16> %res0, %res1
%res4 = add <8 x i16> %res3, %res2
ret <8 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64>, i8)

define void @test_int_x86_avx512_mask_pmov_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_256:
; CHECK: vpmovqw %ymm0, (%rdi)
; CHECK: vpmovqw %ymm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
ret void
}

declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_256:
; CHECK: vpmovsqw %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovsqw %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsqw %ymm0, %xmm0
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2)
%res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
%res3 = add <8 x i16> %res0, %res1
%res4 = add <8 x i16> %res3, %res2
ret <8 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovs_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_256:
; CHECK: vpmovsqw %ymm0, (%rdi)
; CHECK: vpmovsqw %ymm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
ret void
}

declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_256:
; CHECK: vpmovusqw %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovusqw %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovusqw %ymm0, %xmm0
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2)
%res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
%res3 = add <8 x i16> %res0, %res1
%res4 = add <8 x i16> %res3, %res2
ret <8 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovus_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_256:
; CHECK: vpmovusqw %ymm0, (%rdi)
; CHECK: vpmovusqw %ymm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
ret void
}
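
; Quadword-to-doubleword truncations (vpmovqd, vpmovsqd, vpmovusqd) follow the
; same pattern.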

declare <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_128:
; CHECK: vpmovqd %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovqd %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovqd %xmm0, %xmm0
%res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
%res3 = add <4 x i32> %res0, %res1
%res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}

declare void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64>, i8)

define void @test_int_x86_avx512_mask_pmov_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_128:
; CHECK: vpmovqd %xmm0, (%rdi)
; CHECK: vpmovqd %xmm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
ret void
}

declare <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pmovs_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_128:
; CHECK: vpmovsqd %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovsqd %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsqd %xmm0, %xmm0
%res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
%res3 = add <4 x i32> %res0, %res1
%res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovs_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_128:
; CHECK: vpmovsqd %xmm0, (%rdi)
; CHECK: vpmovsqd %xmm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
ret void
}

declare <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pmovus_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_128:
; CHECK: vpmovusqd %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovusqd %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovusqd %xmm0, %xmm0
%res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
%res3 = add <4 x i32> %res0, %res1
%res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovus_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_128:
; CHECK: vpmovusqd %xmm0, (%rdi)
; CHECK: vpmovusqd %xmm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
ret void
}

declare <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_256:
; CHECK: vpmovqd %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovqd %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovqd %ymm0, %xmm0
%res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
%res3 = add <4 x i32> %res0, %res1
%res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}

declare void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64>, i8)

define void @test_int_x86_avx512_mask_pmov_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_256:
; CHECK: vpmovqd %ymm0, (%rdi)
; CHECK: vpmovqd %ymm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
ret void
}

declare <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pmovs_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_256:
; CHECK: vpmovsqd %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovsqd %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsqd %ymm0, %xmm0
%res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
%res3 = add <4 x i32> %res0, %res1
%res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovs_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_256:
; CHECK: vpmovsqd %ymm0, (%rdi)
; CHECK: vpmovsqd %ymm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
ret void
}

declare <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pmovus_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_256:
; CHECK: vpmovusqd %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovusqd %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovusqd %ymm0, %xmm0
%res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
%res3 = add <4 x i32> %res0, %res1
%res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovus_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_256:
; CHECK: vpmovusqd %ymm0, (%rdi)
; CHECK: vpmovusqd %ymm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
ret void
}
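
; Doubleword-to-byte truncations (vpmovdb, vpmovsdb, vpmovusdb).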

declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmov_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_128:
; CHECK: vpmovdb %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovdb %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovdb %xmm0, %xmm0
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2)
%res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
%res3 = add <16 x i8> %res0, %res1
%res4 = add <16 x i8> %res3, %res2
ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32>, i8)

define void @test_int_x86_avx512_mask_pmov_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_128:
; CHECK: vpmovdb %xmm0, (%rdi)
; CHECK: vpmovdb %xmm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_128:
; CHECK: vpmovsdb %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovsdb %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsdb %xmm0, %xmm0
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2)
%res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
%res3 = add <16 x i8> %res0, %res1
%res4 = add <16 x i8> %res3, %res2
ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32>, i8)

define void @test_int_x86_avx512_mask_pmovs_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_128:
; CHECK: vpmovsdb %xmm0, (%rdi)
; CHECK: vpmovsdb %xmm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_128:
; CHECK: vpmovusdb %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovusdb %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovusdb %xmm0, %xmm0
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2)
%res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
%res3 = add <16 x i8> %res0, %res1
%res4 = add <16 x i8> %res3, %res2
ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32>, i8)

define void @test_int_x86_avx512_mask_pmovus_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_128:
; CHECK: vpmovusdb %xmm0, (%rdi)
; CHECK: vpmovusdb %xmm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmov_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_256:
; CHECK: vpmovdb %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovdb %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovdb %ymm0, %xmm0
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2)
%res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
%res3 = add <16 x i8> %res0, %res1
%res4 = add <16 x i8> %res3, %res2
ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32>, i8)

define void @test_int_x86_avx512_mask_pmov_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_256:
; CHECK: vpmovdb %ymm0, (%rdi)
; CHECK: vpmovdb %ymm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_256:
; CHECK: vpmovsdb %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovsdb %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsdb %ymm0, %xmm0
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2)
%res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
%res3 = add <16 x i8> %res0, %res1
%res4 = add <16 x i8> %res3, %res2
ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32>, i8)

define void @test_int_x86_avx512_mask_pmovs_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_256:
; CHECK: vpmovsdb %ymm0, (%rdi)
; CHECK: vpmovsdb %ymm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_256:
; CHECK: vpmovusdb %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovusdb %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovusdb %ymm0, %xmm0
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2)
%res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
%res3 = add <16 x i8> %res0, %res1
%res4 = add <16 x i8> %res3, %res2
ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32>, i8)

define void @test_int_x86_avx512_mask_pmovus_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_256:
; CHECK: vpmovusdb %ymm0, (%rdi)
; CHECK: vpmovusdb %ymm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
ret void
}
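
; Doubleword-to-word truncations (vpmovdw, vpmovsdw, vpmovusdw).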

declare <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_pmov_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_128:
; CHECK: vpmovdw %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovdw %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovdw %xmm0, %xmm0
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2)
%res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
%res3 = add <8 x i16> %res0, %res1
%res4 = add <8 x i16> %res3, %res2
ret <8 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32>, i8)

define void @test_int_x86_avx512_mask_pmov_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_128:
; CHECK: vpmovdw %xmm0, (%rdi)
; CHECK: vpmovdw %xmm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
ret void
}

declare <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_pmovs_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_128:
; CHECK: vpmovsdw %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovsdw %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsdw %xmm0, %xmm0
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2)
%res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
%res3 = add <8 x i16> %res0, %res1
%res4 = add <8 x i16> %res3, %res2
ret <8 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32>, i8)

define void @test_int_x86_avx512_mask_pmovs_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_128:
; CHECK: vpmovsdw %xmm0, (%rdi)
; CHECK: vpmovsdw %xmm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
ret void
}

declare <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_pmovus_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_128:
; CHECK: vpmovusdw %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovusdw %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovusdw %xmm0, %xmm0
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2)
%res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
%res3 = add <8 x i16> %res0, %res1
%res4 = add <8 x i16> %res3, %res2
ret <8 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32>, i8)

define void @test_int_x86_avx512_mask_pmovus_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_128:
; CHECK: vpmovusdw %xmm0, (%rdi)
; CHECK: vpmovusdw %xmm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
ret void
}

declare <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_pmov_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_256:
; CHECK: vpmovdw %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovdw %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovdw %ymm0, %xmm0
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2)
%res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
%res3 = add <8 x i16> %res0, %res1
%res4 = add <8 x i16> %res3, %res2
ret <8 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32>, i8)

define void @test_int_x86_avx512_mask_pmov_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_256:
; CHECK: vpmovdw %ymm0, (%rdi)
; CHECK: vpmovdw %ymm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
ret void
}

declare <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_pmovs_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_256:
; CHECK: vpmovsdw %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovsdw %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsdw %ymm0, %xmm0
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2)
%res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
%res3 = add <8 x i16> %res0, %res1
%res4 = add <8 x i16> %res3, %res2
ret <8 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32>, i8)

define void @test_int_x86_avx512_mask_pmovs_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_256:
; CHECK: vpmovsdw %ymm0, (%rdi)
; CHECK: vpmovsdw %ymm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
ret void
}

declare <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_pmovus_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_256:
; CHECK: vpmovusdw %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovusdw %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovusdw %ymm0, %xmm0
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2)
%res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
%res3 = add <8 x i16> %res0, %res1
%res4 = add <8 x i16> %res3, %res2
ret <8 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32>, i8)

define void @test_int_x86_avx512_mask_pmovus_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_256:
; CHECK: vpmovusdw %ymm0, (%rdi)
; CHECK: vpmovusdw %ymm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
ret void
}
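
; Masked integer/floating-point conversion tests. Each test computes a masked
; and an unmasked result and combines them with add/fadd so that FileCheck
; verifies both forms of the instruction in one function.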

declare <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_cvt_dq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm0
; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2)
%res1 = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1)
%res2 = fadd <2 x double> %res, %res1
ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_cvt_dq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0
; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2)
%res1 = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1)
%res2 = fadd <4 x double> %res, %res1
ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_cvt_dq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0
; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 %x2)
%res1 = call <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 -1)
%res2 = fadd <4 x float> %res, %res1
ret <4 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_cvt_dq2ps_256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 %x2)
%res1 = call <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 -1)
%res2 = fadd <8 x float> %res, %res1
ret <8 x float> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvtpd2dq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vcvtpd2dq %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvtpd2dq %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vcvtpd2dq %ymm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_cvt_pd2ps_256(<4 x double> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvtpd2ps %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vcvtpd2ps %ymm0, %xmm0
; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double> %x0, <4 x float> %x1, i8 %x2)
%res1 = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double> %x0, <4 x float> %x1, i8 -1)
%res2 = fadd <4 x float> %res, %res1
ret <4 x float> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_cvt_pd2ps(<2 x double> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ps:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0
; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double> %x0, <4 x float> %x1, i8 %x2)
%res1 = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double> %x0, <4 x float> %x1, i8 -1)
%res2 = fadd <4 x float> %res, %res1
ret <4 x float> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvtpd2udq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vcvtpd2udq %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvtpd2udq %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vcvtpd2udq %ymm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.cvtps2dq.128(<4 x float>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvtps2dq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vcvtps2dq %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.mask.cvtps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.cvtps2dq.256(<8 x float>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvtps2dq %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vcvtps2dq %ymm0, %ymm0
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.mask.cvtps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_cvt_ps2pd_128(<4 x float> %x0, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvtps2pd %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vcvtps2pd %xmm0, %xmm0
; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float> %x0, <2 x double> %x1, i8 %x2)
%res1 = call <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float> %x0, <2 x double> %x1, i8 -1)
%res2 = fadd <2 x double> %res, %res1
ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_cvt_ps2pd_256(<4 x float> %x0, <4 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvtps2pd %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vcvtps2pd %xmm0, %ymm0
; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float> %x0, <4 x double> %x1, i8 %x2)
%res1 = call <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float> %x0, <4 x double> %x1, i8 -1)
%res2 = fadd <4 x double> %res, %res1
ret <4 x double> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvtps2udq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vcvtps2udq %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvtps2udq %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vcvtps2udq %ymm0, %ymm0
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvttpd2udq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vcvttpd2udq %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvttpd2udq %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vcvttpd2udq %ymm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvttps2dq %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvttps2udq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vcvttps2udq %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvttps2udq %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vcvttps2udq %ymm0, %ymm0
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_cvt_udq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvtudq2pd %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vcvtudq2pd %xmm0, %xmm0
; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2)
%res1 = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1)
%res2 = fadd <2 x double> %res, %res1
ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_cvt_udq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm0
; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2)
%res1 = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1)
%res2 = fadd <4 x double> %res, %res1
ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_cvt_udq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm0
; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 %x2)
%res1 = call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 -1)
%res2 = fadd <4 x float> %res, %res1
ret <4 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_cvt_udq2ps_256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm0
; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 %x2)
%res1 = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 -1)
%res2 = fadd <8 x float> %res, %res1
ret <8 x float> %res2
}
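
; vrndscale* tests: the i32 immediate selects the rounding control; the masked
; and unmasked results are combined as above.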

declare <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double>, i32, <2 x double>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_pd_128
; CHECK: vrndscalepd {{.*}}{%k1}
; CHECK: vrndscalepd
define <2 x double>@test_int_x86_avx512_mask_rndscale_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
%res = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %x0, i32 4, <2 x double> %x2, i8 %x3)
%res1 = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %x0, i32 88, <2 x double> %x2, i8 -1)
%res2 = fadd <2 x double> %res, %res1
ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double>, i32, <4 x double>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_pd_256
; CHECK: vrndscalepd {{.*}}{%k1}
; CHECK: vrndscalepd
define <4 x double>@test_int_x86_avx512_mask_rndscale_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
%res = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %x0, i32 4, <4 x double> %x2, i8 %x3)
%res1 = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %x0, i32 88, <4 x double> %x2, i8 -1)
%res2 = fadd <4 x double> %res, %res1
ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float>, i32, <4 x float>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_ps_128
; CHECK: vrndscaleps {{.*}}{%k1}
; CHECK: vrndscaleps
define <4 x float>@test_int_x86_avx512_mask_rndscale_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
%res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %x0, i32 88, <4 x float> %x2, i8 %x3)
%res1 = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %x0, i32 4, <4 x float> %x2, i8 -1)
%res2 = fadd <4 x float> %res, %res1
ret <4 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float>, i32, <8 x float>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_ps_256
; CHECK: vrndscaleps {{.*}}{%k1}
; CHECK: vrndscaleps
define <8 x float>@test_int_x86_avx512_mask_rndscale_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
%res = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %x0, i32 5, <8 x float> %x2, i8 %x3)
%res1 = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %x0, i32 66, <8 x float> %x2, i8 -1)
%res2 = fadd <8 x float> %res, %res1
ret <8 x float> %res2
}
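
; 128-bit-lane shuffles: the $22 immediate selects which 128-bit lanes of the
; two sources form the result, as shown in the ymm comments below.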
declare <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_shuf_f32x4_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f32x4_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: ## ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: ## ymm3 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
%res = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4)
%res1 = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1)
%res2 = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> zeroinitializer, i8 %x4)
%res3 = fadd <8 x float> %res, %res1
%res4 = fadd <8 x float> %res2, %res3
ret <8 x float> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_shuf_f64x2_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f64x2_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: ## ymm2 = ymm0[0,1],ymm1[2,3]
; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: ## ymm3 = ymm0[0,1],ymm1[2,3]
; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3]
; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0
; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
%res = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 %x4)
%res1 = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 -1)
%res2 = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> zeroinitializer, i8 %x4)
%res3 = fadd <4 x double> %res, %res1
%res4 = fadd <4 x double> %res2, %res3
ret <4 x double> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_shuf_i32x4_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i32x4_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: ## ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
%res = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 -1)
%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_shuf_i64x2_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i64x2_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: ## ymm2 = ymm0[0,1],ymm1[2,3]
; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3]
; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0
%res = call <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 -1)
%res2 = add <4 x i64> %res, %res1
ret <4 x i64> %res2
}
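
; vextractf32x4 $1 extracts the upper 128-bit lane; merge-masked,
; zero-masked, and unmasked forms are all checked.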
declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float>, i32, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_vextractf32x4_256(<8 x float> %x0, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vextractf32x4_256:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm0
; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
%res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> %x2, i8 %x3)
%res1 = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> zeroinitializer, i8 %x3)
%res2 = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> zeroinitializer, i8 -1)
%res3 = fadd <4 x float> %res, %res1
%res4 = fadd <4 x float> %res2, %res3
ret <4 x float> %res4
}
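
; vgetmantpd/vgetmantps: the i32 11 operand is the immediate mantissa
; extraction control that shows up as $11 in the expected assembly.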
declare <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double>, i32, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_getmant_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_pd_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vgetmantpd $11, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vgetmantpd $11, %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vgetmantpd $11, %xmm0, %xmm0
; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
%res = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 11, <2 x double> %x2, i8 %x3)
%res2 = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 11, <2 x double> zeroinitializer, i8 %x3)
%res1 = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 11, <2 x double> %x2, i8 -1)
%res3 = fadd <2 x double> %res, %res1
%res4 = fadd <2 x double> %res2, %res3
ret <2 x double> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double>, i32, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_getmant_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_pd_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vgetmantpd $11, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vgetmantpd $11, %ymm0, %ymm0
; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
%res = call <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double> %x0, i32 11, <4 x double> %x2, i8 %x3)
%res1 = call <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double> %x0, i32 11, <4 x double> %x2, i8 -1)
%res2 = fadd <4 x double> %res, %res1
ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float>, i32, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_getmant_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ps_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vgetmantps $11, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vgetmantps $11, %xmm0, %xmm0
; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
%res = call <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float> %x0, i32 11, <4 x float> %x2, i8 %x3)
%res1 = call <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float> %x0, i32 11, <4 x float> %x2, i8 -1)
%res2 = fadd <4 x float> %res, %res1
ret <4 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float>, i32, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_getmant_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ps_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vgetmantps $11, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vgetmantps $11, %ymm0, %ymm0
; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
%res = call <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 %x3)
%res1 = call <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 -1)
%res2 = fadd <8 x float> %res, %res1
ret <8 x float> %res2
}
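
; In-lane vshufpd/vshufps with immediate 22; the shuffle comments printed
; by the assembler (## xmm0 = ...) are part of the expected output.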
declare <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double>, <2 x double>, i32, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_shuf_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: ## xmm2 = xmm2[0],k1[1]
; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: ## xmm3 = k1[0],xmm0[1]
; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[1]
; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
%res = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 22, <2 x double> %x3, i8 %x4)
%res1 = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 22, <2 x double> %x3, i8 -1)
%res2 = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 22, <2 x double> zeroinitializer, i8 %x4)
%res3 = fadd <2 x double> %res, %res1
%res4 = fadd <2 x double> %res2, %res3
ret <2 x double> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_shuf_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd $22, %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: ## ymm2 = ymm2[0],k1[1],ymm2[3],k1[2]
; CHECK-NEXT: vshufpd $22, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[1],ymm0[3],ymm1[2]
; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0
%res = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 %x4)
%res1 = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 -1)
%res2 = fadd <4 x double> %res, %res1
ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float>, <4 x float>, i32, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_shuf_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps $22, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: ## xmm2 = xmm2[2,1],k1[1,0]
; CHECK-NEXT: vshufps $22, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: ## xmm0 = xmm0[2,1],xmm1[1,0]
; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
%res = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 %x4)
%res1 = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 -1)
%res2 = fadd <4 x float> %res, %res1
ret <4 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_shuf_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps $22, %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: ## ymm2 = ymm2[2,1],k1[1,0],ymm2[6,5],k1[5,4]
; CHECK-NEXT: vshufps $22, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: ## ymm0 = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4]
; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
%res = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4)
%res1 = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1)
%res2 = fadd <8 x float> %res, %res1
ret <8 x float> %res2
}
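
; valignd/valignq: element-granular align of the two source vectors by the
; immediate count, again in masked and unmasked forms.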
declare <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32>, <4 x i32>, i32, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_valign_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_valign_d_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: valignd $22, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: valignd $22, %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: valignd $22, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0
%res = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 %x4)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 -1)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> zeroinitializer, i8 %x4)
%res3 = add <4 x i32> %res, %res1
%res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_valign_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_valign_d_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: valignd $22, %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: valignd $22, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
%res = call <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 -1)
%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64>, <2 x i64>, i32, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_valign_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_valign_q_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: valignq $22, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: valignq $22, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0
%res = call <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 %x4)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 -1)
%res2 = add <2 x i64> %res, %res1
ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_valign_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_valign_q_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: valignq $22, %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: valignq $22, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0
%res = call <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 -1)
%res2 = add <4 x i64> %res, %res1
ret <4 x i64> %res2
}
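
; vpermilpd/vpermilps with an immediate control; the expected comments
; (e.g. ymm0[0,1,3,2]) spell out the per-lane permutation for each width.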
declare <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double>, i32, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_vpermil_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: ## ymm1 = ymm1[0,1,3,2]
; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: ## ymm2 = k1[0,1,3,2]
; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm0
; CHECK-NEXT: ## ymm0 = ymm0[0,1,3,2]
; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
%res = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 %x3)
%res1 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> zeroinitializer, i8 %x3)
%res2 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 -1)
%res3 = fadd <4 x double> %res, %res1
%res4 = fadd <4 x double> %res2, %res3
ret <4 x double> %res4
}

declare <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double>, i32, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_vpermil_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: ## xmm1 = xmm1[1,0]
; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: ## xmm2 = k1[1,0]
; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm0
; CHECK-NEXT: ## xmm0 = xmm0[1,0]
; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
%res = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 %x3)
%res1 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> zeroinitializer, i8 %x3)
%res2 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 -1)
%res3 = fadd <2 x double> %res, %res1
%res4 = fadd <2 x double> %res3, %res2
ret <2 x double> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float>, i32, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_vpermil_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps $22, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: ## ymm1 = ymm1[2,1,1,0,6,5,5,4]
; CHECK-NEXT: vpermilps $22, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: ## ymm2 = k1[2,1,1,0,6,5,5,4]
; CHECK-NEXT: vpermilps $22, %ymm0, %ymm0
; CHECK-NEXT: ## ymm0 = ymm0[2,1,1,0,6,5,5,4]
; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
%res = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 %x3)
%res1 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> zeroinitializer, i8 %x3)
%res2 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 -1)
%res3 = fadd <8 x float> %res, %res1
%res4 = fadd <8 x float> %res3, %res2
ret <8 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float>, i32, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_vpermil_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps $22, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: ## xmm1 = xmm1[2,1,1,0]
; CHECK-NEXT: vpermilps $22, %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: ## xmm2 = k1[2,1,1,0]
; CHECK-NEXT: vpermilps $22, %xmm0, %xmm0
; CHECK-NEXT: ## xmm0 = xmm0[2,1,1,0]
; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
%res = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 %x3)
%res1 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> zeroinitializer, i8 %x3)
%res2 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 -1)
%res3 = fadd <4 x float> %res, %res1
%res4 = fadd <4 x float> %res2, %res3
ret <4 x float> %res4
}
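
; The vpermilvar variants take the permutation control as a vector operand
; rather than an immediate; the masking pattern is the same as above.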
declare <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_vpermilvar_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
%res = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
%res1 = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3)
%res2 = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
%res3 = fadd <4 x double> %res, %res1
%res4 = fadd <4 x double> %res2, %res3
ret <4 x double> %res4
}

declare <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_vpermilvar_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
%res = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3)
%res1 = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> zeroinitializer, i8 %x3)
%res2 = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1)
%res3 = fadd <2 x double> %res, %res1
%res4 = fadd <2 x double> %res3, %res2
ret <2 x double> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_vpermilvar_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm1
; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
%res = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
%res1 = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3)
%res2 = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
%res3 = fadd <8 x float> %res, %res1
%res4 = fadd <8 x float> %res3, %res2
ret <8 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_vpermilvar_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
%res = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3)
%res1 = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> zeroinitializer, i8 %x3)
%res2 = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1)
%res3 = fadd <4 x float> %res, %res1
%res4 = fadd <4 x float> %res2, %res3
ret <4 x float> %res4
}
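
; vinsertf32x4/vinserti32x4 $1 insert a 128-bit vector into the upper lane
; of a 256-bit destination.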
declare <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float>, <4 x float>, i32, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_insertf32x4_256(<8 x float> %x0, <4 x float> %x1, <8 x float> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x4_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
%res = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 %x4)
%res1 = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 -1)
%res2 = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> zeroinitializer, i8 %x4)
%res3 = fadd <8 x float> %res, %res1
%res4 = fadd <8 x float> %res2, %res3
ret <8 x float> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32>, <4 x i32>, i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_inserti32x4_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x4_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0
%res = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 %x4)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 -1)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> zeroinitializer, i8 %x4)
%res3 = add <8 x i32> %res, %res1
%res4 = add <8 x i32> %res2, %res3
ret <8 x i32> %res4
}
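
; vpternlogd/vpternlogq: imm 33 selects the ternary truth table. Because
; the instruction overwrites its first source, the expected code copies
; %zmm0 to %zmm3 (vmovaps) before the masked form. The mask and maskz
; intrinsic flavors are tested separately.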
declare <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8)

define <4 x i32>@test_int_x86_avx512_mask_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 {%k1}
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm3, %xmm0
%res = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}

declare <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8)

define <4 x i32>@test_int_x86_avx512_maskz_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 {%k1} {z}
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm3, %xmm0
%res = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
%res1 = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8)

define <8 x i32>@test_int_x86_avx512_mask_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 {%k1}
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0
; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0
%res = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1)
%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8)

define <8 x i32>@test_int_x86_avx512_maskz_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 {%k1} {z}
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0
; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0
%res = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
%res1 = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1)
%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8)

define <2 x i64>@test_int_x86_avx512_mask_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 {%k1}
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0
%res = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
%res2 = add <2 x i64> %res, %res1
ret <2 x i64> %res2
}

declare <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8)

define <2 x i64>@test_int_x86_avx512_maskz_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 {%k1} {z}
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0
%res = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
%res1 = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
%res2 = add <2 x i64> %res, %res1
ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)

define <4 x i64>@test_int_x86_avx512_mask_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 {%k1}
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0
; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0
%res = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
%res2 = add <4 x i64> %res, %res1
ret <4 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)

define <4 x i64>@test_int_x86_avx512_maskz_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 {%k1} {z}
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0
; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0
%res = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
%res1 = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
%res2 = add <4 x i64> %res, %res1
ret <4 x i64> %res2
}
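
; vpbroadcastd/vpbroadcastq from the low element of an xmm source, in
; unmasked, merge-masked, and zero-masked forms.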
declare <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_pbroadcastd_256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpbroadcastd %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vpbroadcastd %xmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpbroadcastd %xmm0, %ymm0
; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
%res = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> %x1, i8 -1)
%res1 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask)
%res2 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %mask)
%res3 = add <8 x i32> %res, %res1
%res4 = add <8 x i32> %res2, %res3
ret <8 x i32> %res4
}

declare <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_pbroadcastd_128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpbroadcastd %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpbroadcastd %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpbroadcastd %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
%res = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
%res1 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask)
%res2 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %mask)
%res3 = add <4 x i32> %res, %res1
%res4 = add <4 x i32> %res2, %res3
ret <4 x i32> %res4
}

declare <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_pbroadcastq_256(<2 x i64> %x0, <4 x i64> %x1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpbroadcastq %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vpbroadcastq %xmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpbroadcastq %xmm0, %ymm0
; CHECK-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0
%res = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1, i8 -1)
%res1 = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1, i8 %mask)
%res2 = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> zeroinitializer, i8 %mask)
%res3 = add <4 x i64> %res, %res1
%res4 = add <4 x i64> %res2, %res3
ret <4 x i64> %res4
}

declare <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_pbroadcastq_128(<2 x i64> %x0, <2 x i64> %x1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpbroadcastq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpbroadcastq %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpbroadcastq %xmm0, %xmm0
; CHECK-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0
%res = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
%res1 = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1, i8 %mask)
%res2 = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> zeroinitializer, i8 %mask)
%res3 = add <2 x i64> %res, %res1
%res4 = add <2 x i64> %res2, %res3
ret <2 x i64> %res4
}
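
; Half-precision conversions. vcvtph2ps widens f16 to f32; the _rrk and
; _rrkz tests cover the merge- and zero-masked register forms, and
; vcvtps2ph converts back with a rounding-control immediate ($2).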
define <4 x float> @test_x86_vcvtph2ps_128(<8 x i16> %a0) {
; CHECK: test_x86_vcvtph2ps_128
; CHECK: vcvtph2ps %xmm0, %xmm0
%res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> zeroinitializer, i8 -1)
ret <4 x float> %res
}

define <4 x float> @test_x86_vcvtph2ps_128_rrk(<8 x i16> %a0, <4 x float> %a1, i8 %mask) {
; CHECK: test_x86_vcvtph2ps_128_rrk
; CHECK: vcvtph2ps %xmm0, %xmm1 {%k1}
%res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> %a1, i8 %mask)
ret <4 x float> %res
}

define <4 x float> @test_x86_vcvtph2ps_128_rrkz(<8 x i16> %a0, i8 %mask) {
; CHECK: test_x86_vcvtph2ps_128_rrkz
; CHECK: vcvtph2ps %xmm0, %xmm0 {%k1} {z}
%res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> zeroinitializer, i8 %mask)
ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16>, <4 x float>, i8) nounwind readonly

define <8 x float> @test_x86_vcvtph2ps_256(<8 x i16> %a0) {
; CHECK: test_x86_vcvtph2ps_256
; CHECK: vcvtph2ps %xmm0, %ymm0
%res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> zeroinitializer, i8 -1)
ret <8 x float> %res
}

define <8 x float> @test_x86_vcvtph2ps_256_rrk(<8 x i16> %a0, <8 x float> %a1, i8 %mask) {
; CHECK: test_x86_vcvtph2ps_256_rrk
; CHECK: vcvtph2ps %xmm0, %ymm1 {%k1}
%res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> %a1, i8 %mask)
ret <8 x float> %res
}

define <8 x float> @test_x86_vcvtph2ps_256_rrkz(<8 x i16> %a0, i8 %mask) {
; CHECK: test_x86_vcvtph2ps_256_rrkz
; CHECK: vcvtph2ps %xmm0, %ymm0 {%k1} {z}
%res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> zeroinitializer, i8 %mask)
ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16>, <8 x float>, i8) nounwind readonly

define <8 x i16> @test_x86_vcvtps2ph_128(<4 x float> %a0) {
; CHECK: test_x86_vcvtps2ph_128
; CHECK: vcvtps2ph $2, %xmm0, %xmm0
%res = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 -1)
ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float>, i32, <8 x i16>, i8) nounwind readonly

define <8 x i16> @test_x86_vcvtps2ph_256(<8 x float> %a0) {
; CHECK: test_x86_vcvtps2ph_256
; CHECK: vcvtps2ph $2, %ymm0, %xmm0
%res = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 -1)
ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float>, i32, <8 x i16>, i8) nounwind readonly
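
; vmovsldup/vmovshdup duplicate the even/odd f32 elements respectively,
; and vmovddup duplicates the low double of each 128-bit lane, as the
; expected shuffle comments show.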
declare <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_movsldup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup %xmm0, %xmm1 {%k1}
; CHECK-NEXT: ## xmm1 = xmm0[0,0,2,2]
; CHECK-NEXT: vmovsldup %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: ## xmm2 = xmm0[0,0,2,2]
; CHECK-NEXT: vmovsldup %xmm0, %xmm0
; CHECK-NEXT: ## xmm0 = xmm0[0,0,2,2]
; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
%res = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2)
%res1 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 -1)
%res2 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2)
%res3 = fadd <4 x float> %res, %res1
%res4 = fadd <4 x float> %res2, %res3
ret <4 x float> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_movsldup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup %ymm0, %ymm1 {%k1}
; CHECK-NEXT: ## ymm1 = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: vmovsldup %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: ## ymm2 = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: vmovsldup %ymm0, %ymm0
; CHECK-NEXT: ## ymm0 = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
%res = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2)
%res1 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 -1)
%res2 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2)
%res3 = fadd <8 x float> %res, %res1
%res4 = fadd <8 x float> %res2, %res3
ret <8 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_movshdup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup %xmm0, %xmm1 {%k1}
; CHECK-NEXT: ## xmm1 = xmm0[1,1,3,3]
; CHECK-NEXT: vmovshdup %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: ## xmm2 = xmm0[1,1,3,3]
; CHECK-NEXT: vmovshdup %xmm0, %xmm0
; CHECK-NEXT: ## xmm0 = xmm0[1,1,3,3]
; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
%res = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2)
%res1 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 -1)
%res2 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2)
%res3 = fadd <4 x float> %res, %res1
%res4 = fadd <4 x float> %res2, %res3
ret <4 x float> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_movshdup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup %ymm0, %ymm1 {%k1}
; CHECK-NEXT: ## ymm1 = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT: vmovshdup %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: ## ymm2 = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT: vmovshdup %ymm0, %ymm0
; CHECK-NEXT: ## ymm0 = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
%res = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2)
%res1 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 -1)
%res2 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2)
%res3 = fadd <8 x float> %res, %res1
%res4 = fadd <8 x float> %res2, %res3
ret <8 x float> %res4
}

declare <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_movddup_128(<2 x double> %x0, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movddup_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup %xmm0, %xmm1 {%k1}
; CHECK-NEXT: ## xmm1 = xmm0[0,0]
; CHECK-NEXT: vmovddup %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: ## xmm2 = xmm0[0,0]
; CHECK-NEXT: vmovddup %xmm0, %xmm0
; CHECK-NEXT: ## xmm0 = xmm0[0,0]
; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
%res = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 %x2)
%res1 = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 -1)
%res2 = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> zeroinitializer, i8 %x2)
%res3 = fadd <2 x double> %res, %res1
%res4 = fadd <2 x double> %res2, %res3
ret <2 x double> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_movddup_256(<4 x double> %x0, <4 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movddup_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup %ymm0, %ymm1 {%k1}
; CHECK-NEXT: ## ymm1 = ymm0[0,0,2,2]
; CHECK-NEXT: vmovddup %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: ## ymm2 = ymm0[0,0,2,2]
; CHECK-NEXT: vmovddup %ymm0, %ymm0
; CHECK-NEXT: ## ymm0 = ymm0[0,0,2,2]
; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0
%res = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 %x2)
%res1 = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 -1)
%res2 = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> zeroinitializer, i8 %x2)
%res3 = fadd <4 x double> %res, %res1
%res4 = fadd <4 x double> %res2, %res3
ret <4 x double> %res4
}
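
; vrsqrt14ps/pd and vrcp14ps/pd are the ~14-bit reciprocal-sqrt and
; reciprocal estimate instructions; _rr, _rrkz, and _rrk cover the
; unmasked, zero-masked, and merge-masked register forms.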
define <8 x float> @test_rsqrt_ps_256_rr(<8 x float> %a0) {
; CHECK-LABEL: test_rsqrt_ps_256_rr:
; CHECK: vrsqrt14ps %ymm0, %ymm0
%res = call <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 -1)
ret <8 x float> %res
}

define <8 x float> @test_rsqrt_ps_256_rrkz(<8 x float> %a0, i8 %mask) {
; CHECK-LABEL: test_rsqrt_ps_256_rrkz:
; CHECK: vrsqrt14ps %ymm0, %ymm0 {%k1} {z}
%res = call <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
ret <8 x float> %res
}

define <8 x float> @test_rsqrt_ps_256_rrk(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_rsqrt_ps_256_rrk:
; CHECK: vrsqrt14ps %ymm0, %ymm1 {%k1}
%res = call <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float> %a0, <8 x float> %a1, i8 %mask)
ret <8 x float> %res
}

define <4 x float> @test_rsqrt_ps_128_rr(<4 x float> %a0) {
; CHECK-LABEL: test_rsqrt_ps_128_rr:
; CHECK: vrsqrt14ps %xmm0, %xmm0
%res = call <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 -1)
ret <4 x float> %res
}

define <4 x float> @test_rsqrt_ps_128_rrkz(<4 x float> %a0, i8 %mask) {
; CHECK-LABEL: test_rsqrt_ps_128_rrkz:
; CHECK: vrsqrt14ps %xmm0, %xmm0 {%k1} {z}
%res = call <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 %mask)
ret <4 x float> %res
}

define <4 x float> @test_rsqrt_ps_128_rrk(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_rsqrt_ps_128_rrk:
; CHECK: vrsqrt14ps %xmm0, %xmm1 {%k1}
%res = call <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask)
ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
declare <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float>, <4 x float>, i8) nounwind readnone

define <8 x float> @test_rcp_ps_256_rr(<8 x float> %a0) {
; CHECK-LABEL: test_rcp_ps_256_rr:
; CHECK: vrcp14ps %ymm0, %ymm0
%res = call <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 -1)
ret <8 x float> %res
}

define <8 x float> @test_rcp_ps_256_rrkz(<8 x float> %a0, i8 %mask) {
; CHECK-LABEL: test_rcp_ps_256_rrkz:
; CHECK: vrcp14ps %ymm0, %ymm0 {%k1} {z}
%res = call <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
ret <8 x float> %res
}

define <8 x float> @test_rcp_ps_256_rrk(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_rcp_ps_256_rrk:
; CHECK: vrcp14ps %ymm0, %ymm1 {%k1}
%res = call <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float> %a0, <8 x float> %a1, i8 %mask)
ret <8 x float> %res
}

define <4 x float> @test_rcp_ps_128_rr(<4 x float> %a0) {
; CHECK-LABEL: test_rcp_ps_128_rr:
; CHECK: vrcp14ps %xmm0, %xmm0
%res = call <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 -1)
ret <4 x float> %res
}

define <4 x float> @test_rcp_ps_128_rrkz(<4 x float> %a0, i8 %mask) {
; CHECK-LABEL: test_rcp_ps_128_rrkz:
; CHECK: vrcp14ps %xmm0, %xmm0 {%k1} {z}
%res = call <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 %mask)
ret <4 x float> %res
}

define <4 x float> @test_rcp_ps_128_rrk(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_rcp_ps_128_rrk:
; CHECK: vrcp14ps %xmm0, %xmm1 {%k1}
%res = call <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask)
ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
declare <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float>, <4 x float>, i8) nounwind readnone

define <4 x double> @test_rsqrt_pd_256_rr(<4 x double> %a0) {
; CHECK-LABEL: test_rsqrt_pd_256_rr:
; CHECK: vrsqrt14pd %ymm0, %ymm0
%res = call <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 -1)
ret <4 x double> %res
}

define <4 x double> @test_rsqrt_pd_256_rrkz(<4 x double> %a0, i8 %mask) {
; CHECK-LABEL: test_rsqrt_pd_256_rrkz:
; CHECK: vrsqrt14pd %ymm0, %ymm0 {%k1} {z}
%res = call <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
ret <4 x double> %res
}

define <4 x double> @test_rsqrt_pd_256_rrk(<4 x double> %a0, <4 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_rsqrt_pd_256_rrk:
; CHECK: vrsqrt14pd %ymm0, %ymm1 {%k1}
%res = call <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %mask)
ret <4 x double> %res
}

define <2 x double> @test_rsqrt_pd_128_rr(<2 x double> %a0) {
; CHECK-LABEL: test_rsqrt_pd_128_rr:
; CHECK: vrsqrt14pd %xmm0, %xmm0
%res = call <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double> %a0, <2 x double> zeroinitializer, i8 -1)
ret <2 x double> %res
}

define <2 x double> @test_rsqrt_pd_128_rrkz(<2 x double> %a0, i8 %mask) {
; CHECK-LABEL: test_rsqrt_pd_128_rrkz:
; CHECK: vrsqrt14pd %xmm0, %xmm0 {%k1} {z}
%res = call <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double> %a0, <2 x double> zeroinitializer, i8 %mask)
ret <2 x double> %res
}

define <2 x double> @test_rsqrt_pd_128_rrk(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_rsqrt_pd_128_rrk:
; CHECK: vrsqrt14pd %xmm0, %xmm1 {%k1}
%res = call <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double> %a0, <2 x double> %a1, i8 %mask)
ret <2 x double> %res
}

declare <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
declare <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double>, <2 x double>, i8) nounwind readnone

define <4 x double> @test_rcp_pd_256_rr(<4 x double> %a0) {
; CHECK-LABEL: test_rcp_pd_256_rr:
; CHECK: vrcp14pd %ymm0, %ymm0
%res = call <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 -1)
ret <4 x double> %res
}

define <4 x double> @test_rcp_pd_256_rrkz(<4 x double> %a0, i8 %mask) {
; CHECK-LABEL: test_rcp_pd_256_rrkz:
; CHECK: vrcp14pd %ymm0, %ymm0 {%k1} {z}
%res = call <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
ret <4 x double> %res
}

define <4 x double> @test_rcp_pd_256_rrk(<4 x double> %a0, <4 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_rcp_pd_256_rrk:
; CHECK: vrcp14pd %ymm0, %ymm1 {%k1}
%res = call <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %mask)
ret <4 x double> %res
}

define <2 x double> @test_rcp_pd_128_rr(<2 x double> %a0) {
; CHECK-LABEL: test_rcp_pd_128_rr:
; CHECK: vrcp14pd %xmm0, %xmm0
%res = call <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double> %a0, <2 x double> zeroinitializer, i8 -1)
ret <2 x double> %res
}

define <2 x double> @test_rcp_pd_128_rrkz(<2 x double> %a0, i8 %mask) {
; CHECK-LABEL: test_rcp_pd_128_rrkz:
; CHECK: vrcp14pd %xmm0, %xmm0 {%k1} {z}
%res = call <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double> %a0, <2 x double> zeroinitializer, i8 %mask)
ret <2 x double> %res
}

define <2 x double> @test_rcp_pd_128_rrk(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_rcp_pd_128_rrk:
; CHECK: vrcp14pd %xmm0, %xmm1 {%k1}
%res = call <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double> %a0, <2 x double> %a1, i8 %mask)
ret <2 x double> %res
}

declare <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
declare <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double>, <2 x double>, i8) nounwind readnone
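
; vbroadcastsd/vbroadcastss broadcast the low scalar of an xmm source;
; all three call results are added so none of them can be folded away.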

define <4 x double> @test_x86_vbroadcast_sd_pd_256(<2 x double> %a0, <4 x double> %a1, i8 %mask ) {
; CHECK-LABEL: test_x86_vbroadcast_sd_pd_256:
; CHECK: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
%res = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> zeroinitializer, i8 -1)
%res1 = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> %a1, i8 %mask)
%res2 = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
%res3 = fadd <4 x double> %res, %res1
%res4 = fadd <4 x double> %res2, %res3
ret <4 x double> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double>, <4 x double>, i8) nounwind readonly

define <8 x float> @test_x86_vbroadcast_ss_ps_256(<4 x float> %a0, <8 x float> %a1, i8 %mask ) {
; CHECK-LABEL: test_x86_vbroadcast_ss_ps_256:
; CHECK: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vbroadcastss %xmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0
%res = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 -1)
%res1 = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> %a1, i8 %mask)
%res2 = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
%res3 = fadd <8 x float> %res, %res1
%res4 = fadd <8 x float> %res2, %res3
ret <8 x float> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float>, <8 x float>, i8) nounwind readonly

define <4 x float> @test_x86_vbroadcast_ss_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask ) {
; CHECK-LABEL: test_x86_vbroadcast_ss_ps_128:
; CHECK: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vbroadcastss %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vbroadcastss %xmm0, %xmm0
; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
%res = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 -1)
%res1 = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask)
%res2 = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 %mask)
%res3 = fadd <4 x float> %res, %res1
%res4 = fadd <4 x float> %res2, %res3
ret <4 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float>, <4 x float>, i8) nounwind readonly
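
; The 128-to-256-bit subvector broadcasts are expected to lower to a lane
; shuffle (vshuff32x4/vshufi32x4) with an immediate of 0 rather than to a
; dedicated broadcast instruction.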

declare <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_broadcastf32x4_256(<4 x float> %x0, <8 x float> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x4_256:
; CHECK: kmovw %eax, %k1
; CHECK: vshuff32x4 $0, %ymm0, %ymm0, %ymm2 {%k1} {z}
; CHECK: vshuff32x4 $0, %ymm0, %ymm0, %ymm1 {%k1}
; CHECK: vshuff32x4 $0, %ymm0, %ymm0, %ymm0
; CHECK: vaddps %ymm1, %ymm0, %ymm0
; CHECK: vaddps %ymm0, %ymm2, %ymm0
%res1 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> %x2, i8 -1)
%res2 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> %x2, i8 %mask)
%res3 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> zeroinitializer, i8 %mask)
%res4 = fadd <8 x float> %res1, %res2
%res5 = fadd <8 x float> %res3, %res4
ret <8 x float> %res5
}

declare <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x4_256(<4 x i32> %x0, <8 x i32> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x4_256:
; CHECK: kmovw %eax, %k1
; CHECK: vshufi32x4 $0, %ymm0, %ymm0, %ymm2 {%k1} {z}
; CHECK: vshufi32x4 $0, %ymm0, %ymm0, %ymm1 {%k1}
; CHECK: vshufi32x4 $0, %ymm0, %ymm0, %ymm0
; CHECK: vpaddd %ymm1, %ymm0, %ymm0
; CHECK: vpaddd %ymm0, %ymm2, %ymm0
%res1 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> %x2, i8 -1)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> %x2, i8 %mask)
%res3 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %mask)
%res4 = add <8 x i32> %res1, %res2
%res5 = add <8 x i32> %res3, %res4
ret <8 x i32> %res5
}
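
; Logical right shifts by an xmm-held count (vpsrlq/vpsrld). The i8 mask
; argument arrives in %dil and is zero-extended before being moved into %k1.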

declare <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_psrl_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrl_q_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsrlq %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpsrlq %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; CHECK-NEXT: vpaddq %xmm3, %xmm0, %xmm0
%res = call <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
%res3 = add <2 x i64> %res, %res1
%res4 = add <2 x i64> %res3, %res2
ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64>, <2 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_psrl_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrl_q_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsrlq %xmm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpsrlq %xmm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0
; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0
%res = call <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 -1)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
%res3 = add <4 x i64> %res, %res1
%res4 = add <4 x i64> %res3, %res2
ret <4 x i64> %res4
}
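
; Shift-by-immediate variants: the count is an immediate operand of the
; intrinsic, so the mask argument lands in %sil instead of %dil.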

declare <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64>, i8, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_psrl_qi_128(<2 x i64> %x0, i8 %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrl_qi_128:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsrlq $255, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpsrlq $255, %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpsrlq $255, %xmm0, %xmm0
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0
%res = call <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64> %x0, i8 255, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64> %x0, i8 255, <2 x i64> %x2, i8 -1)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64> %x0, i8 255, <2 x i64> zeroinitializer, i8 %x3)
%res3 = add <2 x i64> %res, %res1
%res4 = add <2 x i64> %res2, %res3
ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64>, i8, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_psrl_qi_256(<4 x i64> %x0, i8 %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrl_qi_256:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsrlq $255, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vpsrlq $255, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpsrlq $255, %ymm0, %ymm0
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0
%res = call <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64> %x0, i8 255, <4 x i64> %x2, i8 %x3)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64> %x0, i8 255, <4 x i64> %x2, i8 -1)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64> %x0, i8 255, <4 x i64> zeroinitializer, i8 %x3)
%res3 = add <4 x i64> %res, %res1
%res4 = add <4 x i64> %res2, %res3
ret <4 x i64> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
define <4 x i32>@test_int_x86_avx512_mask_psrl_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrl_d_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsrld %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpsrld %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0
%res = call <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
%res3 = add <4 x i32> %res, %res1
%res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32>, <4 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_psrl_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrl_d_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsrld %xmm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpsrld %xmm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpsrld %xmm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0
%res = call <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 -1)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
%res3 = add <8 x i32> %res, %res1
%res4 = add <8 x i32> %res2, %res3
ret <8 x i32> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32>, i8, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_psrl_di_128(<4 x i32> %x0, i8 %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrl_di_128:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsrld $255, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpsrld $255, %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpsrld $255, %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
%res = call <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32> %x0, i8 255, <4 x i32> %x2, i8 %x3)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32> %x0, i8 255, <4 x i32> %x2, i8 -1)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32> %x0, i8 255, <4 x i32> zeroinitializer, i8 %x3)
%res3 = add <4 x i32> %res, %res1
%res4 = add <4 x i32> %res2, %res3
ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32>, i8, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_psrl_di_256(<8 x i32> %x0, i8 %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrl_di_256:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsrld $255, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vpsrld $255, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpsrld $255, %ymm0, %ymm0
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
%res = call <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32> %x0, i8 255, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32> %x0, i8 255, <8 x i32> %x2, i8 -1)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32> %x0, i8 255, <8 x i32> zeroinitializer, i8 %x3)
%res3 = add <8 x i32> %res, %res1
%res4 = add <8 x i32> %res2, %res3
ret <8 x i32> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32>, i8, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_psrl_di_512(<16 x i32> %x0, i8 %x1, <16 x i32> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrl_di_512:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpsrld $255, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vpsrld $255, %zmm0, %zmm2 {%k1} {z}
; CHECK-NEXT: vpsrld $255, %zmm0, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
%res = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i8 255, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i8 255, <16 x i32> %x2, i16 -1)
%res2 = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i8 255, <16 x i32> zeroinitializer, i16 %x3)
%res3 = add <16 x i32> %res, %res1
%res4 = add <16 x i32> %res2, %res3
ret <16 x i32> %res4
}
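
; Variable (per-element) logical right shifts, vpsrlvq/vpsrlvd.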

declare <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_psrlv2_di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrlv2_di:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsrlvq %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpsrlvq %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
%res = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
%res3 = add <2 x i64> %res, %res1
%res4 = add <2 x i64> %res3, %res2
ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_psrlv4_di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrlv4_di:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsrlvq %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpsrlvq %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
%res = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
%res3 = add <4 x i64> %res, %res1
%res4 = add <4 x i64> %res3, %res2
ret <4 x i64> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_psrlv4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrlv4_si:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsrlvd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpsrlvd %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
%res = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
%res3 = add <4 x i32> %res, %res1
%res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_psrlv8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrlv8_si:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
%res = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
%res3 = add <8 x i32> %res, %res1
%res4 = add <8 x i32> %res3, %res2
ret <8 x i32> %res4
}
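
; Arithmetic right shifts. Note that vpsraq on 64-bit elements has no AVX2
; counterpart; it is new with AVX-512.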

declare <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_psra_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psra_d_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsrad %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpsrad %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
%res = call <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
%res3 = add <4 x i32> %res, %res1
%res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32>, <4 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_psra_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psra_d_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsrad %xmm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpsrad %xmm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpsrad %xmm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
%res = call <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 -1)
%res3 = add <8 x i32> %res, %res1
%res4 = add <8 x i32> %res3, %res2
ret <8 x i32> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.psra.di.128(<4 x i32>, i8, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_psra_di_128(<4 x i32> %x0, i8 %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psra_di_128:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsrad $3, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpsrad $3, %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpsrad $3, %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
%res = call <4 x i32> @llvm.x86.avx512.mask.psra.di.128(<4 x i32> %x0, i8 3, <4 x i32> %x2, i8 %x3)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.psra.di.128(<4 x i32> %x0, i8 3, <4 x i32> zeroinitializer, i8 %x3)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.psra.di.128(<4 x i32> %x0, i8 3, <4 x i32> %x2, i8 -1)
%res3 = add <4 x i32> %res, %res1
%res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.psra.di.256(<8 x i32>, i8, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_psra_di_256(<8 x i32> %x0, i8 %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psra_di_256:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsrad $3, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vpsrad $3, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpsrad $3, %ymm0, %ymm0
; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
%res = call <8 x i32> @llvm.x86.avx512.mask.psra.di.256(<8 x i32> %x0, i8 3, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.psra.di.256(<8 x i32> %x0, i8 3, <8 x i32> zeroinitializer, i8 %x3)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.psra.di.256(<8 x i32> %x0, i8 3, <8 x i32> %x2, i8 -1)
%res3 = add <8 x i32> %res, %res1
%res4 = add <8 x i32> %res3, %res2
ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_psra_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psra_q_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsraq %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpsraq %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpsraq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
%res = call <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
%res3 = add <2 x i64> %res, %res1
%res4 = add <2 x i64> %res3, %res2
ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64>, <2 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_psra_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psra_q_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsraq %xmm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpsraq %xmm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpsraq %xmm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
%res = call <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 -1)
%res3 = add <4 x i64> %res, %res1
%res4 = add <4 x i64> %res3, %res2
ret <4 x i64> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64>, i8, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_psra_qi_128(<2 x i64> %x0, i8 %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psra_qi_128:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsraq $3, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpsraq $3, %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpsraq $3, %xmm0, %xmm0
; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
%res = call <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64> %x0, i8 3, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64> %x0, i8 3, <2 x i64> zeroinitializer, i8 %x3)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64> %x0, i8 3, <2 x i64> %x2, i8 -1)
%res3 = add <2 x i64> %res, %res1
%res4 = add <2 x i64> %res3, %res2
ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64>, i8, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_psra_qi_256(<4 x i64> %x0, i8 %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psra_qi_256:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsraq $3, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vpsraq $3, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpsraq $3, %ymm0, %ymm0
; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
%res = call <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64> %x0, i8 3, <4 x i64> %x2, i8 %x3)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64> %x0, i8 3, <4 x i64> zeroinitializer, i8 %x3)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64> %x0, i8 3, <4 x i64> %x2, i8 -1)
%res3 = add <4 x i64> %res, %res1
%res4 = add <4 x i64> %res3, %res2
ret <4 x i64> %res4
}
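
; Logical left shifts, by an xmm-held count and by immediate.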

declare <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_psll_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psll_d_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpslld %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpslld %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpslld %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
%res = call <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
%res3 = add <4 x i32> %res, %res1
%res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32>, <4 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_psll_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psll_d_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpslld %xmm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpslld %xmm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpslld %xmm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
%res = call <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 -1)
%res3 = add <8 x i32> %res, %res1
%res4 = add <8 x i32> %res3, %res2
ret <8 x i32> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32>, i8, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_psll_di_128(<4 x i32> %x0, i8 %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psll_di_128:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpslld $3, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpslld $3, %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpslld $3, %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
%res = call <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32> %x0, i8 3, <4 x i32> %x2, i8 %x3)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32> %x0, i8 3, <4 x i32> zeroinitializer, i8 %x3)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32> %x0, i8 3, <4 x i32> %x2, i8 -1)
%res3 = add <4 x i32> %res, %res1
%res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32>, i8, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_psll_di_256(<8 x i32> %x0, i8 %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psll_di_256:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpslld $3, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vpslld $3, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpslld $3, %ymm0, %ymm0
; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
%res = call <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32> %x0, i8 3, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32> %x0, i8 3, <8 x i32> zeroinitializer, i8 %x3)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32> %x0, i8 3, <8 x i32> %x2, i8 -1)
%res3 = add <8 x i32> %res, %res1
%res4 = add <8 x i32> %res3, %res2
ret <8 x i32> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64>, <2 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_psll_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psll_q_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsllq %xmm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpsllq %xmm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpsllq %xmm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
%res = call <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 -1)
%res3 = add <4 x i64> %res, %res1
%res4 = add <4 x i64> %res3, %res2
ret <4 x i64> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.psll.qi.128(<2 x i64>, i8, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_psll_qi_128(<2 x i64> %x0, i8 %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psll_qi_128:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsllq $3, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpsllq $3, %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpsllq $3, %xmm0, %xmm0
; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
%res = call <2 x i64> @llvm.x86.avx512.mask.psll.qi.128(<2 x i64> %x0, i8 3, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.psll.qi.128(<2 x i64> %x0, i8 3, <2 x i64> zeroinitializer, i8 %x3)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.psll.qi.128(<2 x i64> %x0, i8 3, <2 x i64> %x2, i8 -1)
%res3 = add <2 x i64> %res, %res1
%res4 = add <2 x i64> %res3, %res2
ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.psll.qi.256(<4 x i64>, i8, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_psll_qi_256(<4 x i64> %x0, i8 %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psll_qi_256:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsllq $3, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vpsllq $3, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpsllq $3, %ymm0, %ymm0
; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
%res = call <4 x i64> @llvm.x86.avx512.mask.psll.qi.256(<4 x i64> %x0, i8 3, <4 x i64> %x2, i8 %x3)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.psll.qi.256(<4 x i64> %x0, i8 3, <4 x i64> zeroinitializer, i8 %x3)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.psll.qi.256(<4 x i64> %x0, i8 3, <4 x i64> %x2, i8 -1)
%res3 = add <4 x i64> %res, %res1
%res4 = add <4 x i64> %res3, %res2
ret <4 x i64> %res4
}
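
; Masked load tests: an unmasked load, a merge-masked load into that result,
; and a zero-masked load, combined with an add so all three forms stay live.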

define <8 x float> @test_mask_load_aligned_ps_256(<8 x float> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_ps_256:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps (%rdi), %ymm0
; CHECK-NEXT: vmovaps (%rdi), %ymm0 {%k1}
; CHECK-NEXT: vmovaps (%rdi), %ymm1 {%k1} {z}
; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
%res = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 -1)
%res1 = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> %res, i8 %mask)
%res2 = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 %mask)
%res4 = fadd <8 x float> %res2, %res1
ret <8 x float> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8*, <8 x float>, i8)

define <8 x float> @test_mask_load_unaligned_ps_256(<8 x float> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_unaligned_ps_256:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovups (%rdi), %ymm0
; CHECK-NEXT: vmovups (%rdi), %ymm0 {%k1}
; CHECK-NEXT: vmovups (%rdi), %ymm1 {%k1} {z}
; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
%res = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 -1)
%res1 = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> %res, i8 %mask)
%res2 = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 %mask)
%res4 = fadd <8 x float> %res2, %res1
ret <8 x float> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8*, <8 x float>, i8)

define <4 x double> @test_mask_load_aligned_pd_256(<4 x double> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_pd_256:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovapd (%rdi), %ymm0
; CHECK-NEXT: vmovapd (%rdi), %ymm0 {%k1}
; CHECK-NEXT: vmovapd (%rdi), %ymm1 {%k1} {z}
; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
%res = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 -1)
%res1 = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> %res, i8 %mask)
%res2 = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 %mask)
%res4 = fadd <4 x double> %res2, %res1
ret <4 x double> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8*, <4 x double>, i8)

define <4 x double> @test_mask_load_unaligned_pd_256(<4 x double> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_unaligned_pd_256:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovupd (%rdi), %ymm0
; CHECK-NEXT: vmovupd (%rdi), %ymm0 {%k1}
; CHECK-NEXT: vmovupd (%rdi), %ymm1 {%k1} {z}
; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
%res = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 -1)
%res1 = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> %res, i8 %mask)
%res2 = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 %mask)
%res4 = fadd <4 x double> %res2, %res1
ret <4 x double> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8*, <4 x double>, i8)

define <4 x float> @test_mask_load_aligned_ps_128(<4 x float> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_ps_128:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps (%rdi), %xmm0
; CHECK-NEXT: vmovaps (%rdi), %xmm0 {%k1}
; CHECK-NEXT: vmovaps (%rdi), %xmm1 {%k1} {z}
; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
%res = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 -1)
%res1 = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> %res, i8 %mask)
%res2 = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 %mask)
%res4 = fadd <4 x float> %res2, %res1
ret <4 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8*, <4 x float>, i8)

define <4 x float> @test_mask_load_unaligned_ps_128(<4 x float> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_unaligned_ps_128:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovups (%rdi), %xmm0
; CHECK-NEXT: vmovups (%rdi), %xmm0 {%k1}
; CHECK-NEXT: vmovups (%rdi), %xmm1 {%k1} {z}
; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
%res = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 -1)
%res1 = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> %res, i8 %mask)
%res2 = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 %mask)
%res4 = fadd <4 x float> %res2, %res1
ret <4 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8*, <4 x float>, i8)

define <2 x double> @test_mask_load_aligned_pd_128(<2 x double> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_pd_128:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovapd (%rdi), %xmm0
; CHECK-NEXT: vmovapd (%rdi), %xmm0 {%k1}
; CHECK-NEXT: vmovapd (%rdi), %xmm1 {%k1} {z}
; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
%res = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 -1)
%res1 = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> %res, i8 %mask)
%res2 = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 %mask)
%res4 = fadd <2 x double> %res2, %res1
ret <2 x double> %res4
}

declare <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8*, <2 x double>, i8)

define <2 x double> @test_mask_load_unaligned_pd_128(<2 x double> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_unaligned_pd_128:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovupd (%rdi), %xmm0
; CHECK-NEXT: vmovupd (%rdi), %xmm0 {%k1}
; CHECK-NEXT: vmovupd (%rdi), %xmm1 {%k1} {z}
; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
%res = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 -1)
%res1 = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> %res, i8 %mask)
%res2 = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 %mask)
%res4 = fadd <2 x double> %res2, %res1
ret <2 x double> %res4
}

declare <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8*, <2 x double>, i8)
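
; Variable arithmetic right shifts: vpsravd, plus the AVX-512-only vpsravq.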

declare <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_psrav4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrav4_si:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsravd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpsravd %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpsravd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
%res = call <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
%res3 = add <4 x i32> %res, %res1
%res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_psrav8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrav8_si:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
%res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
%res3 = add <8 x i32> %res, %res1
%res4 = add <8 x i32> %res3, %res2
ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrav_q_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsravq %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpsravq %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpsravq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
%res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
%res3 = add <2 x i64> %res, %res1
%res4 = add <2 x i64> %res3, %res2
ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_psrav_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrav_q_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsravq %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpsravq %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpsravq %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
%res = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
%res3 = add <4 x i64> %res, %res1
%res4 = add <4 x i64> %res3, %res2
ret <4 x i64> %res4
}
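
; Variable logical left shifts, vpsllvq/vpsllvd.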

declare <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_psllv2_di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psllv2_di:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsllvq %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpsllvq %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
%res = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
%res3 = add <2 x i64> %res, %res1
%res4 = add <2 x i64> %res3, %res2
ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_psllv4_di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psllv4_di:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsllvq %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpsllvq %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
%res = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
%res3 = add <4 x i64> %res, %res1
%res4 = add <4 x i64> %res3, %res2
ret <4 x i64> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_psllv4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psllv4_si:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsllvd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpsllvd %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
%res = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
%res3 = add <4 x i32> %res, %res1
%res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_psllv8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psllv8_si:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
%res = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
%res3 = add <8 x i32> %res, %res1
%res4 = add <8 x i32> %res3, %res2
ret <8 x i32> %res4
}
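
; Rotate tests: variable rotates (vprorvd/vprorvq, vprolvd/vprolvq) and
; rotate-by-immediate (vprord/vprolq and friends), all new with AVX-512.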

declare <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_prorv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prorv_d_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
%res = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
%res3 = add <4 x i32> %res, %res1
%res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_prorv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prorv_d_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
%res = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
%res3 = add <8 x i32> %res, %res1
%res4 = add <8 x i32> %res3, %res2
ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_prorv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prorv_q_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
%res = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
%res3 = add <2 x i64> %res, %res1
%res4 = add <2 x i64> %res3, %res2
ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_prorv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prorv_q_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
%res = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
%res3 = add <4 x i64> %res, %res1
%res4 = add <4 x i64> %res3, %res2
ret <4 x i64> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32>, i8, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_prol_d_128(<4 x i32> %x0, i8 %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prol_d_128:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vprold $3, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vprold $3, %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vprold $3, %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
%res = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i8 3, <4 x i32> %x2, i8 %x3)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i8 3, <4 x i32> zeroinitializer, i8 %x3)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i8 3, <4 x i32> %x2, i8 -1)
%res3 = add <4 x i32> %res, %res1
%res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32>, i8, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_prol_d_256(<8 x i32> %x0, i8 %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prol_d_256:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vprold $3, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vprold $3, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vprold $3, %ymm0, %ymm0
; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
%res = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i8 3, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i8 3, <8 x i32> zeroinitializer, i8 %x3)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i8 3, <8 x i32> %x2, i8 -1)
%res3 = add <8 x i32> %res, %res1
%res4 = add <8 x i32> %res3, %res2
ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64>, i8, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_prol_q_128(<2 x i64> %x0, i8 %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prol_q_128:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vprolq $3, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vprolq $3, %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vprolq $3, %xmm0, %xmm0
; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
%res = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i8 3, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i8 3, <2 x i64> zeroinitializer, i8 %x3)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i8 3, <2 x i64> %x2, i8 -1)
%res3 = add <2 x i64> %res, %res1
%res4 = add <2 x i64> %res3, %res2
ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64>, i8, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_prol_q_256(<4 x i64> %x0, i8 %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prol_q_256:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vprolq $3, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vprolq $3, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vprolq $3, %ymm0, %ymm0
; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
%res = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i8 3, <4 x i64> %x2, i8 %x3)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i8 3, <4 x i64> zeroinitializer, i8 %x3)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i8 3, <4 x i64> %x2, i8 -1)
%res3 = add <4 x i64> %res, %res1
%res4 = add <4 x i64> %res3, %res2
ret <4 x i64> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_prolv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prolv_d_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vprolvd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vprolvd %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vprolvd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
%res = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
%res3 = add <4 x i32> %res, %res1
%res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_prolv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prolv_d_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
%res = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
%res3 = add <8 x i32> %res, %res1
%res4 = add <8 x i32> %res3, %res2
ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_prolv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prolv_q_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
%res = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
%res3 = add <2 x i64> %res, %res1
%res4 = add <2 x i64> %res3, %res2
ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_prolv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prolv_q_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
%res = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
%res3 = add <4 x i64> %res, %res1
%res4 = add <4 x i64> %res3, %res2
ret <4 x i64> %res4
}
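
; Immediate rotate-right tests: vprord/vprorq mirror the vprolq tests above;
; rotating right by n is equivalent to rotating left by the element width
; minus n.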

declare <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32>, i8, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pror_d_128(<4 x i32> %x0, i8 %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pror_d_128:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vprord $3, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vprord $3, %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vprord $3, %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
%res = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i8 3, <4 x i32> %x2, i8 %x3)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i8 3, <4 x i32> zeroinitializer, i8 %x3)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i8 3, <4 x i32> %x2, i8 -1)
%res3 = add <4 x i32> %res, %res1
%res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32>, i8, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pror_d_256(<8 x i32> %x0, i8 %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pror_d_256:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vprord $3, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vprord $3, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vprord $3, %ymm0, %ymm0
; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
%res = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i8 3, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i8 3, <8 x i32> zeroinitializer, i8 %x3)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i8 3, <8 x i32> %x2, i8 -1)
%res3 = add <8 x i32> %res, %res1
%res4 = add <8 x i32> %res3, %res2
ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64>, i8, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_pror_q_128(<2 x i64> %x0, i8 %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pror_q_128:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vprorq $3, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vprorq $3, %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vprorq $3, %xmm0, %xmm0
; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
%res = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i8 3, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i8 3, <2 x i64> zeroinitializer, i8 %x3)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i8 3, <2 x i64> %x2, i8 -1)
%res3 = add <2 x i64> %res, %res1
%res4 = add <2 x i64> %res3, %res2
ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64>, i8, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_pror_q_256(<4 x i64> %x0, i8 %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pror_q_256:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vprorq $3, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vprorq $3, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vprorq $3, %ymm0, %ymm0
; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
%res = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i8 3, <4 x i64> %x2, i8 %x3)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i8 3, <4 x i64> zeroinitializer, i8 %x3)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i8 3, <4 x i64> %x2, i8 -1)
%res3 = add <4 x i64> %res, %res1
%res4 = add <4 x i64> %res3, %res2
ret <4 x i64> %res4
}
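
; Zero-extension tests: vpmovzx* widens the low source elements, filling the
; upper bits with zeros. For the unmasked byte-to-dword 128-bit form this is
; equivalent to the generic IR (illustrative only, %lo/%zx are not test names):
;   %lo = shufflevector <16 x i8> %x0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
;   %zx = zext <4 x i8> %lo to <4 x i32>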

declare <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pmovzxb_d_128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_d_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovzxbd %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovzxbd %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovzxbd %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
%res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> zeroinitializer, i8 %x2)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 -1)
%res3 = add <4 x i32> %res, %res1
%res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pmovzxb_d_256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_d_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovzxbd %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vpmovzxbd %xmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovzxbd %xmm0, %ymm0
; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
%res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> zeroinitializer, i8 %x2)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 -1)
%res3 = add <8 x i32> %res, %res1
%res4 = add <8 x i32> %res3, %res2
ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_pmovzxb_q_128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_q_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovzxbq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovzxbq %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovzxbq %xmm0, %xmm0
; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
%res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> zeroinitializer, i8 %x2)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 -1)
%res3 = add <2 x i64> %res, %res1
%res4 = add <2 x i64> %res3, %res2
ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_pmovzxb_q_256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_q_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovzxbq %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vpmovzxbq %xmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovzxbq %xmm0, %ymm0
; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
%res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> zeroinitializer, i8 %x2)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 -1)
%res3 = add <4 x i64> %res, %res1
%res4 = add <4 x i64> %res3, %res2
ret <4 x i64> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_pmovzxd_q_128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxd_q_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovzxdq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovzxdq %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovzxdq %xmm0, %xmm0
; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
%res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> zeroinitializer, i8 %x2)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 -1)
%res3 = add <2 x i64> %res, %res1
%res4 = add <2 x i64> %res3, %res2
ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_pmovzxd_q_256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxd_q_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovzxdq %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vpmovzxdq %xmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovzxdq %xmm0, %ymm0
; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
%res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> zeroinitializer, i8 %x2)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 -1)
%res3 = add <4 x i64> %res, %res1
%res4 = add <4 x i64> %res3, %res2
ret <4 x i64> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pmovzxw_d_128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_d_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovzxwd %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovzxwd %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovzxwd %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
%res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> zeroinitializer, i8 %x2)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 -1)
%res3 = add <4 x i32> %res, %res1
%res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pmovzxw_d_256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_d_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovzxwd %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vpmovzxwd %xmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovzxwd %xmm0, %ymm0
; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
%res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> zeroinitializer, i8 %x2)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 -1)
%res3 = add <8 x i32> %res, %res1
%res4 = add <8 x i32> %res3, %res2
ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_pmovzxw_q_128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_q_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovzxwq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovzxwq %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovzxwq %xmm0, %xmm0
; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
%res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> zeroinitializer, i8 %x2)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 -1)
%res3 = add <2 x i64> %res, %res1
%res4 = add <2 x i64> %res3, %res2
ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_pmovzxw_q_256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_q_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovzxwq %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vpmovzxwq %xmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovzxwq %xmm0, %ymm0
; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
%res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> zeroinitializer, i8 %x2)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 -1)
%res3 = add <4 x i64> %res, %res1
%res4 = add <4 x i64> %res3, %res2
ret <4 x i64> %res4
}
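
; Sign-extension tests: vpmovsx* follows the same pattern as the vpmovzx*
; tests above but replicates the sign bit into the widened lanes (sext rather
; than zext).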

declare <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pmovsxb_d_128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_d_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovsxbd %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovsxbd %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsxbd %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
%res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> zeroinitializer, i8 %x2)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 -1)
%res3 = add <4 x i32> %res, %res1
%res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pmovsxb_d_256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_d_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovsxbd %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vpmovsxbd %xmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovsxbd %xmm0, %ymm0
; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
%res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> zeroinitializer, i8 %x2)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 -1)
%res3 = add <8 x i32> %res, %res1
%res4 = add <8 x i32> %res3, %res2
ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_pmovsxb_q_128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_q_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovsxbq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovsxbq %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsxbq %xmm0, %xmm0
; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
%res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> zeroinitializer, i8 %x2)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 -1)
%res3 = add <2 x i64> %res, %res1
%res4 = add <2 x i64> %res3, %res2
ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_pmovsxb_q_256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_q_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovsxbq %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vpmovsxbq %xmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovsxbq %xmm0, %ymm0
; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
%res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> zeroinitializer, i8 %x2)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 -1)
%res3 = add <4 x i64> %res, %res1
%res4 = add <4 x i64> %res3, %res2
ret <4 x i64> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pmovsxw_d_128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_d_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovsxwd %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovsxwd %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
%res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> zeroinitializer, i8 %x2)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 -1)
%res3 = add <4 x i32> %res, %res1
%res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pmovsxw_d_256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_d_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovsxwd %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vpmovsxwd %xmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovsxwd %xmm0, %ymm0
; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
%res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> zeroinitializer, i8 %x2)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 -1)
%res3 = add <8 x i32> %res, %res1
%res4 = add <8 x i32> %res3, %res2
ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_pmovsxw_q_128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_q_128:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovsxwq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovsxwq %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsxwq %xmm0, %xmm0
; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
%res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> zeroinitializer, i8 %x2)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 -1)
%res3 = add <2 x i64> %res, %res1
%res4 = add <2 x i64> %res3, %res2
ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_pmovsxw_q_256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_q_256:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovsxwq %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vpmovsxwq %xmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovsxwq %xmm0, %ymm0
; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
%res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> zeroinitializer, i8 %x2)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 -1)
%res3 = add <4 x i64> %res, %res1
%res4 = add <4 x i64> %res3, %res2
ret <4 x i64> %res4
}