1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s
5 define i8 @test_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b) {
6 ; CHECK-LABEL: test_pcmpeq_d_256
7 ; CHECK: vpcmpeqd %ymm1, %ymm0, %k0 ##
8 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
12 define i8 @test_mask_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
13 ; CHECK-LABEL: test_mask_pcmpeq_d_256
14 ; CHECK: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ##
15 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
19 declare i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32>, <8 x i32>, i8)
21 define i8 @test_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b) {
22 ; CHECK-LABEL: test_pcmpeq_q_256
23 ; CHECK: vpcmpeqq %ymm1, %ymm0, %k0 ##
24 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
28 define i8 @test_mask_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
29 ; CHECK-LABEL: test_mask_pcmpeq_q_256
30 ; CHECK: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ##
31 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
35 declare i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64>, <4 x i64>, i8)
37 define i8 @test_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b) {
38 ; CHECK-LABEL: test_pcmpgt_d_256
39 ; CHECK: vpcmpgtd %ymm1, %ymm0, %k0 ##
40 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
44 define i8 @test_mask_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
45 ; CHECK-LABEL: test_mask_pcmpgt_d_256
46 ; CHECK: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} ##
47 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
51 declare i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32>, <8 x i32>, i8)
53 define i8 @test_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b) {
54 ; CHECK-LABEL: test_pcmpgt_q_256
55 ; CHECK: vpcmpgtq %ymm1, %ymm0, %k0 ##
56 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
60 define i8 @test_mask_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
61 ; CHECK-LABEL: test_mask_pcmpgt_q_256
62 ; CHECK: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} ##
63 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
67 declare i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64>, <4 x i64>, i8)
69 define <8 x i8> @test_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
70 ; CHECK-LABEL: test_cmp_d_256
71 ; CHECK: vpcmpeqd %ymm1, %ymm0, %k0 ##
72 %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1)
73 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
74 ; CHECK: vpcmpltd %ymm1, %ymm0, %k0 ##
75 %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 -1)
76 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
77 ; CHECK: vpcmpled %ymm1, %ymm0, %k0 ##
78 %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 -1)
79 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
80 ; CHECK: vpcmpunordd %ymm1, %ymm0, %k0 ##
81 %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 -1)
82 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
83 ; CHECK: vpcmpneqd %ymm1, %ymm0, %k0 ##
84 %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 -1)
85 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
86 ; CHECK: vpcmpnltd %ymm1, %ymm0, %k0 ##
87 %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 -1)
88 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
89 ; CHECK: vpcmpnled %ymm1, %ymm0, %k0 ##
90 %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 -1)
91 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
92 ; CHECK: vpcmpordd %ymm1, %ymm0, %k0 ##
93 %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 -1)
94 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
98 define <8 x i8> @test_mask_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
99 ; CHECK-LABEL: test_mask_cmp_d_256
100 ; CHECK: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ##
101 %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask)
102 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
103 ; CHECK: vpcmpltd %ymm1, %ymm0, %k0 {%k1} ##
104 %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 %mask)
105 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
106 ; CHECK: vpcmpled %ymm1, %ymm0, %k0 {%k1} ##
107 %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 %mask)
108 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
109 ; CHECK: vpcmpunordd %ymm1, %ymm0, %k0 {%k1} ##
110 %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 %mask)
111 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
112 ; CHECK: vpcmpneqd %ymm1, %ymm0, %k0 {%k1} ##
113 %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 %mask)
114 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
115 ; CHECK: vpcmpnltd %ymm1, %ymm0, %k0 {%k1} ##
116 %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 %mask)
117 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
118 ; CHECK: vpcmpnled %ymm1, %ymm0, %k0 {%k1} ##
119 %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 %mask)
120 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
121 ; CHECK: vpcmpordd %ymm1, %ymm0, %k0 {%k1} ##
122 %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 %mask)
123 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
127 declare i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32>, <8 x i32>, i32, i8) nounwind readnone
129 define <8 x i8> @test_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
130 ; CHECK-LABEL: test_ucmp_d_256
131 ; CHECK: vpcmpequd %ymm1, %ymm0, %k0 ##
132 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1)
133 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
134 ; CHECK: vpcmpltud %ymm1, %ymm0, %k0 ##
135 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 -1)
136 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
137 ; CHECK: vpcmpleud %ymm1, %ymm0, %k0 ##
138 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 -1)
139 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
140 ; CHECK: vpcmpunordud %ymm1, %ymm0, %k0 ##
141 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 -1)
142 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
143 ; CHECK: vpcmpnequd %ymm1, %ymm0, %k0 ##
144 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 -1)
145 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
146 ; CHECK: vpcmpnltud %ymm1, %ymm0, %k0 ##
147 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 -1)
148 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
149 ; CHECK: vpcmpnleud %ymm1, %ymm0, %k0 ##
150 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 -1)
151 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
152 ; CHECK: vpcmpordud %ymm1, %ymm0, %k0 ##
153 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 -1)
154 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
158 define <8 x i8> @test_mask_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
159 ; CHECK-LABEL: test_mask_ucmp_d_256
160 ; CHECK: vpcmpequd %ymm1, %ymm0, %k0 {%k1} ##
161 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask)
162 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
163 ; CHECK: vpcmpltud %ymm1, %ymm0, %k0 {%k1} ##
164 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 %mask)
165 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
166 ; CHECK: vpcmpleud %ymm1, %ymm0, %k0 {%k1} ##
167 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 %mask)
168 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
169 ; CHECK: vpcmpunordud %ymm1, %ymm0, %k0 {%k1} ##
170 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 %mask)
171 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
172 ; CHECK: vpcmpnequd %ymm1, %ymm0, %k0 {%k1} ##
173 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 %mask)
174 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
175 ; CHECK: vpcmpnltud %ymm1, %ymm0, %k0 {%k1} ##
176 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 %mask)
177 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
178 ; CHECK: vpcmpnleud %ymm1, %ymm0, %k0 {%k1} ##
179 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 %mask)
180 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
181 ; CHECK: vpcmpordud %ymm1, %ymm0, %k0 {%k1} ##
182 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 %mask)
183 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
187 declare i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32>, <8 x i32>, i32, i8) nounwind readnone
189 define <8 x i8> @test_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
190 ; CHECK-LABEL: test_cmp_q_256
191 ; CHECK: vpcmpeqq %ymm1, %ymm0, %k0 ##
192 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1)
193 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
194 ; CHECK: vpcmpltq %ymm1, %ymm0, %k0 ##
195 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 -1)
196 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
197 ; CHECK: vpcmpleq %ymm1, %ymm0, %k0 ##
198 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 -1)
199 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
200 ; CHECK: vpcmpunordq %ymm1, %ymm0, %k0 ##
201 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 -1)
202 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
203 ; CHECK: vpcmpneqq %ymm1, %ymm0, %k0 ##
204 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 -1)
205 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
206 ; CHECK: vpcmpnltq %ymm1, %ymm0, %k0 ##
207 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 -1)
208 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
209 ; CHECK: vpcmpnleq %ymm1, %ymm0, %k0 ##
210 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 -1)
211 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
212 ; CHECK: vpcmpordq %ymm1, %ymm0, %k0 ##
213 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 -1)
214 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
218 define <8 x i8> @test_mask_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
219 ; CHECK-LABEL: test_mask_cmp_q_256
220 ; CHECK: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ##
221 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
222 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
223 ; CHECK: vpcmpltq %ymm1, %ymm0, %k0 {%k1} ##
224 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 %mask)
225 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
226 ; CHECK: vpcmpleq %ymm1, %ymm0, %k0 {%k1} ##
227 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 %mask)
228 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
229 ; CHECK: vpcmpunordq %ymm1, %ymm0, %k0 {%k1} ##
230 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 %mask)
231 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
232 ; CHECK: vpcmpneqq %ymm1, %ymm0, %k0 {%k1} ##
233 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 %mask)
234 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
235 ; CHECK: vpcmpnltq %ymm1, %ymm0, %k0 {%k1} ##
236 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 %mask)
237 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
238 ; CHECK: vpcmpnleq %ymm1, %ymm0, %k0 {%k1} ##
239 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 %mask)
240 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
241 ; CHECK: vpcmpordq %ymm1, %ymm0, %k0 {%k1} ##
242 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 %mask)
243 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
247 declare i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounwind readnone
249 define <8 x i8> @test_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
250 ; CHECK-LABEL: test_ucmp_q_256
251 ; CHECK: vpcmpequq %ymm1, %ymm0, %k0 ##
252 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1)
253 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
254 ; CHECK: vpcmpltuq %ymm1, %ymm0, %k0 ##
255 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 -1)
256 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
257 ; CHECK: vpcmpleuq %ymm1, %ymm0, %k0 ##
258 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 -1)
259 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
260 ; CHECK: vpcmpunorduq %ymm1, %ymm0, %k0 ##
261 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 -1)
262 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
263 ; CHECK: vpcmpnequq %ymm1, %ymm0, %k0 ##
264 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 -1)
265 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
266 ; CHECK: vpcmpnltuq %ymm1, %ymm0, %k0 ##
267 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 -1)
268 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
269 ; CHECK: vpcmpnleuq %ymm1, %ymm0, %k0 ##
270 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 -1)
271 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
272 ; CHECK: vpcmporduq %ymm1, %ymm0, %k0 ##
273 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 -1)
274 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
278 define <8 x i8> @test_mask_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
279 ; CHECK-LABEL: test_mask_ucmp_q_256
280 ; CHECK: vpcmpequq %ymm1, %ymm0, %k0 {%k1} ##
281 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
282 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
283 ; CHECK: vpcmpltuq %ymm1, %ymm0, %k0 {%k1} ##
284 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 %mask)
285 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
286 ; CHECK: vpcmpleuq %ymm1, %ymm0, %k0 {%k1} ##
287 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 %mask)
288 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
289 ; CHECK: vpcmpunorduq %ymm1, %ymm0, %k0 {%k1} ##
290 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 %mask)
291 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
292 ; CHECK: vpcmpnequq %ymm1, %ymm0, %k0 {%k1} ##
293 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 %mask)
294 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
295 ; CHECK: vpcmpnltuq %ymm1, %ymm0, %k0 {%k1} ##
296 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 %mask)
297 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
298 ; CHECK: vpcmpnleuq %ymm1, %ymm0, %k0 {%k1} ##
299 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 %mask)
300 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
301 ; CHECK: vpcmporduq %ymm1, %ymm0, %k0 {%k1} ##
302 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 %mask)
303 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
307 declare i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounwind readnone
311 define i8 @test_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b) {
312 ; CHECK-LABEL: test_pcmpeq_d_128
313 ; CHECK: vpcmpeqd %xmm1, %xmm0, %k0 ##
314 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
318 define i8 @test_mask_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
319 ; CHECK-LABEL: test_mask_pcmpeq_d_128
320 ; CHECK: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ##
321 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
325 declare i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32>, <4 x i32>, i8)
327 define i8 @test_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b) {
328 ; CHECK-LABEL: test_pcmpeq_q_128
329 ; CHECK: vpcmpeqq %xmm1, %xmm0, %k0 ##
330 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
334 define i8 @test_mask_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
335 ; CHECK-LABEL: test_mask_pcmpeq_q_128
336 ; CHECK: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ##
337 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
341 declare i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64>, <2 x i64>, i8)
343 define i8 @test_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b) {
344 ; CHECK-LABEL: test_pcmpgt_d_128
345 ; CHECK: vpcmpgtd %xmm1, %xmm0, %k0 ##
346 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
350 define i8 @test_mask_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
351 ; CHECK-LABEL: test_mask_pcmpgt_d_128
352 ; CHECK: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} ##
353 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
357 declare i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32>, <4 x i32>, i8)
359 define i8 @test_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b) {
360 ; CHECK-LABEL: test_pcmpgt_q_128
361 ; CHECK: vpcmpgtq %xmm1, %xmm0, %k0 ##
362 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
366 define i8 @test_mask_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
367 ; CHECK-LABEL: test_mask_pcmpgt_q_128
368 ; CHECK: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} ##
369 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
373 declare i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64>, <2 x i64>, i8)
375 define <8 x i8> @test_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
376 ; CHECK-LABEL: test_cmp_d_128
377 ; CHECK: vpcmpeqd %xmm1, %xmm0, %k0 ##
378 %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1)
379 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
380 ; CHECK: vpcmpltd %xmm1, %xmm0, %k0 ##
381 %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 -1)
382 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
383 ; CHECK: vpcmpled %xmm1, %xmm0, %k0 ##
384 %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 -1)
385 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
386 ; CHECK: vpcmpunordd %xmm1, %xmm0, %k0 ##
387 %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 -1)
388 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
389 ; CHECK: vpcmpneqd %xmm1, %xmm0, %k0 ##
390 %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 -1)
391 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
392 ; CHECK: vpcmpnltd %xmm1, %xmm0, %k0 ##
393 %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 -1)
394 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
395 ; CHECK: vpcmpnled %xmm1, %xmm0, %k0 ##
396 %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 -1)
397 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
398 ; CHECK: vpcmpordd %xmm1, %xmm0, %k0 ##
399 %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 -1)
400 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
404 define <8 x i8> @test_mask_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
405 ; CHECK-LABEL: test_mask_cmp_d_128
406 ; CHECK: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ##
407 %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask)
408 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
409 ; CHECK: vpcmpltd %xmm1, %xmm0, %k0 {%k1} ##
410 %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 %mask)
411 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
412 ; CHECK: vpcmpled %xmm1, %xmm0, %k0 {%k1} ##
413 %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 %mask)
414 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
415 ; CHECK: vpcmpunordd %xmm1, %xmm0, %k0 {%k1} ##
416 %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 %mask)
417 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
418 ; CHECK: vpcmpneqd %xmm1, %xmm0, %k0 {%k1} ##
419 %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 %mask)
420 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
421 ; CHECK: vpcmpnltd %xmm1, %xmm0, %k0 {%k1} ##
422 %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 %mask)
423 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
424 ; CHECK: vpcmpnled %xmm1, %xmm0, %k0 {%k1} ##
425 %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 %mask)
426 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
427 ; CHECK: vpcmpordd %xmm1, %xmm0, %k0 {%k1} ##
428 %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 %mask)
429 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
433 declare i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32>, <4 x i32>, i32, i8) nounwind readnone
435 define <8 x i8> @test_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
436 ; CHECK-LABEL: test_ucmp_d_128
437 ; CHECK: vpcmpequd %xmm1, %xmm0, %k0 ##
438 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1)
439 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
440 ; CHECK: vpcmpltud %xmm1, %xmm0, %k0 ##
441 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 -1)
442 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
443 ; CHECK: vpcmpleud %xmm1, %xmm0, %k0 ##
444 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 -1)
445 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
446 ; CHECK: vpcmpunordud %xmm1, %xmm0, %k0 ##
447 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 -1)
448 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
449 ; CHECK: vpcmpnequd %xmm1, %xmm0, %k0 ##
450 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 -1)
451 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
452 ; CHECK: vpcmpnltud %xmm1, %xmm0, %k0 ##
453 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 -1)
454 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
455 ; CHECK: vpcmpnleud %xmm1, %xmm0, %k0 ##
456 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 -1)
457 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
458 ; CHECK: vpcmpordud %xmm1, %xmm0, %k0 ##
459 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 -1)
460 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
464 define <8 x i8> @test_mask_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
465 ; CHECK-LABEL: test_mask_ucmp_d_128
466 ; CHECK: vpcmpequd %xmm1, %xmm0, %k0 {%k1} ##
467 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask)
468 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
469 ; CHECK: vpcmpltud %xmm1, %xmm0, %k0 {%k1} ##
470 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 %mask)
471 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
472 ; CHECK: vpcmpleud %xmm1, %xmm0, %k0 {%k1} ##
473 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 %mask)
474 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
475 ; CHECK: vpcmpunordud %xmm1, %xmm0, %k0 {%k1} ##
476 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 %mask)
477 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
478 ; CHECK: vpcmpnequd %xmm1, %xmm0, %k0 {%k1} ##
479 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 %mask)
480 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
481 ; CHECK: vpcmpnltud %xmm1, %xmm0, %k0 {%k1} ##
482 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 %mask)
483 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
484 ; CHECK: vpcmpnleud %xmm1, %xmm0, %k0 {%k1} ##
485 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 %mask)
486 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
487 ; CHECK: vpcmpordud %xmm1, %xmm0, %k0 {%k1} ##
488 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 %mask)
489 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
493 declare i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32>, <4 x i32>, i32, i8) nounwind readnone
495 define <8 x i8> @test_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
496 ; CHECK-LABEL: test_cmp_q_128
497 ; CHECK: vpcmpeqq %xmm1, %xmm0, %k0 ##
498 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1)
499 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
500 ; CHECK: vpcmpltq %xmm1, %xmm0, %k0 ##
501 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 -1)
502 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
503 ; CHECK: vpcmpleq %xmm1, %xmm0, %k0 ##
504 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 -1)
505 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
506 ; CHECK: vpcmpunordq %xmm1, %xmm0, %k0 ##
507 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 -1)
508 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
509 ; CHECK: vpcmpneqq %xmm1, %xmm0, %k0 ##
510 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 -1)
511 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
512 ; CHECK: vpcmpnltq %xmm1, %xmm0, %k0 ##
513 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 -1)
514 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
515 ; CHECK: vpcmpnleq %xmm1, %xmm0, %k0 ##
516 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 -1)
517 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
518 ; CHECK: vpcmpordq %xmm1, %xmm0, %k0 ##
519 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 -1)
520 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
524 define <8 x i8> @test_mask_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
525 ; CHECK-LABEL: test_mask_cmp_q_128
526 ; CHECK: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ##
527 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask)
528 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
529 ; CHECK: vpcmpltq %xmm1, %xmm0, %k0 {%k1} ##
530 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 %mask)
531 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
532 ; CHECK: vpcmpleq %xmm1, %xmm0, %k0 {%k1} ##
533 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 %mask)
534 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
535 ; CHECK: vpcmpunordq %xmm1, %xmm0, %k0 {%k1} ##
536 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 %mask)
537 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
538 ; CHECK: vpcmpneqq %xmm1, %xmm0, %k0 {%k1} ##
539 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 %mask)
540 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
541 ; CHECK: vpcmpnltq %xmm1, %xmm0, %k0 {%k1} ##
542 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 %mask)
543 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
544 ; CHECK: vpcmpnleq %xmm1, %xmm0, %k0 {%k1} ##
545 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 %mask)
546 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
547 ; CHECK: vpcmpordq %xmm1, %xmm0, %k0 {%k1} ##
548 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 %mask)
549 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
553 declare i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64>, <2 x i64>, i32, i8) nounwind readnone
555 define <8 x i8> @test_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
556 ; CHECK-LABEL: test_ucmp_q_128
557 ; CHECK: vpcmpequq %xmm1, %xmm0, %k0 ##
558 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1)
559 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
560 ; CHECK: vpcmpltuq %xmm1, %xmm0, %k0 ##
561 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 -1)
562 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
563 ; CHECK: vpcmpleuq %xmm1, %xmm0, %k0 ##
564 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 -1)
565 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
566 ; CHECK: vpcmpunorduq %xmm1, %xmm0, %k0 ##
567 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 -1)
568 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
569 ; CHECK: vpcmpnequq %xmm1, %xmm0, %k0 ##
570 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 -1)
571 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
572 ; CHECK: vpcmpnltuq %xmm1, %xmm0, %k0 ##
573 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 -1)
574 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
575 ; CHECK: vpcmpnleuq %xmm1, %xmm0, %k0 ##
576 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 -1)
577 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
578 ; CHECK: vpcmporduq %xmm1, %xmm0, %k0 ##
579 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 -1)
580 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
584 define <8 x i8> @test_mask_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
585 ; CHECK-LABEL: test_mask_ucmp_q_128
586 ; CHECK: vpcmpequq %xmm1, %xmm0, %k0 {%k1} ##
587 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask)
588 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
589 ; CHECK: vpcmpltuq %xmm1, %xmm0, %k0 {%k1} ##
590 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 %mask)
591 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
592 ; CHECK: vpcmpleuq %xmm1, %xmm0, %k0 {%k1} ##
593 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 %mask)
594 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
595 ; CHECK: vpcmpunorduq %xmm1, %xmm0, %k0 {%k1} ##
596 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 %mask)
597 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
598 ; CHECK: vpcmpnequq %xmm1, %xmm0, %k0 {%k1} ##
599 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 %mask)
600 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
601 ; CHECK: vpcmpnltuq %xmm1, %xmm0, %k0 {%k1} ##
602 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 %mask)
603 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
604 ; CHECK: vpcmpnleuq %xmm1, %xmm0, %k0 {%k1} ##
605 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 %mask)
606 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
607 ; CHECK: vpcmporduq %xmm1, %xmm0, %k0 {%k1} ##
608 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 %mask)
609 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
613 declare i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64>, <2 x i64>, i32, i8) nounwind readnone
615 ; CHECK-LABEL: compr1
616 ; CHECK: vcompresspd %zmm0
617 define void @compr1(i8* %addr, <8 x double> %data, i8 %mask) {
618 call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
622 declare void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
624 ; CHECK-LABEL: compr2
625 ; CHECK: vcompresspd %ymm0
626 define void @compr2(i8* %addr, <4 x double> %data, i8 %mask) {
627 call void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
631 declare void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
633 ; CHECK-LABEL: compr3
634 ; CHECK: vcompressps %xmm0
635 define void @compr3(i8* %addr, <4 x float> %data, i8 %mask) {
636 call void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
640 declare void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
642 ; CHECK-LABEL: compr4
643 ; CHECK: vcompresspd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x8a,0xc0]
644 define <8 x double> @compr4(i8* %addr, <8 x double> %data, i8 %mask) {
645 %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
646 ret <8 x double> %res
649 declare <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)
651 ; CHECK-LABEL: compr5
652 ; CHECK: vcompresspd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x8a,0xc1]
653 define <4 x double> @compr5(<4 x double> %data, <4 x double> %src0, i8 %mask) {
654 %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256( <4 x double> %data, <4 x double> %src0, i8 %mask)
655 ret <4 x double> %res
658 declare <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)
660 ; CHECK-LABEL: compr6
661 ; CHECK: vcompressps %xmm0
662 define <4 x float> @compr6(<4 x float> %data, i8 %mask) {
663 %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float>zeroinitializer, i8 %mask)
667 declare <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask)
669 ; CHECK-LABEL: compr7
670 ; CHECK-NOT: vcompress
672 define void @compr7(i8* %addr, <8 x double> %data) {
673 call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 -1)
677 ; CHECK-LABEL: compr8
678 ; CHECK-NOT: vcompressps %xmm0
679 define <4 x float> @compr8(<4 x float> %data) {
680 %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float>zeroinitializer, i8 -1)
684 ; CHECK-LABEL: compr9
685 ; CHECK: vpcompressq %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x07]
686 define void @compr9(i8* %addr, <8 x i64> %data, i8 %mask) {
687 call void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
691 declare void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
693 ; CHECK-LABEL: compr10
694 ; CHECK: vpcompressd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0]
695 define <4 x i32> @compr10(<4 x i32> %data, i8 %mask) {
696 %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32>zeroinitializer, i8 %mask)
700 declare <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)
704 ; CHECK-LABEL: expand1
705 ; CHECK: vexpandpd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x07]
706 define <8 x double> @expand1(i8* %addr, <8 x double> %data, i8 %mask) {
707 %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
708 ret <8 x double> %res
711 declare <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
713 ; CHECK-LABEL: expand2
714 ; CHECK: vexpandpd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x88,0x07]
715 define <4 x double> @expand2(i8* %addr, <4 x double> %data, i8 %mask) {
716 %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
717 ret <4 x double> %res
720 declare <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
722 ; CHECK-LABEL: expand3
723 ; CHECK: vexpandps (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x88,0x07]
724 define <4 x float> @expand3(i8* %addr, <4 x float> %data, i8 %mask) {
725 %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
729 declare <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
731 ; CHECK-LABEL: expand4
732 ; CHECK: vexpandpd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0xc0]
733 define <8 x double> @expand4(i8* %addr, <8 x double> %data, i8 %mask) {
734 %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
735 ret <8 x double> %res
738 declare <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)
740 ; CHECK-LABEL: expand5
741 ; CHECK: vexpandpd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x88,0xc8]
742 define <4 x double> @expand5(<4 x double> %data, <4 x double> %src0, i8 %mask) {
743 %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256( <4 x double> %data, <4 x double> %src0, i8 %mask)
744 ret <4 x double> %res
747 declare <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)
749 ; CHECK-LABEL: expand6
750 ; CHECK: vexpandps %xmm0
751 define <4 x float> @expand6(<4 x float> %data, i8 %mask) {
752 %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float>zeroinitializer, i8 %mask)
756 declare <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask)
758 ; CHECK-LABEL: expand7
761 define <8 x double> @expand7(i8* %addr, <8 x double> %data) {
762 %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 -1)
763 ret <8 x double> %res
766 ; CHECK-LABEL: expand8
767 ; CHECK-NOT: vexpandps %xmm0
768 define <4 x float> @expand8(<4 x float> %data) {
769 %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float>zeroinitializer, i8 -1)
773 ; CHECK-LABEL: expand9
774 ; CHECK: vpexpandq (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x07]
775 define <8 x i64> @expand9(i8* %addr, <8 x i64> %data, i8 %mask) {
776 %res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
780 declare <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
782 ; CHECK-LABEL: expand10
783 ; CHECK: vpexpandd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x89,0xc0]
784 define <4 x i32> @expand10(<4 x i32> %data, i8 %mask) {
785 %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32>zeroinitializer, i8 %mask)
789 declare <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)
791 define <8 x float> @test_x86_mask_blend_ps_256(i8 %a0, <8 x float> %a1, <8 x float> %a2) {
792 ; CHECK: vblendmps %ymm1, %ymm0
793 %res = call <8 x float> @llvm.x86.avx512.mask.blend.ps.256(<8 x float> %a1, <8 x float> %a2, i8 %a0) ; <<8 x float>> [#uses=1]
797 declare <8 x float> @llvm.x86.avx512.mask.blend.ps.256(<8 x float>, <8 x float>, i8) nounwind readonly
799 define <4 x double> @test_x86_mask_blend_pd_256(i8 %a0, <4 x double> %a1, <4 x double> %a2) {
800 ; CHECK: vblendmpd %ymm1, %ymm0
801 %res = call <4 x double> @llvm.x86.avx512.mask.blend.pd.256(<4 x double> %a1, <4 x double> %a2, i8 %a0) ; <<4 x double>> [#uses=1]
802 ret <4 x double> %res
805 define <4 x double> @test_x86_mask_blend_pd_256_memop(<4 x double> %a, <4 x double>* %ptr, i8 %mask) {
806 ; CHECK-LABEL: test_x86_mask_blend_pd_256_memop
807 ; CHECK: vblendmpd (%
808 %b = load <4 x double>, <4 x double>* %ptr
809 %res = call <4 x double> @llvm.x86.avx512.mask.blend.pd.256(<4 x double> %a, <4 x double> %b, i8 %mask) ; <<4 x double>> [#uses=1]
810 ret <4 x double> %res
812 declare <4 x double> @llvm.x86.avx512.mask.blend.pd.256(<4 x double>, <4 x double>, i8) nounwind readonly
814 ; CHECK-LABEL: test_x86_mask_blend_d_256
816 define <8 x i32> @test_x86_mask_blend_d_256(i8 %a0, <8 x i32> %a1, <8 x i32> %a2) {
817 %res = call <8 x i32> @llvm.x86.avx512.mask.blend.d.256(<8 x i32> %a1, <8 x i32> %a2, i8 %a0) ; <<8 x i32>> [#uses=1]
820 declare <8 x i32> @llvm.x86.avx512.mask.blend.d.256(<8 x i32>, <8 x i32>, i8) nounwind readonly
822 define <4 x i64> @test_x86_mask_blend_q_256(i8 %a0, <4 x i64> %a1, <4 x i64> %a2) {
824 %res = call <4 x i64> @llvm.x86.avx512.mask.blend.q.256(<4 x i64> %a1, <4 x i64> %a2, i8 %a0) ; <<4 x i64>> [#uses=1]
827 declare <4 x i64> @llvm.x86.avx512.mask.blend.q.256(<4 x i64>, <4 x i64>, i8) nounwind readonly
829 define <4 x float> @test_x86_mask_blend_ps_128(i8 %a0, <4 x float> %a1, <4 x float> %a2) {
830 ; CHECK: vblendmps %xmm1, %xmm0
831 %res = call <4 x float> @llvm.x86.avx512.mask.blend.ps.128(<4 x float> %a1, <4 x float> %a2, i8 %a0) ; <<4 x float>> [#uses=1]
835 declare <4 x float> @llvm.x86.avx512.mask.blend.ps.128(<4 x float>, <4 x float>, i8) nounwind readonly
837 define <2 x double> @test_x86_mask_blend_pd_128(i8 %a0, <2 x double> %a1, <2 x double> %a2) {
838 ; CHECK: vblendmpd %xmm1, %xmm0
839 %res = call <2 x double> @llvm.x86.avx512.mask.blend.pd.128(<2 x double> %a1, <2 x double> %a2, i8 %a0) ; <<2 x double>> [#uses=1]
840 ret <2 x double> %res
843 define <2 x double> @test_x86_mask_blend_pd_128_memop(<2 x double> %a, <2 x double>* %ptr, i8 %mask) {
844 ; CHECK-LABEL: test_x86_mask_blend_pd_128_memop
845 ; CHECK: vblendmpd (%
846 %b = load <2 x double>, <2 x double>* %ptr
847 %res = call <2 x double> @llvm.x86.avx512.mask.blend.pd.128(<2 x double> %a, <2 x double> %b, i8 %mask) ; <<2 x double>> [#uses=1]
848 ret <2 x double> %res
850 declare <2 x double> @llvm.x86.avx512.mask.blend.pd.128(<2 x double>, <2 x double>, i8) nounwind readonly
852 define <4 x i32> @test_x86_mask_blend_d_128(i8 %a0, <4 x i32> %a1, <4 x i32> %a2) {
854 %res = call <4 x i32> @llvm.x86.avx512.mask.blend.d.128(<4 x i32> %a1, <4 x i32> %a2, i8 %a0) ; <<4 x i32>> [#uses=1]
857 declare <4 x i32> @llvm.x86.avx512.mask.blend.d.128(<4 x i32>, <4 x i32>, i8) nounwind readonly
859 define <2 x i64> @test_x86_mask_blend_q_128(i8 %a0, <2 x i64> %a1, <2 x i64> %a2) {
861 %res = call <2 x i64> @llvm.x86.avx512.mask.blend.q.128(<2 x i64> %a1, <2 x i64> %a2, i8 %a0) ; <<2 x i64>> [#uses=1]
864 declare <2 x i64> @llvm.x86.avx512.mask.blend.q.128(<2 x i64>, <2 x i64>, i8) nounwind readonly
867 define < 2 x i64> @test_mask_mul_epi32_rr_128(< 4 x i32> %a, < 4 x i32> %b) {
868 ;CHECK-LABEL: test_mask_mul_epi32_rr_128
869 ;CHECK: vpmuldq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x28,0xc1]
870 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
874 define < 2 x i64> @test_mask_mul_epi32_rrk_128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) {
875 ;CHECK-LABEL: test_mask_mul_epi32_rrk_128
876 ;CHECK: vpmuldq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x28,0xd1]
877 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
881 define < 2 x i64> @test_mask_mul_epi32_rrkz_128(< 4 x i32> %a, < 4 x i32> %b, i8 %mask) {
882 ;CHECK-LABEL: test_mask_mul_epi32_rrkz_128
883 ;CHECK: vpmuldq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x28,0xc1]
884 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
888 define < 2 x i64> @test_mask_mul_epi32_rm_128(< 4 x i32> %a, < 4 x i32>* %ptr_b) {
889 ;CHECK-LABEL: test_mask_mul_epi32_rm_128
890 ;CHECK: vpmuldq (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x28,0x07]
891 %b = load < 4 x i32>, < 4 x i32>* %ptr_b
892 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
896 define < 2 x i64> @test_mask_mul_epi32_rmk_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, < 2 x i64> %passThru, i8 %mask) {
897 ;CHECK-LABEL: test_mask_mul_epi32_rmk_128
898 ;CHECK: vpmuldq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x28,0x0f]
899 %b = load < 4 x i32>, < 4 x i32>* %ptr_b
900 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
904 define < 2 x i64> @test_mask_mul_epi32_rmkz_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, i8 %mask) {
905 ;CHECK-LABEL: test_mask_mul_epi32_rmkz_128
906 ;CHECK: vpmuldq (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x28,0x07]
907 %b = load < 4 x i32>, < 4 x i32>* %ptr_b
908 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
912 define < 2 x i64> @test_mask_mul_epi32_rmb_128(< 4 x i32> %a, i64* %ptr_b) {
913 ;CHECK-LABEL: test_mask_mul_epi32_rmb_128
914 ;CHECK: vpmuldq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x18,0x28,0x07]
915 %q = load i64, i64* %ptr_b
916 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0
917 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer
918 %b = bitcast < 2 x i64> %b64 to < 4 x i32>
919 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
923 define < 2 x i64> @test_mask_mul_epi32_rmbk_128(< 4 x i32> %a, i64* %ptr_b, < 2 x i64> %passThru, i8 %mask) {
924 ;CHECK-LABEL: test_mask_mul_epi32_rmbk_128
925 ;CHECK: vpmuldq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0x28,0x0f]
926 %q = load i64, i64* %ptr_b
927 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0
928 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer
929 %b = bitcast < 2 x i64> %b64 to < 4 x i32>
930 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
934 define < 2 x i64> @test_mask_mul_epi32_rmbkz_128(< 4 x i32> %a, i64* %ptr_b, i8 %mask) {
935 ;CHECK-LABEL: test_mask_mul_epi32_rmbkz_128
936 ;CHECK: vpmuldq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x99,0x28,0x07]
937 %q = load i64, i64* %ptr_b
938 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0
939 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, < 2 x i32> zeroinitializer
940 %b = bitcast < 2 x i64> %b64 to < 4 x i32>
941 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
945 declare < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32>, < 4 x i32>, < 2 x i64>, i8)
947 define < 4 x i64> @test_mask_mul_epi32_rr_256(< 8 x i32> %a, < 8 x i32> %b) {
948 ;CHECK-LABEL: test_mask_mul_epi32_rr_256
949 ;CHECK: vpmuldq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x28,0xc1]
950 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
954 define < 4 x i64> @test_mask_mul_epi32_rrk_256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) {
955 ;CHECK-LABEL: test_mask_mul_epi32_rrk_256
956 ;CHECK: vpmuldq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x28,0xd1]
957 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
961 define < 4 x i64> @test_mask_mul_epi32_rrkz_256(< 8 x i32> %a, < 8 x i32> %b, i8 %mask) {
962 ;CHECK-LABEL: test_mask_mul_epi32_rrkz_256
963 ;CHECK: vpmuldq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x28,0xc1]
964 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
968 define < 4 x i64> @test_mask_mul_epi32_rm_256(< 8 x i32> %a, < 8 x i32>* %ptr_b) {
969 ;CHECK-LABEL: test_mask_mul_epi32_rm_256
970 ;CHECK: vpmuldq (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x28,0x07]
971 %b = load < 8 x i32>, < 8 x i32>* %ptr_b
972 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
976 define < 4 x i64> @test_mask_mul_epi32_rmk_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, < 4 x i64> %passThru, i8 %mask) {
977 ;CHECK-LABEL: test_mask_mul_epi32_rmk_256
978 ;CHECK: vpmuldq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x28,0x0f]
979 %b = load < 8 x i32>, < 8 x i32>* %ptr_b
980 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
984 define < 4 x i64> @test_mask_mul_epi32_rmkz_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, i8 %mask) {
985 ;CHECK-LABEL: test_mask_mul_epi32_rmkz_256
986 ;CHECK: vpmuldq (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x28,0x07]
987 %b = load < 8 x i32>, < 8 x i32>* %ptr_b
988 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
992 define < 4 x i64> @test_mask_mul_epi32_rmb_256(< 8 x i32> %a, i64* %ptr_b) {
993 ;CHECK-LABEL: test_mask_mul_epi32_rmb_256
994 ;CHECK: vpmuldq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x38,0x28,0x07]
995 %q = load i64, i64* %ptr_b
996 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0
997 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer
998 %b = bitcast < 4 x i64> %b64 to < 8 x i32>
999 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
1003 define < 4 x i64> @test_mask_mul_epi32_rmbk_256(< 8 x i32> %a, i64* %ptr_b, < 4 x i64> %passThru, i8 %mask) {
1004 ;CHECK-LABEL: test_mask_mul_epi32_rmbk_256
1005 ;CHECK: vpmuldq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x39,0x28,0x0f]
1006 %q = load i64, i64* %ptr_b
1007 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0
1008 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer
1009 %b = bitcast < 4 x i64> %b64 to < 8 x i32>
1010 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
1014 define < 4 x i64> @test_mask_mul_epi32_rmbkz_256(< 8 x i32> %a, i64* %ptr_b, i8 %mask) {
1015 ;CHECK-LABEL: test_mask_mul_epi32_rmbkz_256
1016 ;CHECK: vpmuldq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xb9,0x28,0x07]
1017 %q = load i64, i64* %ptr_b
1018 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0
1019 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer
1020 %b = bitcast < 4 x i64> %b64 to < 8 x i32>
1021 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
1025 declare < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32>, < 8 x i32>, < 4 x i64>, i8)
1027 define < 2 x i64> @test_mask_mul_epu32_rr_128(< 4 x i32> %a, < 4 x i32> %b) {
1028 ;CHECK-LABEL: test_mask_mul_epu32_rr_128
1029 ;CHECK: vpmuludq %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf4,0xc1]
1030 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
1034 define < 2 x i64> @test_mask_mul_epu32_rrk_128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) {
1035 ;CHECK-LABEL: test_mask_mul_epu32_rrk_128
1036 ;CHECK: vpmuludq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xf4,0xd1]
1037 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
1041 define < 2 x i64> @test_mask_mul_epu32_rrkz_128(< 4 x i32> %a, < 4 x i32> %b, i8 %mask) {
1042 ;CHECK-LABEL: test_mask_mul_epu32_rrkz_128
1043 ;CHECK: vpmuludq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xf4,0xc1]
1044 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
1048 define < 2 x i64> @test_mask_mul_epu32_rm_128(< 4 x i32> %a, < 4 x i32>* %ptr_b) {
1049 ;CHECK-LABEL: test_mask_mul_epu32_rm_128
1050 ;CHECK: vpmuludq (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf4,0x07]
1051 %b = load < 4 x i32>, < 4 x i32>* %ptr_b
1052 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
1056 define < 2 x i64> @test_mask_mul_epu32_rmk_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, < 2 x i64> %passThru, i8 %mask) {
1057 ;CHECK-LABEL: test_mask_mul_epu32_rmk_128
1058 ;CHECK: vpmuludq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xf4,0x0f]
1059 %b = load < 4 x i32>, < 4 x i32>* %ptr_b
1060 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
1064 define < 2 x i64> @test_mask_mul_epu32_rmkz_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, i8 %mask) {
1065 ;CHECK-LABEL: test_mask_mul_epu32_rmkz_128
1066 ;CHECK: vpmuludq (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xf4,0x07]
1067 %b = load < 4 x i32>, < 4 x i32>* %ptr_b
1068 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
1072 define < 2 x i64> @test_mask_mul_epu32_rmb_128(< 4 x i32> %a, i64* %ptr_b) {
1073 ;CHECK-LABEL: test_mask_mul_epu32_rmb_128
1074 ;CHECK: vpmuludq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x18,0xf4,0x07]
1075 %q = load i64, i64* %ptr_b
1076 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0
1077 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer
1078 %b = bitcast < 2 x i64> %b64 to < 4 x i32>
1079 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
1083 define < 2 x i64> @test_mask_mul_epu32_rmbk_128(< 4 x i32> %a, i64* %ptr_b, < 2 x i64> %passThru, i8 %mask) {
1084 ;CHECK-LABEL: test_mask_mul_epu32_rmbk_128
1085 ;CHECK: vpmuludq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x19,0xf4,0x0f]
1086 %q = load i64, i64* %ptr_b
1087 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0
1088 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer
1089 %b = bitcast < 2 x i64> %b64 to < 4 x i32>
1090 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
1094 define < 2 x i64> @test_mask_mul_epu32_rmbkz_128(< 4 x i32> %a, i64* %ptr_b, i8 %mask) {
1095 ;CHECK-LABEL: test_mask_mul_epu32_rmbkz_128
1096 ;CHECK: vpmuludq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0xf4,0x07]
1097 %q = load i64, i64* %ptr_b
1098 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0
1099 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, < 2 x i32> zeroinitializer
1100 %b = bitcast < 2 x i64> %b64 to < 4 x i32>
1101 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
1105 declare < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32>, < 4 x i32>, < 2 x i64>, i8)
1107 define < 4 x i64> @test_mask_mul_epu32_rr_256(< 8 x i32> %a, < 8 x i32> %b) {
1108 ;CHECK-LABEL: test_mask_mul_epu32_rr_256
1109 ;CHECK: vpmuludq %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf4,0xc1]
1110 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
1114 define < 4 x i64> @test_mask_mul_epu32_rrk_256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) {
1115 ;CHECK-LABEL: test_mask_mul_epu32_rrk_256
1116 ;CHECK: vpmuludq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xf4,0xd1]
1117 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
1121 define < 4 x i64> @test_mask_mul_epu32_rrkz_256(< 8 x i32> %a, < 8 x i32> %b, i8 %mask) {
1122 ;CHECK-LABEL: test_mask_mul_epu32_rrkz_256
1123 ;CHECK: vpmuludq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0xc1]
1124 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
1128 define < 4 x i64> @test_mask_mul_epu32_rm_256(< 8 x i32> %a, < 8 x i32>* %ptr_b) {
1129 ;CHECK-LABEL: test_mask_mul_epu32_rm_256
1130 ;CHECK: vpmuludq (%rdi), %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf4,0x07]
1131 %b = load < 8 x i32>, < 8 x i32>* %ptr_b
1132 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
1136 define < 4 x i64> @test_mask_mul_epu32_rmk_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, < 4 x i64> %passThru, i8 %mask) {
1137 ;CHECK-LABEL: test_mask_mul_epu32_rmk_256
1138 ;CHECK: vpmuludq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xf4,0x0f]
1139 %b = load < 8 x i32>, < 8 x i32>* %ptr_b
1140 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
1144 define < 4 x i64> @test_mask_mul_epu32_rmkz_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, i8 %mask) {
1145 ;CHECK-LABEL: test_mask_mul_epu32_rmkz_256
1146 ;CHECK: vpmuludq (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0x07]
1147 %b = load < 8 x i32>, < 8 x i32>* %ptr_b
1148 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
1152 define < 4 x i64> @test_mask_mul_epu32_rmb_256(< 8 x i32> %a, i64* %ptr_b) {
1153 ;CHECK-LABEL: test_mask_mul_epu32_rmb_256
1154 ;CHECK: vpmuludq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x38,0xf4,0x07]
1155 %q = load i64, i64* %ptr_b
1156 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0
1157 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer
1158 %b = bitcast < 4 x i64> %b64 to < 8 x i32>
1159 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
1163 define < 4 x i64> @test_mask_mul_epu32_rmbk_256(< 8 x i32> %a, i64* %ptr_b, < 4 x i64> %passThru, i8 %mask) {
1164 ;CHECK-LABEL: test_mask_mul_epu32_rmbk_256
1165 ;CHECK: vpmuludq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x39,0xf4,0x0f]
1166 %q = load i64, i64* %ptr_b
1167 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0
1168 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer
1169 %b = bitcast < 4 x i64> %b64 to < 8 x i32>
1170 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
1174 define < 4 x i64> @test_mask_mul_epu32_rmbkz_256(< 8 x i32> %a, i64* %ptr_b, i8 %mask) {
1175 ;CHECK-LABEL: test_mask_mul_epu32_rmbkz_256
1176 ;CHECK: vpmuludq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0xf4,0x07]
1177 %q = load i64, i64* %ptr_b
1178 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0
1179 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer
1180 %b = bitcast < 4 x i64> %b64 to < 8 x i32>
1181 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
1185 declare < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32>, < 8 x i32>, < 4 x i64>, i8)
1187 define <4 x i32> @test_mask_add_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
1188 ;CHECK-LABEL: test_mask_add_epi32_rr_128
1189 ;CHECK: vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1]
1190 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
1194 define <4 x i32> @test_mask_add_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
1195 ;CHECK-LABEL: test_mask_add_epi32_rrk_128
1196 ;CHECK: vpaddd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfe,0xd1]
1197 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
1201 define <4 x i32> @test_mask_add_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
1202 ;CHECK-LABEL: test_mask_add_epi32_rrkz_128
1203 ;CHECK: vpaddd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfe,0xc1]
1204 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
1208 define <4 x i32> @test_mask_add_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
1209 ;CHECK-LABEL: test_mask_add_epi32_rm_128
1210 ;CHECK: vpaddd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0x07]
1211 %b = load <4 x i32>, <4 x i32>* %ptr_b
1212 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
1216 define <4 x i32> @test_mask_add_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
1217 ;CHECK-LABEL: test_mask_add_epi32_rmk_128
1218 ;CHECK: vpaddd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfe,0x0f]
1219 %b = load <4 x i32>, <4 x i32>* %ptr_b
1220 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
1224 define <4 x i32> @test_mask_add_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
1225 ;CHECK-LABEL: test_mask_add_epi32_rmkz_128
1226 ;CHECK: vpaddd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfe,0x07]
1227 %b = load <4 x i32>, <4 x i32>* %ptr_b
1228 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
1232 define <4 x i32> @test_mask_add_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
1233 ;CHECK-LABEL: test_mask_add_epi32_rmb_128
1234 ;CHECK: vpaddd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x07]
1235 %q = load i32, i32* %ptr_b
1236 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
1237 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
1238 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
1242 define <4 x i32> @test_mask_add_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
1243 ;CHECK-LABEL: test_mask_add_epi32_rmbk_128
1244 ;CHECK: vpaddd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xfe,0x0f]
1245 %q = load i32, i32* %ptr_b
1246 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
1247 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
1248 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
1252 define <4 x i32> @test_mask_add_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
1253 ;CHECK-LABEL: test_mask_add_epi32_rmbkz_128
1254 ;CHECK: vpaddd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xfe,0x07]
1255 %q = load i32, i32* %ptr_b
1256 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
1257 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
1258 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
1262 declare <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
1264 define <4 x i32> @test_mask_sub_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
1265 ;CHECK-LABEL: test_mask_sub_epi32_rr_128
1266 ;CHECK: vpsubd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfa,0xc1]
1267 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
1271 define <4 x i32> @test_mask_sub_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
1272 ;CHECK-LABEL: test_mask_sub_epi32_rrk_128
1273 ;CHECK: vpsubd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfa,0xd1]
1274 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
1278 define <4 x i32> @test_mask_sub_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
1279 ;CHECK-LABEL: test_mask_sub_epi32_rrkz_128
1280 ;CHECK: vpsubd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfa,0xc1]
1281 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
1285 define <4 x i32> @test_mask_sub_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
1286 ;CHECK-LABEL: test_mask_sub_epi32_rm_128
1287 ;CHECK: (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfa,0x07]
1288 %b = load <4 x i32>, <4 x i32>* %ptr_b
1289 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
1293 define <4 x i32> @test_mask_sub_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
1294 ;CHECK-LABEL: test_mask_sub_epi32_rmk_128
1295 ;CHECK: vpsubd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfa,0x0f]
1296 %b = load <4 x i32>, <4 x i32>* %ptr_b
1297 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
1301 define <4 x i32> @test_mask_sub_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
1302 ;CHECK-LABEL: test_mask_sub_epi32_rmkz_128
1303 ;CHECK: vpsubd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfa,0x07]
1304 %b = load <4 x i32>, <4 x i32>* %ptr_b
1305 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
1309 define <4 x i32> @test_mask_sub_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
1310 ;CHECK-LABEL: test_mask_sub_epi32_rmb_128
1311 ;CHECK: vpsubd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xfa,0x07]
1312 %q = load i32, i32* %ptr_b
1313 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
1314 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
1315 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
1319 define <4 x i32> @test_mask_sub_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
1320 ;CHECK-LABEL: test_mask_sub_epi32_rmbk_128
1321 ;CHECK: vpsubd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xfa,0x0f]
1322 %q = load i32, i32* %ptr_b
1323 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
1324 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
1325 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
1329 define <4 x i32> @test_mask_sub_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
1330 ;CHECK-LABEL: test_mask_sub_epi32_rmbkz_128
1331 ;CHECK: vpsubd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xfa,0x07]
1332 %q = load i32, i32* %ptr_b
1333 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
1334 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
1335 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
1339 declare <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
1341 define <8 x i32> @test_mask_sub_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
1342 ;CHECK-LABEL: test_mask_sub_epi32_rr_256
1343 ;CHECK: vpsubd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfa,0xc1]
1344 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
1348 define <8 x i32> @test_mask_sub_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
1349 ;CHECK-LABEL: test_mask_sub_epi32_rrk_256
1350 ;CHECK: vpsubd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfa,0xd1]
1351 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
1355 define <8 x i32> @test_mask_sub_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
1356 ;CHECK-LABEL: test_mask_sub_epi32_rrkz_256
1357 ;CHECK: vpsubd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0xc1]
1358 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
1362 define <8 x i32> @test_mask_sub_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
1363 ;CHECK-LABEL: test_mask_sub_epi32_rm_256
1364 ;CHECK: vpsubd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfa,0x07]
1365 %b = load <8 x i32>, <8 x i32>* %ptr_b
1366 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
1370 define <8 x i32> @test_mask_sub_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
1371 ;CHECK-LABEL: test_mask_sub_epi32_rmk_256
1372 ;CHECK: vpsubd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfa,0x0f]
1373 %b = load <8 x i32>, <8 x i32>* %ptr_b
1374 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
1378 define <8 x i32> @test_mask_sub_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
1379 ;CHECK-LABEL: test_mask_sub_epi32_rmkz_256
1380 ;CHECK: vpsubd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0x07]
1381 %b = load <8 x i32>, <8 x i32>* %ptr_b
1382 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
1386 define <8 x i32> @test_mask_sub_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
1387 ;CHECK-LABEL: test_mask_sub_epi32_rmb_256
1388 ;CHECK: vpsubd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xfa,0x07]
1389 %q = load i32, i32* %ptr_b
1390 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
1391 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
1392 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
1396 define <8 x i32> @test_mask_sub_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
1397 ;CHECK-LABEL: test_mask_sub_epi32_rmbk_256
1398 ;CHECK: vpsubd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xfa,0x0f]
1399 %q = load i32, i32* %ptr_b
1400 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
1401 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
1402 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
1406 define <8 x i32> @test_mask_sub_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
1407 ;CHECK-LABEL: test_mask_sub_epi32_rmbkz_256
1408 ;CHECK: vpsubd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xfa,0x07]
1409 %q = load i32, i32* %ptr_b
1410 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
1411 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
1412 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
1416 declare <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
1418 define <8 x i32> @test_mask_add_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
1419 ;CHECK-LABEL: test_mask_add_epi32_rr_256
1420 ;CHECK: vpaddd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc1]
1421 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
1425 define <8 x i32> @test_mask_add_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
1426 ;CHECK-LABEL: test_mask_add_epi32_rrk_256
1427 ;CHECK: vpaddd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0xd1]
1428 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
1432 define <8 x i32> @test_mask_add_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
1433 ;CHECK-LABEL: test_mask_add_epi32_rrkz_256
1434 ;CHECK: vpaddd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0xc1]
1435 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
1439 define <8 x i32> @test_mask_add_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
1440 ;CHECK-LABEL: test_mask_add_epi32_rm_256
1441 ;CHECK: vpaddd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0x07]
1442 %b = load <8 x i32>, <8 x i32>* %ptr_b
1443 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
1447 define <8 x i32> @test_mask_add_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
1448 ;CHECK-LABEL: test_mask_add_epi32_rmk_256
1449 ;CHECK: vpaddd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0x0f]
1450 %b = load <8 x i32>, <8 x i32>* %ptr_b
1451 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
1455 define <8 x i32> @test_mask_add_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
1456 ;CHECK-LABEL: test_mask_add_epi32_rmkz_256
1457 ;CHECK: vpaddd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0x07]
1458 %b = load <8 x i32>, <8 x i32>* %ptr_b
1459 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
1463 define <8 x i32> @test_mask_add_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
1464 ;CHECK-LABEL: test_mask_add_epi32_rmb_256
1465 ;CHECK: vpaddd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x07]
1466 %q = load i32, i32* %ptr_b
1467 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
1468 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
1469 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
1473 define <8 x i32> @test_mask_add_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
1474 ;CHECK-LABEL: test_mask_add_epi32_rmbk_256
1475 ;CHECK: vpaddd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xfe,0x0f]
1476 %q = load i32, i32* %ptr_b
1477 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
1478 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
1479 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
1483 define <8 x i32> @test_mask_add_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
1484 ;CHECK-LABEL: test_mask_add_epi32_rmbkz_256
1485 ;CHECK: vpaddd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xfe,0x07]
1486 %q = load i32, i32* %ptr_b
1487 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
1488 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
1489 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
1493 declare <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
1495 define <4 x i32> @test_mask_and_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
1496 ;CHECK-LABEL: test_mask_and_epi32_rr_128
1497 ;CHECK: vpandd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdb,0xc1]
1498 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
1502 define <4 x i32> @test_mask_and_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
1503 ;CHECK-LABEL: test_mask_and_epi32_rrk_128
1504 ;CHECK: vpandd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdb,0xd1]
1505 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
1509 define <4 x i32> @test_mask_and_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
1510 ;CHECK-LABEL: test_mask_and_epi32_rrkz_128
1511 ;CHECK: vpandd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdb,0xc1]
1512 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
1516 define <4 x i32> @test_mask_and_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
1517 ;CHECK-LABEL: test_mask_and_epi32_rm_128
1518 ;CHECK: vpandd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdb,0x07]
1519 %b = load <4 x i32>, <4 x i32>* %ptr_b
1520 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
1524 define <4 x i32> @test_mask_and_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
1525 ;CHECK-LABEL: test_mask_and_epi32_rmk_128
1526 ;CHECK: vpandd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdb,0x0f]
1527 %b = load <4 x i32>, <4 x i32>* %ptr_b
1528 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
1532 define <4 x i32> @test_mask_and_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
1533 ;CHECK-LABEL: test_mask_and_epi32_rmkz_128
1534 ;CHECK: vpandd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdb,0x07]
1535 %b = load <4 x i32>, <4 x i32>* %ptr_b
1536 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
1540 define <4 x i32> @test_mask_and_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
1541 ;CHECK-LABEL: test_mask_and_epi32_rmb_128
1542 ;CHECK: vpandd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xdb,0x07]
1543 %q = load i32, i32* %ptr_b
1544 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
1545 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
1546 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
1550 define <4 x i32> @test_mask_and_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
1551 ;CHECK-LABEL: test_mask_and_epi32_rmbk_128
1552 ;CHECK: vpandd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xdb,0x0f]
1553 %q = load i32, i32* %ptr_b
1554 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
1555 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
1556 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
1560 define <4 x i32> @test_mask_and_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
1561 ;CHECK-LABEL: test_mask_and_epi32_rmbkz_128
1562 ;CHECK: vpandd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xdb,0x07]
1563 %q = load i32, i32* %ptr_b
1564 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
1565 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
1566 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
1570 declare <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
1572 define <8 x i32> @test_mask_and_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
1573 ;CHECK-LABEL: test_mask_and_epi32_rr_256
1574 ;CHECK: vpandd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdb,0xc1]
1575 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
1579 define <8 x i32> @test_mask_and_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
1580 ;CHECK-LABEL: test_mask_and_epi32_rrk_256
1581 ;CHECK: vpandd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdb,0xd1]
1582 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
1586 define <8 x i32> @test_mask_and_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
1587 ;CHECK-LABEL: test_mask_and_epi32_rrkz_256
1588 ;CHECK: vpandd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0xc1]
1589 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
1593 define <8 x i32> @test_mask_and_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
1594 ;CHECK-LABEL: test_mask_and_epi32_rm_256
1595 ;CHECK: vpandd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdb,0x07]
1596 %b = load <8 x i32>, <8 x i32>* %ptr_b
1597 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
1601 define <8 x i32> @test_mask_and_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
1602 ;CHECK-LABEL: test_mask_and_epi32_rmk_256
1603 ;CHECK: vpandd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdb,0x0f]
1604 %b = load <8 x i32>, <8 x i32>* %ptr_b
1605 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
1609 define <8 x i32> @test_mask_and_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
1610 ;CHECK-LABEL: test_mask_and_epi32_rmkz_256
1611 ;CHECK: vpandd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0x07]
1612 %b = load <8 x i32>, <8 x i32>* %ptr_b
1613 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
1617 define <8 x i32> @test_mask_and_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
1618 ;CHECK-LABEL: test_mask_and_epi32_rmb_256
1619 ;CHECK: vpandd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xdb,0x07]
1620 %q = load i32, i32* %ptr_b
1621 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
1622 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
1623 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
1627 define <8 x i32> @test_mask_and_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
1628 ;CHECK-LABEL: test_mask_and_epi32_rmbk_256
1629 ;CHECK: vpandd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xdb,0x0f]
1630 %q = load i32, i32* %ptr_b
1631 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
1632 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
1633 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
1637 define <8 x i32> @test_mask_and_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
1638 ;CHECK-LABEL: test_mask_and_epi32_rmbkz_256
1639 ;CHECK: vpandd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xdb,0x07]
1640 %q = load i32, i32* %ptr_b
1641 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
1642 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
1643 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
1647 declare <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
1649 define <4 x i32> @test_mask_or_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
1650 ;CHECK-LABEL: test_mask_or_epi32_rr_128
1651 ;CHECK: vpord %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xeb,0xc1]
1652 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
1656 define <4 x i32> @test_mask_or_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
1657 ;CHECK-LABEL: test_mask_or_epi32_rrk_128
1658 ;CHECK: vpord %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xeb,0xd1]
1659 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
1663 define <4 x i32> @test_mask_or_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
1664 ;CHECK-LABEL: test_mask_or_epi32_rrkz_128
1665 ;CHECK: vpord %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xeb,0xc1]
1666 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
1670 define <4 x i32> @test_mask_or_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
1671 ;CHECK-LABEL: test_mask_or_epi32_rm_128
1672 ;CHECK: vpord (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xeb,0x07]
1673 %b = load <4 x i32>, <4 x i32>* %ptr_b
1674 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
1678 define <4 x i32> @test_mask_or_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
1679 ;CHECK-LABEL: test_mask_or_epi32_rmk_128
1680 ;CHECK: vpord (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xeb,0x0f]
1681 %b = load <4 x i32>, <4 x i32>* %ptr_b
1682 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
1686 define <4 x i32> @test_mask_or_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
1687 ;CHECK-LABEL: test_mask_or_epi32_rmkz_128
1688 ;CHECK: vpord (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xeb,0x07]
1689 %b = load <4 x i32>, <4 x i32>* %ptr_b
1690 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
1694 define <4 x i32> @test_mask_or_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
1695 ;CHECK-LABEL: test_mask_or_epi32_rmb_128
1696 ;CHECK: vpord (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xeb,0x07]
1697 %q = load i32, i32* %ptr_b
1698 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
1699 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
1700 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
1704 define <4 x i32> @test_mask_or_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
1705 ;CHECK-LABEL: test_mask_or_epi32_rmbk_128
1706 ;CHECK: vpord (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xeb,0x0f]
1707 %q = load i32, i32* %ptr_b
1708 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
1709 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
1710 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
1714 define <4 x i32> @test_mask_or_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
1715 ;CHECK-LABEL: test_mask_or_epi32_rmbkz_128
1716 ;CHECK: vpord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xeb,0x07]
1717 %q = load i32, i32* %ptr_b
1718 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
1719 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
1720 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
1724 declare <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
1726 define <8 x i32> @test_mask_or_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
1727 ;CHECK-LABEL: test_mask_or_epi32_rr_256
1728 ;CHECK: vpord %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xeb,0xc1]
1729 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
1733 define <8 x i32> @test_mask_or_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
1734 ;CHECK-LABEL: test_mask_or_epi32_rrk_256
1735 ;CHECK: vpord %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xeb,0xd1]
1736 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
1740 define <8 x i32> @test_mask_or_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
1741 ;CHECK-LABEL: test_mask_or_epi32_rrkz_256
1742 ;CHECK: vpord %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0xc1]
1743 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
1747 define <8 x i32> @test_mask_or_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
1748 ;CHECK-LABEL: test_mask_or_epi32_rm_256
1749 ;CHECK: vpord (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xeb,0x07]
1750 %b = load <8 x i32>, <8 x i32>* %ptr_b
1751 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
1755 define <8 x i32> @test_mask_or_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
1756 ;CHECK-LABEL: test_mask_or_epi32_rmk_256
1757 ;CHECK: vpord (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xeb,0x0f]
1758 %b = load <8 x i32>, <8 x i32>* %ptr_b
1759 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
1763 define <8 x i32> @test_mask_or_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
1764 ;CHECK-LABEL: test_mask_or_epi32_rmkz_256
1765 ;CHECK: vpord (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0x07]
1766 %b = load <8 x i32>, <8 x i32>* %ptr_b
1767 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
1771 define <8 x i32> @test_mask_or_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
1772 ;CHECK-LABEL: test_mask_or_epi32_rmb_256
1773 ;CHECK: vpord (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xeb,0x07]
1774 %q = load i32, i32* %ptr_b
1775 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
1776 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
1777 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
1781 define <8 x i32> @test_mask_or_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
1782 ;CHECK-LABEL: test_mask_or_epi32_rmbk_256
1783 ;CHECK: vpord (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xeb,0x0f]
1784 %q = load i32, i32* %ptr_b
1785 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
1786 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
1787 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
1791 define <8 x i32> @test_mask_or_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
1792 ;CHECK-LABEL: test_mask_or_epi32_rmbkz_256
1793 ;CHECK: vpord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xeb,0x07]
1794 %q = load i32, i32* %ptr_b
1795 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
1796 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
1797 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
1801 declare <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
1803 define <4 x i32> @test_mask_xor_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
1804 ;CHECK-LABEL: test_mask_xor_epi32_rr_128
1805 ;CHECK: vpxord %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xef,0xc1]
1806 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
1810 define <4 x i32> @test_mask_xor_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
1811 ;CHECK-LABEL: test_mask_xor_epi32_rrk_128
1812 ;CHECK: vpxord %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xef,0xd1]
1813 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
1817 define <4 x i32> @test_mask_xor_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
1818 ;CHECK-LABEL: test_mask_xor_epi32_rrkz_128
1819 ;CHECK: vpxord %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xef,0xc1]
1820 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
1824 define <4 x i32> @test_mask_xor_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
1825 ;CHECK-LABEL: test_mask_xor_epi32_rm_128
1826 ;CHECK: vpxord (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xef,0x07]
1827 %b = load <4 x i32>, <4 x i32>* %ptr_b
1828 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
1832 define <4 x i32> @test_mask_xor_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
1833 ;CHECK-LABEL: test_mask_xor_epi32_rmk_128
1834 ;CHECK: vpxord (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xef,0x0f]
1835 %b = load <4 x i32>, <4 x i32>* %ptr_b
1836 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
1840 define <4 x i32> @test_mask_xor_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
1841 ;CHECK-LABEL: test_mask_xor_epi32_rmkz_128
1842 ;CHECK: vpxord (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xef,0x07]
1843 %b = load <4 x i32>, <4 x i32>* %ptr_b
1844 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
1848 define <4 x i32> @test_mask_xor_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
1849 ;CHECK-LABEL: test_mask_xor_epi32_rmb_128
1850 ;CHECK: vpxord (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xef,0x07]
1851 %q = load i32, i32* %ptr_b
1852 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
1853 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
1854 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
1858 define <4 x i32> @test_mask_xor_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
1859 ;CHECK-LABEL: test_mask_xor_epi32_rmbk_128
1860 ;CHECK: vpxord (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xef,0x0f]
1861 %q = load i32, i32* %ptr_b
1862 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
1863 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
1864 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
1868 define <4 x i32> @test_mask_xor_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
1869 ;CHECK-LABEL: test_mask_xor_epi32_rmbkz_128
1870 ;CHECK: vpxord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xef,0x07]
1871 %q = load i32, i32* %ptr_b
1872 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
1873 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
1874 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
1878 declare <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
1880 define <8 x i32> @test_mask_xor_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
1881 ;CHECK-LABEL: test_mask_xor_epi32_rr_256
1882 ;CHECK: vpxord %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xef,0xc1]
1883 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
1887 define <8 x i32> @test_mask_xor_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
1888 ;CHECK-LABEL: test_mask_xor_epi32_rrk_256
1889 ;CHECK: vpxord %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xef,0xd1]
1890 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
1894 define <8 x i32> @test_mask_xor_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
1895 ;CHECK-LABEL: test_mask_xor_epi32_rrkz_256
1896 ;CHECK: vpxord %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xef,0xc1]
1897 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
1901 define <8 x i32> @test_mask_xor_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
1902 ;CHECK-LABEL: test_mask_xor_epi32_rm_256
1903 ;CHECK: vpxord (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xef,0x07]
1904 %b = load <8 x i32>, <8 x i32>* %ptr_b
1905 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
1909 define <8 x i32> @test_mask_xor_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
1910 ;CHECK-LABEL: test_mask_xor_epi32_rmk_256
1911 ;CHECK: vpxord (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xef,0x0f]
1912 %b = load <8 x i32>, <8 x i32>* %ptr_b
1913 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
1917 define <8 x i32> @test_mask_xor_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
1918 ;CHECK-LABEL: test_mask_xor_epi32_rmkz_256
1919 ;CHECK: vpxord (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xef,0x07]
1920 %b = load <8 x i32>, <8 x i32>* %ptr_b
1921 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
1925 define <8 x i32> @test_mask_xor_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
1926 ;CHECK-LABEL: test_mask_xor_epi32_rmb_256
1927 ;CHECK: vpxord (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xef,0x07]
1928 %q = load i32, i32* %ptr_b
1929 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
1930 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
1931 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
1935 define <8 x i32> @test_mask_xor_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
1936 ;CHECK-LABEL: test_mask_xor_epi32_rmbk_256
1937 ;CHECK: vpxord (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xef,0x0f]
1938 %q = load i32, i32* %ptr_b
1939 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
1940 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
1941 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
1945 define <8 x i32> @test_mask_xor_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
1946 ;CHECK-LABEL: test_mask_xor_epi32_rmbkz_256
1947 ;CHECK: vpxord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xef,0x07]
1948 %q = load i32, i32* %ptr_b
1949 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
1950 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
1951 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
1955 declare <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
1957 define <4 x i32> @test_mask_andnot_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
1958 ;CHECK-LABEL: test_mask_andnot_epi32_rr_128
1959 ;CHECK: vpandnd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdf,0xc1]
1960 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
1964 define <4 x i32> @test_mask_andnot_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
1965 ;CHECK-LABEL: test_mask_andnot_epi32_rrk_128
1966 ;CHECK: vpandnd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdf,0xd1]
1967 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
1971 define <4 x i32> @test_mask_andnot_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
1972 ;CHECK-LABEL: test_mask_andnot_epi32_rrkz_128
1973 ;CHECK: vpandnd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdf,0xc1]
1974 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
1978 define <4 x i32> @test_mask_andnot_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
1979 ;CHECK-LABEL: test_mask_andnot_epi32_rm_128
1980 ;CHECK: vpandnd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdf,0x07]
1981 %b = load <4 x i32>, <4 x i32>* %ptr_b
1982 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
1986 define <4 x i32> @test_mask_andnot_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
1987 ;CHECK-LABEL: test_mask_andnot_epi32_rmk_128
1988 ;CHECK: vpandnd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdf,0x0f]
1989 %b = load <4 x i32>, <4 x i32>* %ptr_b
1990 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
1994 define <4 x i32> @test_mask_andnot_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
1995 ;CHECK-LABEL: test_mask_andnot_epi32_rmkz_128
1996 ;CHECK: vpandnd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdf,0x07]
1997 %b = load <4 x i32>, <4 x i32>* %ptr_b
1998 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
2002 define <4 x i32> @test_mask_andnot_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
2003 ;CHECK-LABEL: test_mask_andnot_epi32_rmb_128
2004 ;CHECK: vpandnd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xdf,0x07]
2005 %q = load i32, i32* %ptr_b
2006 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
2007 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
2008 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
2012 define <4 x i32> @test_mask_andnot_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
2013 ;CHECK-LABEL: test_mask_andnot_epi32_rmbk_128
2014 ;CHECK: vpandnd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xdf,0x0f]
2015 %q = load i32, i32* %ptr_b
2016 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
2017 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
2018 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
2022 define <4 x i32> @test_mask_andnot_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
2023 ;CHECK-LABEL: test_mask_andnot_epi32_rmbkz_128
2024 ;CHECK: vpandnd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xdf,0x07]
2025 %q = load i32, i32* %ptr_b
2026 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
2027 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
2028 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
2032 declare <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
2034 define <8 x i32> @test_mask_andnot_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
2035 ;CHECK-LABEL: test_mask_andnot_epi32_rr_256
2036 ;CHECK: vpandnd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdf,0xc1]
2037 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
2041 define <8 x i32> @test_mask_andnot_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
2042 ;CHECK-LABEL: test_mask_andnot_epi32_rrk_256
2043 ;CHECK: vpandnd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdf,0xd1]
2044 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
2048 define <8 x i32> @test_mask_andnot_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
2049 ;CHECK-LABEL: test_mask_andnot_epi32_rrkz_256
2050 ;CHECK: vpandnd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0xc1]
2051 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
2055 define <8 x i32> @test_mask_andnot_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
2056 ;CHECK-LABEL: test_mask_andnot_epi32_rm_256
2057 ;CHECK: vpandnd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdf,0x07]
2058 %b = load <8 x i32>, <8 x i32>* %ptr_b
2059 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
2063 define <8 x i32> @test_mask_andnot_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
2064 ;CHECK-LABEL: test_mask_andnot_epi32_rmk_256
2065 ;CHECK: vpandnd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdf,0x0f]
2066 %b = load <8 x i32>, <8 x i32>* %ptr_b
2067 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
2071 define <8 x i32> @test_mask_andnot_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
2072 ;CHECK-LABEL: test_mask_andnot_epi32_rmkz_256
2073 ;CHECK: vpandnd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0x07]
2074 %b = load <8 x i32>, <8 x i32>* %ptr_b
2075 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
2079 define <8 x i32> @test_mask_andnot_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
2080 ;CHECK-LABEL: test_mask_andnot_epi32_rmb_256
2081 ;CHECK: vpandnd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xdf,0x07]
2082 %q = load i32, i32* %ptr_b
2083 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
2084 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
2085 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
2089 define <8 x i32> @test_mask_andnot_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
2090 ;CHECK-LABEL: test_mask_andnot_epi32_rmbk_256
2091 ;CHECK: vpandnd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xdf,0x0f]
2092 %q = load i32, i32* %ptr_b
2093 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
2094 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
2095 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
2099 define <8 x i32> @test_mask_andnot_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
2100 ;CHECK-LABEL: test_mask_andnot_epi32_rmbkz_256
2101 ;CHECK: vpandnd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xdf,0x07]
2102 %q = load i32, i32* %ptr_b
2103 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
2104 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
2105 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
2109 declare <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
2111 define <2 x i64> @test_mask_andnot_epi64_rr_128(<2 x i64> %a, <2 x i64> %b) {
2112 ;CHECK-LABEL: test_mask_andnot_epi64_rr_128
2113 ;CHECK: vpandnq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xdf,0xc1]
2114 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
2118 define <2 x i64> @test_mask_andnot_epi64_rrk_128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) {
2119 ;CHECK-LABEL: test_mask_andnot_epi64_rrk_128
2120 ;CHECK: vpandnq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xdf,0xd1]
2121 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
2125 define <2 x i64> @test_mask_andnot_epi64_rrkz_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
2126 ;CHECK-LABEL: test_mask_andnot_epi64_rrkz_128
2127 ;CHECK: vpandnq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xdf,0xc1]
2128 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
2132 define <2 x i64> @test_mask_andnot_epi64_rm_128(<2 x i64> %a, <2 x i64>* %ptr_b) {
2133 ;CHECK-LABEL: test_mask_andnot_epi64_rm_128
2134 ;CHECK: vpandnq (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xdf,0x07]
2135 %b = load <2 x i64>, <2 x i64>* %ptr_b
2136 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
2140 define <2 x i64> @test_mask_andnot_epi64_rmk_128(<2 x i64> %a, <2 x i64>* %ptr_b, <2 x i64> %passThru, i8 %mask) {
2141 ;CHECK-LABEL: test_mask_andnot_epi64_rmk_128
2142 ;CHECK: vpandnq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xdf,0x0f]
2143 %b = load <2 x i64>, <2 x i64>* %ptr_b
2144 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
2148 define <2 x i64> @test_mask_andnot_epi64_rmkz_128(<2 x i64> %a, <2 x i64>* %ptr_b, i8 %mask) {
2149 ;CHECK-LABEL: test_mask_andnot_epi64_rmkz_128
2150 ;CHECK: vpandnq (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xdf,0x07]
2151 %b = load <2 x i64>, <2 x i64>* %ptr_b
2152 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
2156 define <2 x i64> @test_mask_andnot_epi64_rmb_128(<2 x i64> %a, i64* %ptr_b) {
2157 ;CHECK-LABEL: test_mask_andnot_epi64_rmb_128
2158 ;CHECK: vpandnq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x18,0xdf,0x07]
2159 %q = load i64, i64* %ptr_b
2160 %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
2161 %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
2162 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
2166 define <2 x i64> @test_mask_andnot_epi64_rmbk_128(<2 x i64> %a, i64* %ptr_b, <2 x i64> %passThru, i8 %mask) {
2167 ;CHECK-LABEL: test_mask_andnot_epi64_rmbk_128
2168 ;CHECK: vpandnq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x19,0xdf,0x0f]
2169 %q = load i64, i64* %ptr_b
2170 %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
2171 %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
2172 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
2176 define <2 x i64> @test_mask_andnot_epi64_rmbkz_128(<2 x i64> %a, i64* %ptr_b, i8 %mask) {
2177 ;CHECK-LABEL: test_mask_andnot_epi64_rmbkz_128
2178 ;CHECK: vpandnq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0xdf,0x07]
2179 %q = load i64, i64* %ptr_b
2180 %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
2181 %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
2182 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
2186 declare <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
2188 define <4 x i64> @test_mask_andnot_epi64_rr_256(<4 x i64> %a, <4 x i64> %b) {
2189 ;CHECK-LABEL: test_mask_andnot_epi64_rr_256
2190 ;CHECK: vpandnq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xdf,0xc1]
2191 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
2195 define <4 x i64> @test_mask_andnot_epi64_rrk_256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) {
2196 ;CHECK-LABEL: test_mask_andnot_epi64_rrk_256
2197 ;CHECK: vpandnq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xdf,0xd1]
2198 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
2202 define <4 x i64> @test_mask_andnot_epi64_rrkz_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
2203 ;CHECK-LABEL: test_mask_andnot_epi64_rrkz_256
2204 ;CHECK: vpandnq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0xc1]
2205 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
2209 define <4 x i64> @test_mask_andnot_epi64_rm_256(<4 x i64> %a, <4 x i64>* %ptr_b) {
2210 ;CHECK-LABEL: test_mask_andnot_epi64_rm_256
2211 ;CHECK: vpandnq (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xdf,0x07]
2212 %b = load <4 x i64>, <4 x i64>* %ptr_b
2213 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
2217 define <4 x i64> @test_mask_andnot_epi64_rmk_256(<4 x i64> %a, <4 x i64>* %ptr_b, <4 x i64> %passThru, i8 %mask) {
2218 ;CHECK-LABEL: test_mask_andnot_epi64_rmk_256
2219 ;CHECK: vpandnq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xdf,0x0f]
2220 %b = load <4 x i64>, <4 x i64>* %ptr_b
2221 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
2225 define <4 x i64> @test_mask_andnot_epi64_rmkz_256(<4 x i64> %a, <4 x i64>* %ptr_b, i8 %mask) {
2226 ;CHECK-LABEL: test_mask_andnot_epi64_rmkz_256
2227 ;CHECK: vpandnq (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0x07]
2228 %b = load <4 x i64>, <4 x i64>* %ptr_b
2229 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
2233 define <4 x i64> @test_mask_andnot_epi64_rmb_256(<4 x i64> %a, i64* %ptr_b) {
2234 ;CHECK-LABEL: test_mask_andnot_epi64_rmb_256
2235 ;CHECK: vpandnq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x38,0xdf,0x07]
2236 %q = load i64, i64* %ptr_b
2237 %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
2238 %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
2239 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
2243 define <4 x i64> @test_mask_andnot_epi64_rmbk_256(<4 x i64> %a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) {
2244 ;CHECK-LABEL: test_mask_andnot_epi64_rmbk_256
2245 ;CHECK: vpandnq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x39,0xdf,0x0f]
2246 %q = load i64, i64* %ptr_b
2247 %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
2248 %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
2249 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
2253 define <4 x i64> @test_mask_andnot_epi64_rmbkz_256(<4 x i64> %a, i64* %ptr_b, i8 %mask) {
2254 ;CHECK-LABEL: test_mask_andnot_epi64_rmbkz_256
2255 ;CHECK: vpandnq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0xdf,0x07]
2256 %q = load i64, i64* %ptr_b
2257 %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
2258 %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
2259 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
2263 declare <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
2265 define i8 @test_cmpps_256(<8 x float> %a, <8 x float> %b) {
2266 ;CHECK: vcmpleps %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc1,0x02]
2267 %res = call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 2, i8 -1)
2270 declare i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> , <8 x float> , i32, i8)
2272 define i8 @test_cmpps_128(<4 x float> %a, <4 x float> %b) {
2273 ;CHECK: vcmpleps %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x02]
2274 %res = call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 2, i8 -1)
2277 declare i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> , <4 x float> , i32, i8)
2279 define i8 @test_cmppd_256(<4 x double> %a, <4 x double> %b) {
2280 ;CHECK: vcmplepd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xc1,0x02]
2281 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %a, <4 x double> %b, i32 2, i8 -1)
2284 declare i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> , <4 x double> , i32, i8)
2286 define i8 @test_cmppd_128(<2 x double> %a, <2 x double> %b) {
2287 ;CHECK: vcmplepd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x02]
2288 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %a, <2 x double> %b, i32 2, i8 -1)
2291 declare i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> , <2 x double> , i32, i8)
2293 define <8 x float> @test_mm512_maskz_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
2294 ;CHECK-LABEL: test_mm512_maskz_add_ps_256
2295 ;CHECK: vaddps %ymm1, %ymm0, %ymm0 {%k1} {z}
2296 %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
2297 ret <8 x float> %res
2300 define <8 x float> @test_mm512_mask_add_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
2301 ;CHECK-LABEL: test_mm512_mask_add_ps_256
2302 ;CHECK: vaddps %ymm1, %ymm0, %ymm2 {%k1}
2303 %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
2304 ret <8 x float> %res
2307 define <8 x float> @test_mm512_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
2308 ;CHECK-LABEL: test_mm512_add_ps_256
2309 ;CHECK: vaddps %ymm1, %ymm0, %ymm0
2310 %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
2311 ret <8 x float> %res
2313 declare <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
2315 define <4 x float> @test_mm512_maskz_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2316 ;CHECK-LABEL: test_mm512_maskz_add_ps_128
2317 ;CHECK: vaddps %xmm1, %xmm0, %xmm0 {%k1} {z}
2318 %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
2319 ret <4 x float> %res
2322 define <4 x float> @test_mm512_mask_add_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
2323 ;CHECK-LABEL: test_mm512_mask_add_ps_128
2324 ;CHECK: vaddps %xmm1, %xmm0, %xmm2 {%k1}
2325 %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
2326 ret <4 x float> %res
2329 define <4 x float> @test_mm512_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2330 ;CHECK-LABEL: test_mm512_add_ps_128
2331 ;CHECK: vaddps %xmm1, %xmm0, %xmm0
2332 %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
2333 ret <4 x float> %res
2335 declare <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
2337 define <8 x float> @test_mm512_maskz_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
2338 ;CHECK-LABEL: test_mm512_maskz_sub_ps_256
2339 ;CHECK: vsubps %ymm1, %ymm0, %ymm0 {%k1} {z}
2340 %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
2341 ret <8 x float> %res
2344 define <8 x float> @test_mm512_mask_sub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
2345 ;CHECK-LABEL: test_mm512_mask_sub_ps_256
2346 ;CHECK: vsubps %ymm1, %ymm0, %ymm2 {%k1}
2347 %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
2348 ret <8 x float> %res
2351 define <8 x float> @test_mm512_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
2352 ;CHECK-LABEL: test_mm512_sub_ps_256
2353 ;CHECK: vsubps %ymm1, %ymm0, %ymm0
2354 %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
2355 ret <8 x float> %res
2357 declare <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
2359 define <4 x float> @test_mm512_maskz_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2360 ;CHECK-LABEL: test_mm512_maskz_sub_ps_128
2361 ;CHECK: vsubps %xmm1, %xmm0, %xmm0 {%k1} {z}
2362 %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
2363 ret <4 x float> %res
2366 define <4 x float> @test_mm512_mask_sub_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
2367 ;CHECK-LABEL: test_mm512_mask_sub_ps_128
2368 ;CHECK: vsubps %xmm1, %xmm0, %xmm2 {%k1}
2369 %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
2370 ret <4 x float> %res
2373 define <4 x float> @test_mm512_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2374 ;CHECK-LABEL: test_mm512_sub_ps_128
2375 ;CHECK: vsubps %xmm1, %xmm0, %xmm0
2376 %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
2377 ret <4 x float> %res
2379 declare <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
2381 define <8 x float> @test_mm512_maskz_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
2382 ;CHECK-LABEL: test_mm512_maskz_mul_ps_256
2383 ;CHECK: vmulps %ymm1, %ymm0, %ymm0 {%k1} {z}
2384 %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
2385 ret <8 x float> %res
2388 define <8 x float> @test_mm512_mask_mul_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
2389 ;CHECK-LABEL: test_mm512_mask_mul_ps_256
2390 ;CHECK: vmulps %ymm1, %ymm0, %ymm2 {%k1}
2391 %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
2392 ret <8 x float> %res
2395 define <8 x float> @test_mm512_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
2396 ;CHECK-LABEL: test_mm512_mul_ps_256
2397 ;CHECK: vmulps %ymm1, %ymm0, %ymm0
2398 %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
2399 ret <8 x float> %res
2401 declare <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
2403 define <4 x float> @test_mm512_maskz_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2404 ;CHECK-LABEL: test_mm512_maskz_mul_ps_128
2405 ;CHECK: vmulps %xmm1, %xmm0, %xmm0 {%k1} {z}
2406 %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
2407 ret <4 x float> %res
2410 define <4 x float> @test_mm512_mask_mul_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
2411 ;CHECK-LABEL: test_mm512_mask_mul_ps_128
2412 ;CHECK: vmulps %xmm1, %xmm0, %xmm2 {%k1}
2413 %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
2414 ret <4 x float> %res
2417 define <4 x float> @test_mm512_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2418 ;CHECK-LABEL: test_mm512_mul_ps_128
2419 ;CHECK: vmulps %xmm1, %xmm0, %xmm0
2420 %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
2421 ret <4 x float> %res
2423 declare <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
2425 define <8 x float> @test_mm512_maskz_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
2426 ;CHECK-LABEL: test_mm512_maskz_div_ps_256
2427 ;CHECK: vdivps %ymm1, %ymm0, %ymm0 {%k1} {z}
2428 %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
2429 ret <8 x float> %res
2432 define <8 x float> @test_mm512_mask_div_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
2433 ;CHECK-LABEL: test_mm512_mask_div_ps_256
2434 ;CHECK: vdivps %ymm1, %ymm0, %ymm2 {%k1}
2435 %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
2436 ret <8 x float> %res
2439 define <8 x float> @test_mm512_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
2440 ;CHECK-LABEL: test_mm512_div_ps_256
2441 ;CHECK: vdivps %ymm1, %ymm0, %ymm0
2442 %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
2443 ret <8 x float> %res
2445 declare <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
2447 define <4 x float> @test_mm512_maskz_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2448 ;CHECK-LABEL: test_mm512_maskz_div_ps_128
2449 ;CHECK: vdivps %xmm1, %xmm0, %xmm0 {%k1} {z}
2450 %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
2451 ret <4 x float> %res
2454 define <4 x float> @test_mm512_mask_div_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
2455 ;CHECK-LABEL: test_mm512_mask_div_ps_128
2456 ;CHECK: vdivps %xmm1, %xmm0, %xmm2 {%k1}
2457 %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
2458 ret <4 x float> %res
2461 define <4 x float> @test_mm512_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2462 ;CHECK-LABEL: test_mm512_div_ps_128
2463 ;CHECK: vdivps %xmm1, %xmm0, %xmm0
2464 %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
2465 ret <4 x float> %res
2467 declare <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
2469 define <8 x float> @test_mm512_maskz_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
2470 ;CHECK-LABEL: test_mm512_maskz_max_ps_256
2471 ;CHECK: vmaxps %ymm1, %ymm0, %ymm0 {%k1} {z}
2472 %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
2473 ret <8 x float> %res
2476 define <8 x float> @test_mm512_mask_max_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
2477 ;CHECK-LABEL: test_mm512_mask_max_ps_256
2478 ;CHECK: vmaxps %ymm1, %ymm0, %ymm2 {%k1}
2479 %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
2480 ret <8 x float> %res
2483 define <8 x float> @test_mm512_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
2484 ;CHECK-LABEL: test_mm512_max_ps_256
2485 ;CHECK: vmaxps %ymm1, %ymm0, %ymm0
2486 %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
2487 ret <8 x float> %res
2489 declare <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
2491 define <4 x float> @test_mm512_maskz_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2492 ;CHECK-LABEL: test_mm512_maskz_max_ps_128
2493 ;CHECK: vmaxps %xmm1, %xmm0, %xmm0 {%k1} {z}
2494 %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
2495 ret <4 x float> %res
2498 define <4 x float> @test_mm512_mask_max_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
2499 ;CHECK-LABEL: test_mm512_mask_max_ps_128
2500 ;CHECK: vmaxps %xmm1, %xmm0, %xmm2 {%k1}
2501 %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
2502 ret <4 x float> %res
2505 define <4 x float> @test_mm512_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2506 ;CHECK-LABEL: test_mm512_max_ps_128
2507 ;CHECK: vmaxps %xmm1, %xmm0, %xmm0
2508 %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
2509 ret <4 x float> %res
2511 declare <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
2513 define <8 x float> @test_mm512_maskz_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
2514 ;CHECK-LABEL: test_mm512_maskz_min_ps_256
2515 ;CHECK: vminps %ymm1, %ymm0, %ymm0 {%k1} {z}
2516 %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
2517 ret <8 x float> %res
2520 define <8 x float> @test_mm512_mask_min_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
2521 ;CHECK-LABEL: test_mm512_mask_min_ps_256
2522 ;CHECK: vminps %ymm1, %ymm0, %ymm2 {%k1}
2523 %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
2524 ret <8 x float> %res
2527 define <8 x float> @test_mm512_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
2528 ;CHECK-LABEL: test_mm512_min_ps_256
2529 ;CHECK: vminps %ymm1, %ymm0, %ymm0
2530 %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
2531 ret <8 x float> %res
2533 declare <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
2535 define <4 x float> @test_mm512_maskz_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2536 ;CHECK-LABEL: test_mm512_maskz_min_ps_128
2537 ;CHECK: vminps %xmm1, %xmm0, %xmm0 {%k1} {z}
2538 %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
2539 ret <4 x float> %res
2542 define <4 x float> @test_mm512_mask_min_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
2543 ;CHECK-LABEL: test_mm512_mask_min_ps_128
2544 ;CHECK: vminps %xmm1, %xmm0, %xmm2 {%k1}
2545 %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
2546 ret <4 x float> %res
2549 define <4 x float> @test_mm512_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2550 ;CHECK-LABEL: test_mm512_min_ps_128
2551 ;CHECK: vminps %xmm1, %xmm0, %xmm0
2552 %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
2553 ret <4 x float> %res
2555 declare <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
2557 define <4 x double> @test_sqrt_pd_256(<4 x double> %a0, i8 %mask) {
2558 ; CHECK-LABEL: test_sqrt_pd_256
2560 %res = call <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
2561 ret <4 x double> %res
2563 declare <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
2565 define <8 x float> @test_sqrt_ps_256(<8 x float> %a0, i8 %mask) {
2566 ; CHECK-LABEL: test_sqrt_ps_256
2568 %res = call <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
2569 ret <8 x float> %res
2572 declare <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
2574 define <4 x double> @test_getexp_pd_256(<4 x double> %a0) {
2575 ; CHECK-LABEL: test_getexp_pd_256
2577 %res = call <4 x double> @llvm.x86.avx512.mask.getexp.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 -1)
2578 ret <4 x double> %res
2581 declare <4 x double> @llvm.x86.avx512.mask.getexp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
2583 define <8 x float> @test_getexp_ps_256(<8 x float> %a0) {
2584 ; CHECK-LABEL: test_getexp_ps_256
2586 %res = call <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 -1)
2587 ret <8 x float> %res
2589 declare <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
2591 declare <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
2593 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_d_128
2595 ; CHECK: vpmaxsd %xmm
2597 define <4 x i32>@test_int_x86_avx512_mask_pmaxs_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
2598 %res = call <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2 ,i8 %mask)
2599 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
2600 %res2 = add <4 x i32> %res, %res1
2604 declare <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
2606 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_d_256
2608 ; CHECK: vpmaxsd %ymm
2610 define <8 x i32>@test_int_x86_avx512_mask_pmaxs_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
2611 %res = call <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
2612 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
2613 %res2 = add <8 x i32> %res, %res1
2617 declare <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
2619 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_q_128
2621 ; CHECK: vpmaxsq %xmm
2623 define <2 x i64>@test_int_x86_avx512_mask_pmaxs_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
2624 %res = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
2625 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
2626 %res2 = add <2 x i64> %res, %res1
2630 declare <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
2632 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_q_256
2634 ; CHECK: vpmaxsq %ymm
2636 define <4 x i64>@test_int_x86_avx512_mask_pmaxs_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
2637 %res = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
2638 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
2639 %res2 = add <4 x i64> %res, %res1
2643 declare <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
2645 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_d_128
2647 ; CHECK: vpmaxud %xmm
2649 define <4 x i32>@test_int_x86_avx512_mask_pmaxu_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2,i8 %mask) {
2650 %res = call <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
2651 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
2652 %res2 = add <4 x i32> %res, %res1
2656 declare <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
2658 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_d_256
2660 ; CHECK: vpmaxud %ymm
2662 define <8 x i32>@test_int_x86_avx512_mask_pmaxu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
2663 %res = call <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
2664 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
2665 %res2 = add <8 x i32> %res, %res1
2669 declare <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
2671 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_q_128
2673 ; CHECK: vpmaxuq %xmm
2675 define <2 x i64>@test_int_x86_avx512_mask_pmaxu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
2676 %res = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
2677 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
2678 %res2 = add <2 x i64> %res, %res1
2682 declare <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
2684 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_q_256
2686 ; CHECK: vpmaxuq %ymm
2688 define <4 x i64>@test_int_x86_avx512_mask_pmaxu_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
2689 %res = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
2690 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
2691 %res2 = add <4 x i64> %res, %res1
2695 declare <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
2697 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_d_128
2699 ; CHECK: vpminsd %xmm
2701 define <4 x i32>@test_int_x86_avx512_mask_pmins_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
2702 %res = call <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
2703 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
2704 %res2 = add <4 x i32> %res, %res1
2708 declare <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
2710 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_d_256
2712 ; CHECK: vpminsd %ymm
2714 define <8 x i32>@test_int_x86_avx512_mask_pmins_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
2715 %res = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
2716 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
2717 %res2 = add <8 x i32> %res, %res1
2721 declare <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
2723 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_q_128
2725 ; CHECK: vpminsq %xmm
2727 define <2 x i64>@test_int_x86_avx512_mask_pmins_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
2728 %res = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
2729 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
2730 %res2 = add <2 x i64> %res, %res1
2734 declare <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
2736 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_q_256
2738 ; CHECK: vpminsq %ymm
2740 define <4 x i64>@test_int_x86_avx512_mask_pmins_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
2741 %res = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
2742 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
2743 %res2 = add <4 x i64> %res, %res1
2747 declare <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
2749 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_d_128
2751 ; CHECK: vpminud %xmm
2753 define <4 x i32>@test_int_x86_avx512_mask_pminu_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
2754 %res = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
2755 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
2756 %res2 = add <4 x i32> %res, %res1
2760 declare <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
2762 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_d_256
2764 ; CHECK: vpminud %ymm
2766 define <8 x i32>@test_int_x86_avx512_mask_pminu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
2767 %res = call <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
2768 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
2769 %res2 = add <8 x i32> %res, %res1
2773 declare <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
2775 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_q_128
2777 ; CHECK: vpminuq %xmm
2779 define <2 x i64>@test_int_x86_avx512_mask_pminu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
2780 %res = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
2781 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
2782 %res2 = add <2 x i64> %res, %res1
2786 declare <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
2788 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_q_256
2790 ; CHECK: vpminuq %ymm
2792 define <4 x i64>@test_int_x86_avx512_mask_pminu_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
2793 %res = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
2794 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
2795 %res2 = add <4 x i64> %res, %res1
2799 declare <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
2801 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_128
2804 ; CHECK: vpermt2d %xmm{{.*}}{%k1}
2806 define <4 x i32>@test_int_x86_avx512_mask_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
2807 %res = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
2808 %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
2809 %res2 = add <4 x i32> %res, %res1
2813 declare <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
2815 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_128
2818 ; CHECK: vpermt2d %xmm{{.*}}{%k1} {z}
2819 define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
2820 %res = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
2821 %res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
2822 %res2 = add <4 x i32> %res, %res1
2826 declare <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
2828 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_256
2831 ; CHECK: vpermt2d %ymm{{.*}}{%k1}
2833 define <8 x i32>@test_int_x86_avx512_mask_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
2834 %res = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
2835 %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
2836 %res2 = add <8 x i32> %res, %res1
2840 declare <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
2842 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_256
2845 ; CHECK: vpermt2d {{.*}}{%k1} {z}
2846 define <8 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
2847 %res = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
2848 %res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
2849 %res2 = add <8 x i32> %res, %res1
2853 declare <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8)
2855 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_128
2858 ; CHECK: vpermi2pd %xmm{{.*}}{%k1}
2859 define <2 x double>@test_int_x86_avx512_mask_vpermi2var_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) {
2860 %res = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3)
2861 %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1)
2862 %res2 = fadd <2 x double> %res, %res1
2863 ret <2 x double> %res2
2866 declare <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8)
2868 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_256
2871 ; CHECK: vpermi2pd %ymm{{.*}}{%k1}
2872 define <4 x double>@test_int_x86_avx512_mask_vpermi2var_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
2873 %res = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
2874 %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
2875 %res2 = fadd <4 x double> %res, %res1
2876 ret <4 x double> %res2
2879 declare <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8)
2881 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_128
2884 ; CHECK: vpermi2ps %xmm{{.*}}{%k1}
2885 define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) {
2886 %res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3)
2887 %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1)
2888 %res2 = fadd <4 x float> %res, %res1
2889 ret <4 x float> %res2
2892 declare <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8)
2894 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_256
2897 ; CHECK: vpermi2ps %ymm{{.*}}{%k1}
2898 define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
2899 %res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
2900 %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
2901 %res2 = fadd <8 x float> %res, %res1
2902 ret <8 x float> %res2
2905 declare <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64>, <2 x i64>, i8)
2907 ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_q_128
2910 ; CHECK: vpabsq{{.*}}{%k1}
2911 define <2 x i64>@test_int_x86_avx512_mask_pabs_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
2912 %res = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
2913 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
2914 %res2 = add <2 x i64> %res, %res1
2918 declare <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64>, <4 x i64>, i8)
2920 ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_q_256
2923 ; CHECK: vpabsq{{.*}}{%k1}
2924 define <4 x i64>@test_int_x86_avx512_mask_pabs_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
2925 %res = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
2926 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
2927 %res2 = add <4 x i64> %res, %res1
2931 declare <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32>, <4 x i32>, i8)
2933 ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_d_128
2936 ; CHECK: vpabsd{{.*}}{%k1}
2937 define <4 x i32>@test_int_x86_avx512_mask_pabs_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
2938 %res = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
2939 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
2940 %res2 = add <4 x i32> %res, %res1
2944 declare <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32>, <8 x i32>, i8)
2946 ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_d_256
2949 ; CHECK: vpabsd{{.*}}{%k1}
2950 define <8 x i32>@test_int_x86_avx512_mask_pabs_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
2951 %res = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
2952 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
2953 %res2 = add <8 x i32> %res, %res1
2958 declare <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
2960 ; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_pd_128
2963 ; CHECK: vscalefpd{{.*}}{%k1}
2964 define <2 x double>@test_int_x86_avx512_mask_scalef_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
2965 %res = call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
2966 %res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
2967 %res2 = fadd <2 x double> %res, %res1
2968 ret <2 x double> %res2
2971 declare <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
2973 ; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_pd_256
2976 ; CHECK: vscalefpd{{.*}}{%k1}
2977 define <4 x double>@test_int_x86_avx512_mask_scalef_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
2978 %res = call <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
2979 %res1 = call <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
2980 %res2 = fadd <4 x double> %res, %res1
2981 ret <4 x double> %res2
2984 declare <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
2985 ; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ps_128
2988 ; CHECK: vscalefps{{.*}}{%k1}
2989 define <4 x float>@test_int_x86_avx512_mask_scalef_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
2990 %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
2991 %res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
2992 %res2 = fadd <4 x float> %res, %res1
2993 ret <4 x float> %res2
2996 declare <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
2997 ; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ps_256
3000 ; CHECK: vscalefps{{.*}}{%k1}
3001 define <8 x float>@test_int_x86_avx512_mask_scalef_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
3002 %res = call <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
3003 %res1 = call <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
3004 %res2 = fadd <8 x float> %res, %res1
3005 ret <8 x float> %res2
3008 declare <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
3010 define <2 x double>@test_int_x86_avx512_mask_unpckh_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
3011 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_128:
3012 ; CHECK: vunpckhpd %xmm1, %xmm0, %xmm2 {%k1}
3013 ; CHECK-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x15,0xc1]
3014 %res = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
3015 %res1 = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
3016 %res2 = fadd <2 x double> %res, %res1
3017 ret <2 x double> %res2
3020 declare <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
3022 define <4 x double>@test_int_x86_avx512_mask_unpckh_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
3023 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_256:
3024 ; CHECK: vunpckhpd %ymm1, %ymm0, %ymm2 {%k1}
3025 ; CHECK-NEXT: vunpckhpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x15,0xc1]
3026 %res = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
3027 %res1 = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
3028 %res2 = fadd <4 x double> %res, %res1
3029 ret <4 x double> %res2
3032 declare <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
3034 define <4 x float>@test_int_x86_avx512_mask_unpckh_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
3035 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_128:
3036 ; CHECK: vunpckhps %xmm1, %xmm0, %xmm2 {%k1}
3037 ; CHECK-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x15,0xc1]
3038 %res = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
3039 %res1 = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
3040 %res2 = fadd <4 x float> %res, %res1
3041 ret <4 x float> %res2
3044 declare <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
3046 define <8 x float>@test_int_x86_avx512_mask_unpckh_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
3047 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_256:
3049 ; CHECK: vunpckhps %ymm1, %ymm0, %ymm2 {%k1}
3050 ; CHECK-NEXT: vunpckhps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x15,0xc1]
3051 %res = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
3052 %res1 = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
3053 %res2 = fadd <8 x float> %res, %res1
3054 ret <8 x float> %res2
3057 declare <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
3059 define <2 x double>@test_int_x86_avx512_mask_unpckl_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
3060 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_128:
3061 ; CHECK: vunpcklpd %xmm1, %xmm0, %xmm2 {%k1}
3062 ; CHECK-NEXT: vunpcklpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x14,0xc1]
3063 %res = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
3064 %res1 = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
3065 %res2 = fadd <2 x double> %res, %res1
3066 ret <2 x double> %res2
3069 declare <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
3071 define <4 x double>@test_int_x86_avx512_mask_unpckl_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
3072 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_256:
3073 ; CHECK: vunpcklpd %ymm1, %ymm0, %ymm2 {%k1}
3074 ; CHECK-NEXT: vunpcklpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x14,0xc1]
3075 %res = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
3076 %res1 = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
3077 %res2 = fadd <4 x double> %res, %res1
3078 ret <4 x double> %res2
3081 declare <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
3083 define <4 x float>@test_int_x86_avx512_mask_unpckl_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
3084 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_128:
3085 ; CHECK: vunpcklps %xmm1, %xmm0, %xmm2 {%k1}
3086 ; CHECK-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x14,0xc1]
3087 %res = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
3088 %res1 = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
3089 %res2 = fadd <4 x float> %res, %res1
3090 ret <4 x float> %res2
3093 declare <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
3095 define <8 x float>@test_int_x86_avx512_mask_unpckl_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
3096 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_256:
3097 ; CHECK: vunpcklps %ymm1, %ymm0, %ymm2 {%k1}
3098 ; CHECK-NEXT: vunpcklps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x14,0xc1]
3099 %res = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
3100 %res1 = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
3101 %res2 = fadd <8 x float> %res, %res1
3102 ret <8 x float> %res2
3105 declare <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
3107 define <4 x i32>@test_int_x86_avx512_mask_punpckhd_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
3108 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_128:
3109 ; CHECK: vpunpckhdq %xmm1, %xmm0, %xmm2 {%k1}
3110 ; CHECK-NEXT: vpunpckhdq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6a,0xc1]
3111 %res = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
3112 %res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
3113 %res2 = add <4 x i32> %res, %res1
3117 declare <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
3119 define <4 x i32>@test_int_x86_avx512_mask_punpckld_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
3120 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_128:
3121 ; CHECK: vpunpckldq %xmm1, %xmm0, %xmm2 {%k1}
3122 ; CHECK-NEXT: vpunpckldq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x62,0xc1]
3123 %res = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
3124 %res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
3125 %res2 = add <4 x i32> %res, %res1
3129 declare <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
3131 define <8 x i32>@test_int_x86_avx512_mask_punpckhd_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
3132 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_256:
3134 ; CHECK: vpunpckhdq %ymm1, %ymm0, %ymm2 {%k1}
3135 ; CHECK-NEXT: vpunpckhdq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6a,0xc1]
3136 %res = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
3137 %res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
3138 %res2 = add <8 x i32> %res, %res1
3142 declare <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
3144 define <8 x i32>@test_int_x86_avx512_mask_punpckld_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
3145 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_256:
3146 ; CHECK: vpunpckldq %ymm1, %ymm0, %ymm2 {%k1}
3147 ; CHECK-NEXT: vpunpckldq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x62,0xc1]
3148 %res = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
3149 %res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
3150 %res2 = add <8 x i32> %res, %res1
3154 declare <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
3156 define <2 x i64>@test_int_x86_avx512_mask_punpckhqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
3157 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_128:
3158 ; CHECK: vpunpckhqdq %xmm1, %xmm0, %xmm2 {%k1}
3159 ; CHECK-NEXT: vpunpckhqdq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6d,0xc1]
3160 %res = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
3161 %res1 = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
3162 %res2 = add <2 x i64> %res, %res1
3166 declare <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
3168 define <2 x i64>@test_int_x86_avx512_mask_punpcklqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
3169 ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_128:
3170 ; CHECK: vpunpcklqdq %xmm1, %xmm0, %xmm2 {%k1}
3171 ; CHECK-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6c,0xc1]
3172 %res = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
3173 %res1 = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
3174 %res2 = add <2 x i64> %res, %res1
3178 declare <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
3180 define <4 x i64>@test_int_x86_avx512_mask_punpcklqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
3181 ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_256:
3182 ; CHECK: vpunpcklqdq %ymm1, %ymm0, %ymm2 {%k1}
3183 ; CHECK-NEXT: vpunpcklqdq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6c,0xc1]
3184 %res = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
3185 %res1 = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
3186 %res2 = add <4 x i64> %res, %res1
3190 declare <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
3192 define <4 x i64>@test_int_x86_avx512_mask_punpckhqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
3193 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_256:
3194 ; CHECK: vpunpckhqdq %ymm1, %ymm0, %ymm2 {%k1}
3195 ; CHECK-NEXT: vpunpckhqdq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6d,0xc1]
3196 %res = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
3197 %res1 = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
3198 %res2 = add <4 x i64> %res, %res1
3202 declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64>, <16 x i8>, i8)
3204 define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
3205 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_128:
3206 ; CHECK: vpmovqb %xmm0, %xmm1 {%k1}
3207 ; CHECK-NEXT: vpmovqb %xmm0, %xmm2 {%k1} {z}
3208 ; CHECK-NEXT: vpmovqb %xmm0, %xmm0
3209 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1)
3210 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2)
3211 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
3212 %res3 = add <16 x i8> %res0, %res1
3213 %res4 = add <16 x i8> %res3, %res2
3217 declare void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64>, i8)
3219 define void @test_int_x86_avx512_mask_pmov_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
3220 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_128:
3221 ; CHECK: vpmovqb %xmm0, (%rdi)
3222 ; CHECK: vpmovqb %xmm0, (%rdi) {%k1}
3223 call void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
3224 call void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
3228 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64>, <16 x i8>, i8)
3230 define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
3231 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_128:
3232 ; CHECK: vpmovsqb %xmm0, %xmm1 {%k1}
3233 ; CHECK-NEXT: vpmovsqb %xmm0, %xmm2 {%k1} {z}
3234 ; CHECK-NEXT: vpmovsqb %xmm0, %xmm0
3235 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1)
3236 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2)
3237 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
3238 %res3 = add <16 x i8> %res0, %res1
3239 %res4 = add <16 x i8> %res3, %res2
3243 declare void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64>, i8)
3245 define void @test_int_x86_avx512_mask_pmovs_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
3246 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_128:
3247 ; CHECK: vpmovsqb %xmm0, (%rdi)
3248 ; CHECK: vpmovsqb %xmm0, (%rdi) {%k1}
3249 call void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
3250 call void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
3254 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64>, <16 x i8>, i8)
3256 define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
3257 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_128:
3258 ; CHECK: vpmovusqb %xmm0, %xmm1 {%k1}
3259 ; CHECK-NEXT: vpmovusqb %xmm0, %xmm2 {%k1} {z}
3260 ; CHECK-NEXT: vpmovusqb %xmm0, %xmm0
3261 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1)
3262 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2)
3263 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
3264 %res3 = add <16 x i8> %res0, %res1
3265 %res4 = add <16 x i8> %res3, %res2
3269 declare void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64>, i8)
3271 define void @test_int_x86_avx512_mask_pmovus_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
3272 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_128:
3273 ; CHECK: vpmovusqb %xmm0, (%rdi)
3274 ; CHECK: vpmovusqb %xmm0, (%rdi) {%k1}
3275 call void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
3276 call void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
3280 declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64>, <16 x i8>, i8)
3282 define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
3283 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_256:
3284 ; CHECK: vpmovqb %ymm0, %xmm1 {%k1}
3285 ; CHECK-NEXT: vpmovqb %ymm0, %xmm2 {%k1} {z}
3286 ; CHECK-NEXT: vpmovqb %ymm0, %xmm0
3287 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1)
3288 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2)
3289 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
3290 %res3 = add <16 x i8> %res0, %res1
3291 %res4 = add <16 x i8> %res3, %res2
3295 declare void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64>, i8)
3297 define void @test_int_x86_avx512_mask_pmov_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
3298 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_256:
3299 ; CHECK: vpmovqb %ymm0, (%rdi)
3300 ; CHECK: vpmovqb %ymm0, (%rdi) {%k1}
3301 call void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
3302 call void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
3306 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64>, <16 x i8>, i8)
3308 define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
3309 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_256:
3310 ; CHECK: vpmovsqb %ymm0, %xmm1 {%k1}
3311 ; CHECK-NEXT: vpmovsqb %ymm0, %xmm2 {%k1} {z}
3312 ; CHECK-NEXT: vpmovsqb %ymm0, %xmm0
3313 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1)
3314 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2)
3315 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
3316 %res3 = add <16 x i8> %res0, %res1
3317 %res4 = add <16 x i8> %res3, %res2
3321 declare void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64>, i8)
3323 define void @test_int_x86_avx512_mask_pmovs_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
3324 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_256:
3325 ; CHECK: vpmovsqb %ymm0, (%rdi)
3326 ; CHECK: vpmovsqb %ymm0, (%rdi) {%k1}
3327 call void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
3328 call void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
3332 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64>, <16 x i8>, i8)
3334 define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
3335 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_256:
3336 ; CHECK: vpmovusqb %ymm0, %xmm1 {%k1}
3337 ; CHECK-NEXT: vpmovusqb %ymm0, %xmm2 {%k1} {z}
3338 ; CHECK-NEXT: vpmovusqb %ymm0, %xmm0
3339 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1)
3340 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2)
3341 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
3342 %res3 = add <16 x i8> %res0, %res1
3343 %res4 = add <16 x i8> %res3, %res2
3347 declare void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64>, i8)
3349 define void @test_int_x86_avx512_mask_pmovus_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
3350 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_256:
3351 ; CHECK: vpmovusqb %ymm0, (%rdi)
3352 ; CHECK: vpmovusqb %ymm0, (%rdi) {%k1}
3353 call void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
3354 call void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
3358 declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64>, <8 x i16>, i8)
3360 define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) {
3361 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_128:
3362 ; CHECK: vpmovqw %xmm0, %xmm1 {%k1}
3363 ; CHECK-NEXT: vpmovqw %xmm0, %xmm2 {%k1} {z}
3364 ; CHECK-NEXT: vpmovqw %xmm0, %xmm0
3365 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1)
3366 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2)
3367 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
3368 %res3 = add <8 x i16> %res0, %res1
3369 %res4 = add <8 x i16> %res3, %res2
3373 declare void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64>, i8)
3375 define void @test_int_x86_avx512_mask_pmov_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
3376 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_128:
3377 ; CHECK: vpmovqw %xmm0, (%rdi)
3378 ; CHECK: vpmovqw %xmm0, (%rdi) {%k1}
3379 call void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
3380 call void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
3384 declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64>, <8 x i16>, i8)
3386 define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) {
3387 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_128:
3388 ; CHECK: vpmovsqw %xmm0, %xmm1 {%k1}
3389 ; CHECK-NEXT: vpmovsqw %xmm0, %xmm2 {%k1} {z}
3390 ; CHECK-NEXT: vpmovsqw %xmm0, %xmm0
3391 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1)
3392 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2)
3393 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
3394 %res3 = add <8 x i16> %res0, %res1
3395 %res4 = add <8 x i16> %res3, %res2
3399 declare void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64>, i8)
3401 define void @test_int_x86_avx512_mask_pmovs_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
3402 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_128:
3403 ; CHECK: vpmovsqw %xmm0, (%rdi)
3404 ; CHECK: vpmovsqw %xmm0, (%rdi) {%k1}
3405 call void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
3406 call void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
3410 declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64>, <8 x i16>, i8)
3412 define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) {
3413 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_128:
3414 ; CHECK: vpmovusqw %xmm0, %xmm1 {%k1}
3415 ; CHECK-NEXT: vpmovusqw %xmm0, %xmm2 {%k1} {z}
3416 ; CHECK-NEXT: vpmovusqw %xmm0, %xmm0
3417 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1)
3418 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2)
3419 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
3420 %res3 = add <8 x i16> %res0, %res1
3421 %res4 = add <8 x i16> %res3, %res2
3425 declare void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64>, i8)
3427 define void @test_int_x86_avx512_mask_pmovus_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
3428 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_128:
3429 ; CHECK: vpmovusqw %xmm0, (%rdi)
3430 ; CHECK: vpmovusqw %xmm0, (%rdi) {%k1}
3431 call void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
3432 call void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
3436 declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64>, <8 x i16>, i8)
3438 define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) {
3439 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_256:
3440 ; CHECK: vpmovqw %ymm0, %xmm1 {%k1}
3441 ; CHECK-NEXT: vpmovqw %ymm0, %xmm2 {%k1} {z}
3442 ; CHECK-NEXT: vpmovqw %ymm0, %xmm0
3443 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1)
3444 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2)
3445 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
3446 %res3 = add <8 x i16> %res0, %res1
3447 %res4 = add <8 x i16> %res3, %res2
3451 declare void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64>, i8)
3453 define void @test_int_x86_avx512_mask_pmov_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
3454 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_256:
3455 ; CHECK: vpmovqw %ymm0, (%rdi)
3456 ; CHECK: vpmovqw %ymm0, (%rdi) {%k1}
3457 call void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
3458 call void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
3462 declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64>, <8 x i16>, i8)
3464 define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) {
3465 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_256:
3466 ; CHECK: vpmovsqw %ymm0, %xmm1 {%k1}
3467 ; CHECK-NEXT: vpmovsqw %ymm0, %xmm2 {%k1} {z}
3468 ; CHECK-NEXT: vpmovsqw %ymm0, %xmm0
3469 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1)
3470 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2)
3471 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
3472 %res3 = add <8 x i16> %res0, %res1
3473 %res4 = add <8 x i16> %res3, %res2
3477 declare void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64>, i8)
3479 define void @test_int_x86_avx512_mask_pmovs_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
3480 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_256:
3481 ; CHECK: vpmovsqw %ymm0, (%rdi)
3482 ; CHECK: vpmovsqw %ymm0, (%rdi) {%k1}
3483 call void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
3484 call void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
3488 declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64>, <8 x i16>, i8)
3490 define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) {
3491 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_256:
3492 ; CHECK: vpmovusqw %ymm0, %xmm1 {%k1}
3493 ; CHECK-NEXT: vpmovusqw %ymm0, %xmm2 {%k1} {z}
3494 ; CHECK-NEXT: vpmovusqw %ymm0, %xmm0
3495 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1)
3496 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2)
3497 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
3498 %res3 = add <8 x i16> %res0, %res1
3499 %res4 = add <8 x i16> %res3, %res2
3503 declare void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64>, i8)
3505 define void @test_int_x86_avx512_mask_pmovus_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
3506 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_256:
3507 ; CHECK: vpmovusqw %ymm0, (%rdi)
3508 ; CHECK: vpmovusqw %ymm0, (%rdi) {%k1}
3509 call void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
3510 call void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
3514 declare <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64>, <4 x i32>, i8)
3516 define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) {
3517 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_128:
3518 ; CHECK: vpmovqd %xmm0, %xmm1 {%k1}
3519 ; CHECK-NEXT: vpmovqd %xmm0, %xmm2 {%k1} {z}
3520 ; CHECK-NEXT: vpmovqd %xmm0, %xmm0
3521 %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1)
3522 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2)
3523 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
3524 %res3 = add <4 x i32> %res0, %res1
3525 %res4 = add <4 x i32> %res3, %res2
3529 declare void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64>, i8)
3531 define void @test_int_x86_avx512_mask_pmov_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
3532 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_128:
3533 ; CHECK: vpmovqd %xmm0, (%rdi)
3534 ; CHECK: vpmovqd %xmm0, (%rdi) {%k1}
3535 call void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
3536 call void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
3540 declare <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64>, <4 x i32>, i8)
3542 define <4 x i32>@test_int_x86_avx512_mask_pmovs_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) {
3543 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_128:
3544 ; CHECK: vpmovsqd %xmm0, %xmm1 {%k1}
3545 ; CHECK-NEXT: vpmovsqd %xmm0, %xmm2 {%k1} {z}
3546 ; CHECK-NEXT: vpmovsqd %xmm0, %xmm0
3547 %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1)
3548 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2)
3549 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
3550 %res3 = add <4 x i32> %res0, %res1
3551 %res4 = add <4 x i32> %res3, %res2
3555 declare void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64>, i8)
3557 define void @test_int_x86_avx512_mask_pmovs_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
3558 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_128:
3559 ; CHECK: vpmovsqd %xmm0, (%rdi)
3560 ; CHECK: vpmovsqd %xmm0, (%rdi) {%k1}
3561 call void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
3562 call void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
3566 declare <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64>, <4 x i32>, i8)
3568 define <4 x i32>@test_int_x86_avx512_mask_pmovus_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) {
3569 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_128:
3570 ; CHECK: vpmovusqd %xmm0, %xmm1 {%k1}
3571 ; CHECK-NEXT: vpmovusqd %xmm0, %xmm2 {%k1} {z}
3572 ; CHECK-NEXT: vpmovusqd %xmm0, %xmm0
3573 %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1)
3574 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2)
3575 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
3576 %res3 = add <4 x i32> %res0, %res1
3577 %res4 = add <4 x i32> %res3, %res2
3581 declare void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64>, i8)
3583 define void @test_int_x86_avx512_mask_pmovus_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
3584 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_128:
3585 ; CHECK: vpmovusqd %xmm0, (%rdi)
3586 ; CHECK: vpmovusqd %xmm0, (%rdi) {%k1}
3587 call void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
3588 call void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
3592 declare <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64>, <4 x i32>, i8)
3594 define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) {
3595 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_256:
3596 ; CHECK: vpmovqd %ymm0, %xmm1 {%k1}
3597 ; CHECK-NEXT: vpmovqd %ymm0, %xmm2 {%k1} {z}
3598 ; CHECK-NEXT: vpmovqd %ymm0, %xmm0
3599 %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1)
3600 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2)
3601 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
3602 %res3 = add <4 x i32> %res0, %res1
3603 %res4 = add <4 x i32> %res3, %res2
3607 declare void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64>, i8)
3609 define void @test_int_x86_avx512_mask_pmov_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
3610 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_256:
3611 ; CHECK: vpmovqd %ymm0, (%rdi)
3612 ; CHECK: vpmovqd %ymm0, (%rdi) {%k1}
3613 call void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
3614 call void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
3618 declare <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64>, <4 x i32>, i8)
3620 define <4 x i32>@test_int_x86_avx512_mask_pmovs_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) {
3621 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_256:
3622 ; CHECK: vpmovsqd %ymm0, %xmm1 {%k1}
3623 ; CHECK-NEXT: vpmovsqd %ymm0, %xmm2 {%k1} {z}
3624 ; CHECK-NEXT: vpmovsqd %ymm0, %xmm0
3625 %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1)
3626 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2)
3627 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
3628 %res3 = add <4 x i32> %res0, %res1
3629 %res4 = add <4 x i32> %res3, %res2
3633 declare void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64>, i8)
3635 define void @test_int_x86_avx512_mask_pmovs_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
3636 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_256:
3637 ; CHECK: vpmovsqd %ymm0, (%rdi)
3638 ; CHECK: vpmovsqd %ymm0, (%rdi) {%k1}
3639 call void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
3640 call void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
3644 declare <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64>, <4 x i32>, i8)
3646 define <4 x i32>@test_int_x86_avx512_mask_pmovus_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) {
3647 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_256:
3648 ; CHECK: vpmovusqd %ymm0, %xmm1 {%k1}
3649 ; CHECK-NEXT: vpmovusqd %ymm0, %xmm2 {%k1} {z}
3650 ; CHECK-NEXT: vpmovusqd %ymm0, %xmm0
3651 %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1)
3652 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2)
3653 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
3654 %res3 = add <4 x i32> %res0, %res1
3655 %res4 = add <4 x i32> %res3, %res2
3659 declare void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64>, i8)
3661 define void @test_int_x86_avx512_mask_pmovus_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
3662 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_256:
3663 ; CHECK: vpmovusqd %ymm0, (%rdi)
3664 ; CHECK: vpmovusqd %ymm0, (%rdi) {%k1}
3665 call void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
3666 call void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
3670 declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32>, <16 x i8>, i8)
3672 define <16 x i8>@test_int_x86_avx512_mask_pmov_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
3673 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_128:
3674 ; CHECK: vpmovdb %xmm0, %xmm1 {%k1}
3675 ; CHECK-NEXT: vpmovdb %xmm0, %xmm2 {%k1} {z}
3676 ; CHECK-NEXT: vpmovdb %xmm0, %xmm0
3677 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1)
3678 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2)
3679 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
3680 %res3 = add <16 x i8> %res0, %res1
3681 %res4 = add <16 x i8> %res3, %res2
3685 declare void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32>, i8)
3687 define void @test_int_x86_avx512_mask_pmov_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
3688 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_128:
3689 ; CHECK: vpmovdb %xmm0, (%rdi)
3690 ; CHECK: vpmovdb %xmm0, (%rdi) {%k1}
3691 call void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
3692 call void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
3696 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32>, <16 x i8>, i8)
3698 define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
3699 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_128:
3700 ; CHECK: vpmovsdb %xmm0, %xmm1 {%k1}
3701 ; CHECK-NEXT: vpmovsdb %xmm0, %xmm2 {%k1} {z}
3702 ; CHECK-NEXT: vpmovsdb %xmm0, %xmm0
3703 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1)
3704 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2)
3705 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
3706 %res3 = add <16 x i8> %res0, %res1
3707 %res4 = add <16 x i8> %res3, %res2
3711 declare void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32>, i8)
3713 define void @test_int_x86_avx512_mask_pmovs_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
3714 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_128:
3715 ; CHECK: vpmovsdb %xmm0, (%rdi)
3716 ; CHECK: vpmovsdb %xmm0, (%rdi) {%k1}
3717 call void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
3718 call void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
3722 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32>, <16 x i8>, i8)
3724 define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
3725 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_128:
3726 ; CHECK: vpmovusdb %xmm0, %xmm1 {%k1}
3727 ; CHECK-NEXT: vpmovusdb %xmm0, %xmm2 {%k1} {z}
3728 ; CHECK-NEXT: vpmovusdb %xmm0, %xmm0
3729 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1)
3730 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2)
3731 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
3732 %res3 = add <16 x i8> %res0, %res1
3733 %res4 = add <16 x i8> %res3, %res2
3737 declare void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32>, i8)
3739 define void @test_int_x86_avx512_mask_pmovus_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
3740 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_128:
3741 ; CHECK: vpmovusdb %xmm0, (%rdi)
3742 ; CHECK: vpmovusdb %xmm0, (%rdi) {%k1}
3743 call void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
3744 call void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
3748 declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32>, <16 x i8>, i8)
3750 define <16 x i8>@test_int_x86_avx512_mask_pmov_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
3751 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_256:
3752 ; CHECK: vpmovdb %ymm0, %xmm1 {%k1}
3753 ; CHECK-NEXT: vpmovdb %ymm0, %xmm2 {%k1} {z}
3754 ; CHECK-NEXT: vpmovdb %ymm0, %xmm0
3755 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1)
3756 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2)
3757 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
3758 %res3 = add <16 x i8> %res0, %res1
3759 %res4 = add <16 x i8> %res3, %res2
3763 declare void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32>, i8)
3765 define void @test_int_x86_avx512_mask_pmov_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
3766 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_256:
3767 ; CHECK: vpmovdb %ymm0, (%rdi)
3768 ; CHECK: vpmovdb %ymm0, (%rdi) {%k1}
3769 call void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
3770 call void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
3774 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32>, <16 x i8>, i8)
3776 define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
3777 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_256:
3778 ; CHECK: vpmovsdb %ymm0, %xmm1 {%k1}
3779 ; CHECK-NEXT: vpmovsdb %ymm0, %xmm2 {%k1} {z}
3780 ; CHECK-NEXT: vpmovsdb %ymm0, %xmm0
3781 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1)
3782 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2)
3783 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
3784 %res3 = add <16 x i8> %res0, %res1
3785 %res4 = add <16 x i8> %res3, %res2
3789 declare void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32>, i8)
3791 define void @test_int_x86_avx512_mask_pmovs_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
3792 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_256:
3793 ; CHECK: vpmovsdb %ymm0, (%rdi)
3794 ; CHECK: vpmovsdb %ymm0, (%rdi) {%k1}
3795 call void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
3796 call void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
3800 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32>, <16 x i8>, i8)
3802 define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
3803 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_256:
3804 ; CHECK: vpmovusdb %ymm0, %xmm1 {%k1}
3805 ; CHECK-NEXT: vpmovusdb %ymm0, %xmm2 {%k1} {z}
3806 ; CHECK-NEXT: vpmovusdb %ymm0, %xmm0
3807 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1)
3808 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2)
3809 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
3810 %res3 = add <16 x i8> %res0, %res1
3811 %res4 = add <16 x i8> %res3, %res2
3815 declare void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32>, i8)
3817 define void @test_int_x86_avx512_mask_pmovus_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
3818 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_256:
3819 ; CHECK: vpmovusdb %ymm0, (%rdi)
3820 ; CHECK: vpmovusdb %ymm0, (%rdi) {%k1}
3821 call void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
3822 call void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
3826 declare <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32>, <8 x i16>, i8)
3828 define <8 x i16>@test_int_x86_avx512_mask_pmov_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) {
3829 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_128:
3830 ; CHECK: vpmovdw %xmm0, %xmm1 {%k1}
3831 ; CHECK-NEXT: vpmovdw %xmm0, %xmm2 {%k1} {z}
3832 ; CHECK-NEXT: vpmovdw %xmm0, %xmm0
3833 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1)
3834 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2)
3835 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
3836 %res3 = add <8 x i16> %res0, %res1
3837 %res4 = add <8 x i16> %res3, %res2
3841 declare void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32>, i8)
3843 define void @test_int_x86_avx512_mask_pmov_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
3844 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_128:
3845 ; CHECK: vpmovdw %xmm0, (%rdi)
3846 ; CHECK: vpmovdw %xmm0, (%rdi) {%k1}
3847 call void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
3848 call void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
3852 declare <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32>, <8 x i16>, i8)
3854 define <8 x i16>@test_int_x86_avx512_mask_pmovs_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) {
3855 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_128:
3856 ; CHECK: vpmovsdw %xmm0, %xmm1 {%k1}
3857 ; CHECK-NEXT: vpmovsdw %xmm0, %xmm2 {%k1} {z}
3858 ; CHECK-NEXT: vpmovsdw %xmm0, %xmm0
3859 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1)
3860 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2)
3861 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
3862 %res3 = add <8 x i16> %res0, %res1
3863 %res4 = add <8 x i16> %res3, %res2
3867 declare void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32>, i8)
3869 define void @test_int_x86_avx512_mask_pmovs_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
3870 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_128:
3871 ; CHECK: vpmovsdw %xmm0, (%rdi)
3872 ; CHECK: vpmovsdw %xmm0, (%rdi) {%k1}
3873 call void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
3874 call void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
3878 declare <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32>, <8 x i16>, i8)
3880 define <8 x i16>@test_int_x86_avx512_mask_pmovus_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) {
3881 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_128:
3882 ; CHECK: vpmovusdw %xmm0, %xmm1 {%k1}
3883 ; CHECK-NEXT: vpmovusdw %xmm0, %xmm2 {%k1} {z}
3884 ; CHECK-NEXT: vpmovusdw %xmm0, %xmm0
3885 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1)
3886 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2)
3887 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
3888 %res3 = add <8 x i16> %res0, %res1
3889 %res4 = add <8 x i16> %res3, %res2
3893 declare void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32>, i8)
3895 define void @test_int_x86_avx512_mask_pmovus_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
3896 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_128:
3897 ; CHECK: vpmovusdw %xmm0, (%rdi)
3898 ; CHECK: vpmovusdw %xmm0, (%rdi) {%k1}
3899 call void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
3900 call void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
3904 declare <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32>, <8 x i16>, i8)
3906 define <8 x i16>@test_int_x86_avx512_mask_pmov_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) {
3907 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_256:
3908 ; CHECK: vpmovdw %ymm0, %xmm1 {%k1}
3909 ; CHECK-NEXT: vpmovdw %ymm0, %xmm2 {%k1} {z}
3910 ; CHECK-NEXT: vpmovdw %ymm0, %xmm0
3911 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1)
3912 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2)
3913 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
3914 %res3 = add <8 x i16> %res0, %res1
3915 %res4 = add <8 x i16> %res3, %res2
3919 declare void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32>, i8)
3921 define void @test_int_x86_avx512_mask_pmov_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
3922 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_256:
3923 ; CHECK: vpmovdw %ymm0, (%rdi)
3924 ; CHECK: vpmovdw %ymm0, (%rdi) {%k1}
3925 call void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
3926 call void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
3930 declare <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32>, <8 x i16>, i8)
3932 define <8 x i16>@test_int_x86_avx512_mask_pmovs_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) {
3933 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_256:
3934 ; CHECK: vpmovsdw %ymm0, %xmm1 {%k1}
3935 ; CHECK-NEXT: vpmovsdw %ymm0, %xmm2 {%k1} {z}
3936 ; CHECK-NEXT: vpmovsdw %ymm0, %xmm0
3937 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1)
3938 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2)
3939 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
3940 %res3 = add <8 x i16> %res0, %res1
3941 %res4 = add <8 x i16> %res3, %res2
3945 declare void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32>, i8)
3947 define void @test_int_x86_avx512_mask_pmovs_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
3948 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_256:
3949 ; CHECK: vpmovsdw %ymm0, (%rdi)
3950 ; CHECK: vpmovsdw %ymm0, (%rdi) {%k1}
3951 call void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
3952 call void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
3956 declare <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32>, <8 x i16>, i8)
3958 define <8 x i16>@test_int_x86_avx512_mask_pmovus_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) {
3959 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_256:
3960 ; CHECK: vpmovusdw %ymm0, %xmm1 {%k1}
3961 ; CHECK-NEXT: vpmovusdw %ymm0, %xmm2 {%k1} {z}
3962 ; CHECK-NEXT: vpmovusdw %ymm0, %xmm0
3963 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1)
3964 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2)
3965 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
3966 %res3 = add <8 x i16> %res0, %res1
3967 %res4 = add <8 x i16> %res3, %res2
3971 declare void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32>, i8)
3973 define void @test_int_x86_avx512_mask_pmovus_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
3974 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_256:
3975 ; CHECK: vpmovusdw %ymm0, (%rdi)
3976 ; CHECK: vpmovusdw %ymm0, (%rdi) {%k1}
3977 call void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
3978 call void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
3982 declare <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32>, <2 x double>, i8)
3984 define <2 x double>@test_int_x86_avx512_mask_cvt_dq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) {
3985 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_128:
3987 ; CHECK-NEXT: movzbl %dil, %eax
3988 ; CHECK-NEXT: kmovw %eax, %k1
3989 ; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm1 {%k1}
3990 ; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm0
3991 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
3993 %res = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2)
3994 %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1)
3995 %res2 = fadd <2 x double> %res, %res1
3996 ret <2 x double> %res2
3999 declare <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32>, <4 x double>, i8)
4001 define <4 x double>@test_int_x86_avx512_mask_cvt_dq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) {
4002 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_256:
4004 ; CHECK-NEXT: movzbl %dil, %eax
4005 ; CHECK-NEXT: kmovw %eax, %k1
4006 ; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm1 {%k1}
4007 ; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0
4008 ; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
4010 %res = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2)
4011 %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1)
4012 %res2 = fadd <4 x double> %res, %res1
4013 ret <4 x double> %res2
4016 declare <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32>, <4 x float>, i8)
4018 define <4 x float>@test_int_x86_avx512_mask_cvt_dq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) {
4019 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_128:
4021 ; CHECK-NEXT: movzbl %dil, %eax
4022 ; CHECK-NEXT: kmovw %eax, %k1
4023 ; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm1 {%k1}
4024 ; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0
4025 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
4027 %res = call <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 %x2)
4028 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 -1)
4029 %res2 = fadd <4 x float> %res, %res1
4030 ret <4 x float> %res2
4033 declare <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32>, <8 x float>, i8)
4035 define <8 x float>@test_int_x86_avx512_mask_cvt_dq2ps_256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) {
4036 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_256:
4038 ; CHECK-NEXT: movzbl %dil, %eax
4039 ; CHECK-NEXT: kmovw %eax, %k1
4040 ; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm1 {%k1}
4041 ; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
4042 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
4044 %res = call <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 %x2)
4045 %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 -1)
4046 %res2 = fadd <8 x float> %res, %res1
4047 ret <8 x float> %res2
4050 declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double>, <4 x i32>, i8)
4052 define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
4053 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_128:
4055 ; CHECK-NEXT: movzbl %dil, %eax
4056 ; CHECK-NEXT: kmovw %eax, %k1
4057 ; CHECK-NEXT: vcvtpd2dq %xmm0, %xmm1 {%k1}
4058 ; CHECK-NEXT: vcvtpd2dq %xmm0, %xmm0
4059 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
4061 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
4062 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
4063 %res2 = add <4 x i32> %res, %res1
4067 declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double>, <4 x i32>, i8)
4069 define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
4070 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_256:
4072 ; CHECK-NEXT: movzbl %dil, %eax
4073 ; CHECK-NEXT: kmovw %eax, %k1
4074 ; CHECK-NEXT: vcvtpd2dq %ymm0, %xmm1 {%k1}
4075 ; CHECK-NEXT: vcvtpd2dq %ymm0, %xmm0
4076 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
4078 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
4079 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1)
4080 %res2 = add <4 x i32> %res, %res1
4084 declare <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double>, <4 x float>, i8)
4086 define <4 x float>@test_int_x86_avx512_mask_cvt_pd2ps_256(<4 x double> %x0, <4 x float> %x1, i8 %x2) {
4087 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_256:
4089 ; CHECK-NEXT: movzbl %dil, %eax
4090 ; CHECK-NEXT: kmovw %eax, %k1
4091 ; CHECK-NEXT: vcvtpd2ps %ymm0, %xmm1 {%k1}
4092 ; CHECK-NEXT: vcvtpd2ps %ymm0, %xmm0
4093 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
4095 %res = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double> %x0, <4 x float> %x1, i8 %x2)
4096 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double> %x0, <4 x float> %x1, i8 -1)
4097 %res2 = fadd <4 x float> %res, %res1
4098 ret <4 x float> %res2
4101 declare <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double>, <4 x float>, i8)
4103 define <4 x float>@test_int_x86_avx512_mask_cvt_pd2ps(<2 x double> %x0, <4 x float> %x1, i8 %x2) {
4104 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ps:
4106 ; CHECK-NEXT: movzbl %dil, %eax
4107 ; CHECK-NEXT: kmovw %eax, %k1
4108 ; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm1 {%k1}
4109 ; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0
4110 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
4112 %res = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double> %x0, <4 x float> %x1, i8 %x2)
4113 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double> %x0, <4 x float> %x1, i8 -1)
4114 %res2 = fadd <4 x float> %res, %res1
4115 ret <4 x float> %res2
4118 declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double>, <4 x i32>, i8)
4120 define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
4121 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_128:
4123 ; CHECK-NEXT: movzbl %dil, %eax
4124 ; CHECK-NEXT: kmovw %eax, %k1
4125 ; CHECK-NEXT: vcvtpd2udq %xmm0, %xmm1 {%k1}
4126 ; CHECK-NEXT: vcvtpd2udq %xmm0, %xmm0
4127 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
4129 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
4130 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
4131 %res2 = add <4 x i32> %res, %res1
4135 declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double>, <4 x i32>, i8)
4137 define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
4138 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_256:
4140 ; CHECK-NEXT: movzbl %dil, %eax
4141 ; CHECK-NEXT: kmovw %eax, %k1
4142 ; CHECK-NEXT: vcvtpd2udq %ymm0, %xmm1 {%k1}
4143 ; CHECK-NEXT: vcvtpd2udq %ymm0, %xmm0
4144 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
4146 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
4147 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1)
4148 %res2 = add <4 x i32> %res, %res1
4152 declare <4 x i32> @llvm.x86.avx512.mask.cvtps2dq.128(<4 x float>, <4 x i32>, i8)
4154 define <4 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
4155 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_128:
4157 ; CHECK-NEXT: movzbl %dil, %eax
4158 ; CHECK-NEXT: kmovw %eax, %k1
4159 ; CHECK-NEXT: vcvtps2dq %xmm0, %xmm1 {%k1}
4160 ; CHECK-NEXT: vcvtps2dq %xmm0, %xmm0
4161 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
4163 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
4164 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
4165 %res2 = add <4 x i32> %res, %res1
4169 declare <8 x i32> @llvm.x86.avx512.mask.cvtps2dq.256(<8 x float>, <8 x i32>, i8)
4171 define <8 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
4172 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_256:
4174 ; CHECK-NEXT: movzbl %dil, %eax
4175 ; CHECK-NEXT: kmovw %eax, %k1
4176 ; CHECK-NEXT: vcvtps2dq %ymm0, %ymm1 {%k1}
4177 ; CHECK-NEXT: vcvtps2dq %ymm0, %ymm0
4178 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
4180 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
4181 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
4182 %res2 = add <8 x i32> %res, %res1
4186 declare <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float>, <2 x double>, i8)
4188 define <2 x double>@test_int_x86_avx512_mask_cvt_ps2pd_128(<4 x float> %x0, <2 x double> %x1, i8 %x2) {
4189 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_128:
4191 ; CHECK-NEXT: movzbl %dil, %eax
4192 ; CHECK-NEXT: kmovw %eax, %k1
4193 ; CHECK-NEXT: vcvtps2pd %xmm0, %xmm1 {%k1}
4194 ; CHECK-NEXT: vcvtps2pd %xmm0, %xmm0
4195 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
4197 %res = call <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float> %x0, <2 x double> %x1, i8 %x2)
4198 %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float> %x0, <2 x double> %x1, i8 -1)
4199 %res2 = fadd <2 x double> %res, %res1
4200 ret <2 x double> %res2
4203 declare <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float>, <4 x double>, i8)
4205 define <4 x double>@test_int_x86_avx512_mask_cvt_ps2pd_256(<4 x float> %x0, <4 x double> %x1, i8 %x2) {
4206 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_256:
4208 ; CHECK-NEXT: movzbl %dil, %eax
4209 ; CHECK-NEXT: kmovw %eax, %k1
4210 ; CHECK-NEXT: vcvtps2pd %xmm0, %ymm1 {%k1}
4211 ; CHECK-NEXT: vcvtps2pd %xmm0, %ymm0
4212 ; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
4214 %res = call <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float> %x0, <4 x double> %x1, i8 %x2)
4215 %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float> %x0, <4 x double> %x1, i8 -1)
4216 %res2 = fadd <4 x double> %res, %res1
4217 ret <4 x double> %res2
4220 declare <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float>, <4 x i32>, i8)
4222 define <4 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
4223 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_128:
4225 ; CHECK-NEXT: movzbl %dil, %eax
4226 ; CHECK-NEXT: kmovw %eax, %k1
4227 ; CHECK-NEXT: vcvtps2udq %xmm0, %xmm1 {%k1}
4228 ; CHECK-NEXT: vcvtps2udq %xmm0, %xmm0
4229 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
4231 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
4232 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
4233 %res2 = add <4 x i32> %res, %res1
4237 declare <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float>, <8 x i32>, i8)
4239 define <8 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
4240 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_256:
4242 ; CHECK-NEXT: movzbl %dil, %eax
4243 ; CHECK-NEXT: kmovw %eax, %k1
4244 ; CHECK-NEXT: vcvtps2udq %ymm0, %ymm1 {%k1}
4245 ; CHECK-NEXT: vcvtps2udq %ymm0, %ymm0
4246 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
4248 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
4249 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
4250 %res2 = add <8 x i32> %res, %res1
4254 declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double>, <4 x i32>, i8)
4256 define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
4257 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_128:
4259 ; CHECK-NEXT: movzbl %dil, %eax
4260 ; CHECK-NEXT: kmovw %eax, %k1
4261 ; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm1 {%k1}
4262 ; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm0
4263 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
4265 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
4266 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
4267 %res2 = add <4 x i32> %res, %res1
4271 declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double>, <4 x i32>, i8)
4273 define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
4274 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_256:
4276 ; CHECK-NEXT: movzbl %dil, %eax
4277 ; CHECK-NEXT: kmovw %eax, %k1
4278 ; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm1 {%k1}
4279 ; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm0
4280 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
4282 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
4283 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1)
4284 %res2 = add <4 x i32> %res, %res1
4288 declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double>, <4 x i32>, i8)
4290 define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
4291 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_128:
4293 ; CHECK-NEXT: movzbl %dil, %eax
4294 ; CHECK-NEXT: kmovw %eax, %k1
4295 ; CHECK-NEXT: vcvttpd2udq %xmm0, %xmm1 {%k1}
4296 ; CHECK-NEXT: vcvttpd2udq %xmm0, %xmm0
4297 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
4299 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
4300 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
4301 %res2 = add <4 x i32> %res, %res1
4305 declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double>, <4 x i32>, i8)
4307 define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
4308 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_256:
4310 ; CHECK-NEXT: movzbl %dil, %eax
4311 ; CHECK-NEXT: kmovw %eax, %k1
4312 ; CHECK-NEXT: vcvttpd2udq %ymm0, %xmm1 {%k1}
4313 ; CHECK-NEXT: vcvttpd2udq %ymm0, %xmm0
4314 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
4316 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
4317 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1)
4318 %res2 = add <4 x i32> %res, %res1
4322 declare <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float>, <4 x i32>, i8)
4324 define <4 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
4325 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_128:
4327 ; CHECK-NEXT: movzbl %dil, %eax
4328 ; CHECK-NEXT: kmovw %eax, %k1
4329 ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm1 {%k1}
4330 ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
4331 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
4333 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
4334 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
4335 %res2 = add <4 x i32> %res, %res1
4339 declare <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float>, <8 x i32>, i8)
4341 define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
4342 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_256:
4344 ; CHECK-NEXT: movzbl %dil, %eax
4345 ; CHECK-NEXT: kmovw %eax, %k1
4346 ; CHECK-NEXT: vcvttps2dq %ymm0, %ymm1 {%k1}
4347 ; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
4348 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
4350 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
4351 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
4352 %res2 = add <8 x i32> %res, %res1
4356 declare <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float>, <4 x i32>, i8)
4358 define <4 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
4359 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_128:
4361 ; CHECK-NEXT: movzbl %dil, %eax
4362 ; CHECK-NEXT: kmovw %eax, %k1
4363 ; CHECK-NEXT: vcvttps2udq %xmm0, %xmm1 {%k1}
4364 ; CHECK-NEXT: vcvttps2udq %xmm0, %xmm0
4365 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
4367 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
4368 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
4369 %res2 = add <4 x i32> %res, %res1
4373 declare <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float>, <8 x i32>, i8)
4375 define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
4376 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_256:
4378 ; CHECK-NEXT: movzbl %dil, %eax
4379 ; CHECK-NEXT: kmovw %eax, %k1
4380 ; CHECK-NEXT: vcvttps2udq %ymm0, %ymm1 {%k1}
4381 ; CHECK-NEXT: vcvttps2udq %ymm0, %ymm0
4382 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
4384 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
4385 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
4386 %res2 = add <8 x i32> %res, %res1
4390 declare <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32>, <2 x double>, i8)
4392 define <2 x double>@test_int_x86_avx512_mask_cvt_udq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) {
4393 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_128:
4395 ; CHECK-NEXT: movzbl %dil, %eax
4396 ; CHECK-NEXT: kmovw %eax, %k1
4397 ; CHECK-NEXT: vcvtudq2pd %xmm0, %xmm1 {%k1}
4398 ; CHECK-NEXT: vcvtudq2pd %xmm0, %xmm0
4399 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
4401 %res = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2)
4402 %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1)
4403 %res2 = fadd <2 x double> %res, %res1
4404 ret <2 x double> %res2
4407 declare <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32>, <4 x double>, i8)
4409 define <4 x double>@test_int_x86_avx512_mask_cvt_udq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) {
4410 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_256:
4412 ; CHECK-NEXT: movzbl %dil, %eax
4413 ; CHECK-NEXT: kmovw %eax, %k1
4414 ; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm1 {%k1}
4415 ; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm0
4416 ; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
4418 %res = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2)
4419 %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1)
4420 %res2 = fadd <4 x double> %res, %res1
4421 ret <4 x double> %res2
4424 declare <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32>, <4 x float>, i8)
4426 define <4 x float>@test_int_x86_avx512_mask_cvt_udq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) {
4427 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_128:
4429 ; CHECK-NEXT: movzbl %dil, %eax
4430 ; CHECK-NEXT: kmovw %eax, %k1
4431 ; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm1 {%k1}
4432 ; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm0
4433 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
4435 %res = call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 %x2)
4436 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 -1)
4437 %res2 = fadd <4 x float> %res, %res1
4438 ret <4 x float> %res2
4441 declare <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32>, <8 x float>, i8)
4443 define <8 x float>@test_int_x86_avx512_mask_cvt_udq2ps_256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) {
4444 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_256:
4446 ; CHECK-NEXT: movzbl %dil, %eax
4447 ; CHECK-NEXT: kmovw %eax, %k1
4448 ; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm1 {%k1}
4449 ; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm0
4450 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
4452 %res = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 %x2)
4453 %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 -1)
4454 %res2 = fadd <8 x float> %res, %res1
4455 ret <8 x float> %res2
4458 declare <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double>, i32, <2 x double>, i8)
4459 ; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_pd_128
4462 ; CHECK: vrndscalepd {{.*}}{%k1}
4463 ; CHECK: vrndscalepd
4464 define <2 x double>@test_int_x86_avx512_mask_rndscale_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
4465 %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %x0, i32 4, <2 x double> %x2, i8 %x3)
4466 %res1 = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %x0, i32 88, <2 x double> %x2, i8 -1)
4467 %res2 = fadd <2 x double> %res, %res1
4468 ret <2 x double> %res2
4471 declare <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double>, i32, <4 x double>, i8)
4472 ; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_pd_256
4475 ; CHECK: vrndscalepd {{.*}}{%k1}
4476 ; CHECK: vrndscalepd
4477 define <4 x double>@test_int_x86_avx512_mask_rndscale_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
4478 %res = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %x0, i32 4, <4 x double> %x2, i8 %x3)
4479 %res1 = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %x0, i32 88, <4 x double> %x2, i8 -1)
4480 %res2 = fadd <4 x double> %res, %res1
4481 ret <4 x double> %res2
4484 declare <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float>, i32, <4 x float>, i8)
4485 ; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_ps_128
4488 ; CHECK: vrndscaleps {{.*}}{%k1}
4489 ; CHECK: vrndscaleps
4490 define <4 x float>@test_int_x86_avx512_mask_rndscale_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
4491 %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %x0, i32 88, <4 x float> %x2, i8 %x3)
4492 %res1 = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %x0, i32 4, <4 x float> %x2, i8 -1)
4493 %res2 = fadd <4 x float> %res, %res1
4494 ret <4 x float> %res2
4497 declare <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float>, i32, <8 x float>, i8)
4499 ; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_ps_256
4502 ; CHECK: vrndscaleps {{.*}}{%k1}
4503 ; CHECK: vrndscaleps
4504 define <8 x float>@test_int_x86_avx512_mask_rndscale_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
4505 %res = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %x0, i32 5, <8 x float> %x2, i8 %x3)
4506 %res1 = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %x0, i32 66, <8 x float> %x2, i8 -1)
4507 %res2 = fadd <8 x float> %res, %res1
4508 ret <8 x float> %res2
4511 declare <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)
4513 define <8 x float>@test_int_x86_avx512_mask_shuf_f32x4_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
4514 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f32x4_256:
4516 ; CHECK-NEXT: movzbl %dil, %eax
4517 ; CHECK-NEXT: kmovw %eax, %k1
4518 ; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm2 {%k1}
4519 ; CHECK-NEXT: ## ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7]
4520 ; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm3 {%k1} {z}
4521 ; CHECK-NEXT: ## ymm3 = ymm0[0,1,2,3],ymm1[4,5,6,7]
4522 ; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm0
4523 ; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
4524 ; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
4525 ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
4527 %res = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4)
4528 %res1 = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1)
4529 %res2 = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> zeroinitializer, i8 %x4)
4530 %res3 = fadd <8 x float> %res, %res1
4531 %res4 = fadd <8 x float> %res2, %res3
4532 ret <8 x float> %res4
4535 declare <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)
4537 define <4 x double>@test_int_x86_avx512_mask_shuf_f64x2_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
4538 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f64x2_256:
4540 ; CHECK-NEXT: movzbl %dil, %eax
4541 ; CHECK-NEXT: kmovw %eax, %k1
4542 ; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm2 {%k1}
4543 ; CHECK-NEXT: ## ymm2 = ymm0[0,1],ymm1[2,3]
4544 ; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm3 {%k1} {z}
4545 ; CHECK-NEXT: ## ymm3 = ymm0[0,1],ymm1[2,3]
4546 ; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm0
4547 ; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3]
4548 ; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0
4549 ; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
4551 %res = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 %x4)
4552 %res1 = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 -1)
4553 %res2 = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> zeroinitializer, i8 %x4)
4554 %res3 = fadd <4 x double> %res, %res1
4555 %res4 = fadd <4 x double> %res2, %res3
4556 ret <4 x double> %res4
4559 declare <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)
4561 define <8 x i32>@test_int_x86_avx512_mask_shuf_i32x4_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
4562 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i32x4_256:
4564 ; CHECK-NEXT: movzbl %dil, %eax
4565 ; CHECK-NEXT: kmovw %eax, %k1
4566 ; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm2 {%k1}
4567 ; CHECK-NEXT: ## ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7]
4568 ; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm0
4569 ; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
4570 ; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
4572 %res = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
4573 %res1 = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 -1)
4574 %res2 = add <8 x i32> %res, %res1
4578 declare <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)
4580 define <4 x i64>@test_int_x86_avx512_mask_shuf_i64x2_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
4581 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i64x2_256:
4583 ; CHECK-NEXT: movzbl %dil, %eax
4584 ; CHECK-NEXT: kmovw %eax, %k1
4585 ; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm2 {%k1}
4586 ; CHECK-NEXT: ## ymm2 = ymm0[0,1],ymm1[2,3]
4587 ; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm0
4588 ; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3]
4589 ; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0
4591 %res = call <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4)
4592 %res1 = call <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 -1)
4593 %res2 = add <4 x i64> %res, %res1
4597 declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float>, i32, <4 x float>, i8)
4599 define <4 x float>@test_int_x86_avx512_mask_vextractf32x4_256(<8 x float> %x0, <4 x float> %x2, i8 %x3) {
4600 ; CHECK-LABEL: test_int_x86_avx512_mask_vextractf32x4_256:
4602 ; CHECK-NEXT: kmovw %edi, %k1
4603 ; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm1 {%k1}
4604 ; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm2 {%k1} {z}
4605 ; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm0
4606 ; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1
4607 ; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
4609 %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> %x2, i8 %x3)
4610 %res1 = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> zeroinitializer, i8 %x3)
4611 %res2 = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> zeroinitializer, i8 -1)
4612 %res3 = fadd <4 x float> %res, %res1
4613 %res4 = fadd <4 x float> %res2, %res3
4614 ret <4 x float> %res4
4617 declare <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double>, i32, <2 x double>, i8)
4619 define <2 x double>@test_int_x86_avx512_mask_getmant_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
4620 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_pd_128:
4622 ; CHECK-NEXT: movzbl %dil, %eax
4623 ; CHECK-NEXT: kmovw %eax, %k1
4624 ; CHECK-NEXT: vgetmantpd $11, %xmm0, %xmm1 {%k1}
4625 ; CHECK-NEXT: vgetmantpd $11, %xmm0, %xmm2 {%k1} {z}
4626 ; CHECK-NEXT: vgetmantpd $11, %xmm0, %xmm0
4627 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
4628 ; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
4630 %res = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 11, <2 x double> %x2, i8 %x3)
4631 %res2 = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 11, <2 x double> zeroinitializer, i8 %x3)
4632 %res1 = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 11, <2 x double> %x2, i8 -1)
4633 %res3 = fadd <2 x double> %res, %res1
4634 %res4 = fadd <2 x double> %res2, %res3
4635 ret <2 x double> %res4
4638 declare <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double>, i32, <4 x double>, i8)
4640 define <4 x double>@test_int_x86_avx512_mask_getmant_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
4641 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_pd_256:
4643 ; CHECK-NEXT: movzbl %dil, %eax
4644 ; CHECK-NEXT: kmovw %eax, %k1
4645 ; CHECK-NEXT: vgetmantpd $11, %ymm0, %ymm1 {%k1}
4646 ; CHECK-NEXT: vgetmantpd $11, %ymm0, %ymm0
4647 ; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
4649 %res = call <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double> %x0, i32 11, <4 x double> %x2, i8 %x3)
4650 %res1 = call <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double> %x0, i32 11, <4 x double> %x2, i8 -1)
4651 %res2 = fadd <4 x double> %res, %res1
4652 ret <4 x double> %res2
4655 declare <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float>, i32, <4 x float>, i8)
4657 define <4 x float>@test_int_x86_avx512_mask_getmant_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
4658 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ps_128:
4660 ; CHECK-NEXT: movzbl %dil, %eax
4661 ; CHECK-NEXT: kmovw %eax, %k1
4662 ; CHECK-NEXT: vgetmantps $11, %xmm0, %xmm1 {%k1}
4663 ; CHECK-NEXT: vgetmantps $11, %xmm0, %xmm0
4664 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
4666 %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float> %x0, i32 11, <4 x float> %x2, i8 %x3)
4667 %res1 = call <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float> %x0, i32 11, <4 x float> %x2, i8 -1)
4668 %res2 = fadd <4 x float> %res, %res1
4669 ret <4 x float> %res2
4672 declare <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float>, i32, <8 x float>, i8)
4674 define <8 x float>@test_int_x86_avx512_mask_getmant_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
4675 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ps_256:
4677 ; CHECK-NEXT: movzbl %dil, %eax
4678 ; CHECK-NEXT: kmovw %eax, %k1
4679 ; CHECK-NEXT: vgetmantps $11, %ymm0, %ymm1 {%k1}
4680 ; CHECK-NEXT: vgetmantps $11, %ymm0, %ymm0
4681 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
4683 %res = call <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 %x3)
4684 %res1 = call <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 -1)
4685 %res2 = fadd <8 x float> %res, %res1
4686 ret <8 x float> %res2
4689 declare <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double>, <2 x double>, i32, <2 x double>, i8)
4691 define <2 x double>@test_int_x86_avx512_mask_shuf_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
4692 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_128:
4694 ; CHECK-NEXT: movzbl %dil, %eax
4695 ; CHECK-NEXT: kmovw %eax, %k1
4696 ; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm2 {%k1}
4697 ; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm3 {%k1} {z}
4698 ; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm0
4699 ; CHECK: vaddpd %xmm0, %xmm2, %xmm0
4700 ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
4702 %res = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 22, <2 x double> %x3, i8 %x4)
4703 %res1 = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 22, <2 x double> %x3, i8 -1)
4704 %res2 = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 22, <2 x double> zeroinitializer, i8 %x4)
4705 %res3 = fadd <2 x double> %res, %res1
4706 %res4 = fadd <2 x double> %res2, %res3
4707 ret <2 x double> %res4
4710 declare <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)
4712 define <4 x double>@test_int_x86_avx512_mask_shuf_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
4713 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_256:
4715 ; CHECK-NEXT: movzbl %dil, %eax
4716 ; CHECK-NEXT: kmovw %eax, %k1
4717 ; CHECK-NEXT: vshufpd $22, %ymm1, %ymm0, %ymm2 {%k1}
4718 ; CHECK-NEXT: vshufpd $22, %ymm1, %ymm0, %ymm0
4719 ; CHECK: vaddpd %ymm0, %ymm2, %ymm0
4721 %res = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 %x4)
4722 %res1 = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 -1)
4723 %res2 = fadd <4 x double> %res, %res1
4724 ret <4 x double> %res2
4727 declare <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float>, <4 x float>, i32, <4 x float>, i8)
4729 define <4 x float>@test_int_x86_avx512_mask_shuf_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
4730 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_128:
4732 ; CHECK-NEXT: movzbl %dil, %eax
4733 ; CHECK-NEXT: kmovw %eax, %k1
4734 ; CHECK-NEXT: vshufps $22, %xmm1, %xmm0, %xmm2 {%k1}
4735 ; CHECK-NEXT: vshufps $22, %xmm1, %xmm0, %xmm0
4736 ; CHECK: vaddps %xmm0, %xmm2, %xmm0
4738 %res = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 %x4)
4739 %res1 = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 -1)
4740 %res2 = fadd <4 x float> %res, %res1
4741 ret <4 x float> %res2
4744 declare <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)
4746 define <8 x float>@test_int_x86_avx512_mask_shuf_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
4747 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_256:
4749 ; CHECK-NEXT: movzbl %dil, %eax
4750 ; CHECK-NEXT: kmovw %eax, %k1
4751 ; CHECK-NEXT: vshufps $22, %ymm1, %ymm0, %ymm2 {%k1}
4752 ; CHECK-NEXT: vshufps $22, %ymm1, %ymm0, %ymm0
4753 ; CHECK: vaddps %ymm0, %ymm2, %ymm0
4755 %res = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4)
4756 %res1 = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1)
4757 %res2 = fadd <8 x float> %res, %res1
4758 ret <8 x float> %res2
4761 declare <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32>, <4 x i32>, i32, <4 x i32>, i8)
4763 define <4 x i32>@test_int_x86_avx512_mask_valign_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
4764 ; CHECK-LABEL: test_int_x86_avx512_mask_valign_d_128:
4766 ; CHECK-NEXT: movzbl %dil, %eax
4767 ; CHECK-NEXT: kmovw %eax, %k1
4768 ; CHECK-NEXT: valignd $22, %xmm1, %xmm0, %xmm2 {%k1}
4769 ; CHECK-NEXT: valignd $22, %xmm1, %xmm0, %xmm3 {%k1} {z}
4770 ; CHECK-NEXT: valignd $22, %xmm1, %xmm0, %xmm0
4771 ; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
4772 ; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0
4774 %res = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 %x4)
4775 %res1 = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 -1)
4776 %res2 = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> zeroinitializer,i8 %x4)
4777 %res3 = add <4 x i32> %res, %res1
4778 %res4 = add <4 x i32> %res3, %res2
4782 declare <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)
4784 define <8 x i32>@test_int_x86_avx512_mask_valign_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
4785 ; CHECK-LABEL: test_int_x86_avx512_mask_valign_d_256:
4787 ; CHECK-NEXT: movzbl %dil, %eax
4788 ; CHECK-NEXT: kmovw %eax, %k1
4789 ; CHECK-NEXT: valignd $22, %ymm1, %ymm0, %ymm2 {%k1}
4790 ; CHECK-NEXT: valignd $22, %ymm1, %ymm0, %ymm0
4791 ; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
4793 %res = call <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
4794 %res1 = call <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 -1)
4795 %res2 = add <8 x i32> %res, %res1
4799 declare <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64>, <2 x i64>, i32, <2 x i64>, i8)
4801 define <2 x i64>@test_int_x86_avx512_mask_valign_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
4802 ; CHECK-LABEL: test_int_x86_avx512_mask_valign_q_128:
4804 ; CHECK-NEXT: movzbl %dil, %eax
4805 ; CHECK-NEXT: kmovw %eax, %k1
4806 ; CHECK-NEXT: valignq $22, %xmm1, %xmm0, %xmm2 {%k1}
4807 ; CHECK-NEXT: valignq $22, %xmm1, %xmm0, %xmm0
4808 ; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0
4810 %res = call <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 %x4)
4811 %res1 = call <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 -1)
4812 %res2 = add <2 x i64> %res, %res1
4816 declare <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)
4818 define <4 x i64>@test_int_x86_avx512_mask_valign_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
4819 ; CHECK-LABEL: test_int_x86_avx512_mask_valign_q_256:
4821 ; CHECK-NEXT: movzbl %dil, %eax
4822 ; CHECK-NEXT: kmovw %eax, %k1
4823 ; CHECK-NEXT: valignq $22, %ymm1, %ymm0, %ymm2 {%k1}
4824 ; CHECK-NEXT: valignq $22, %ymm1, %ymm0, %ymm0
4825 ; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0
4827 %res = call <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4)
4828 %res1 = call <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 -1)
4829 %res2 = add <4 x i64> %res, %res1
4833 declare <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double>, i32, <4 x double>, i8)
4835 define <4 x double>@test_int_x86_avx512_mask_vpermil_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
4836 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_256:
4838 ; CHECK-NEXT: movzbl %dil, %eax
4839 ; CHECK-NEXT: kmovw %eax, %k1
4840 ; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm1 {%k1}
4841 ; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm2 {%k1} {z}
4842 ; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm0
4843 ; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm1
4844 ; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
4846 %res = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 %x3)
4847 %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> zeroinitializer, i8 %x3)
4848 %res2 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 -1)
4849 %res3 = fadd <4 x double> %res, %res1
4850 %res4 = fadd <4 x double> %res2, %res3
4851 ret <4 x double> %res4
4854 declare <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double>, i32, <2 x double>, i8)
4856 define <2 x double>@test_int_x86_avx512_mask_vpermil_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
4857 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_128:
4859 ; CHECK-NEXT: movzbl %dil, %eax
4860 ; CHECK-NEXT: kmovw %eax, %k1
4861 ; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm1 {%k1}
4862 ; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm2 {%k1} {z}
4863 ; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm0
4864 ; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm1
4865 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
4867 %res = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 %x3)
4868 %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> zeroinitializer, i8 %x3)
4869 %res2 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 -1)
4870 %res3 = fadd <2 x double> %res, %res1
4871 %res4 = fadd <2 x double> %res3, %res2
4872 ret <2 x double> %res4
4875 declare <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float>, i32, <8 x float>, i8)
4877 define <8 x float>@test_int_x86_avx512_mask_vpermil_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
4878 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_256:
4880 ; CHECK-NEXT: movzbl %dil, %eax
4881 ; CHECK-NEXT: kmovw %eax, %k1
4882 ; CHECK-NEXT: vpermilps $22, %ymm0, %ymm1 {%k1}
4883 ; CHECK-NEXT: vpermilps $22, %ymm0, %ymm2 {%k1} {z}
4884 ; CHECK-NEXT: vpermilps $22, %ymm0, %ymm0
4885 ; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1
4886 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
4888 %res = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 %x3)
4889 %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> zeroinitializer, i8 %x3)
4890 %res2 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 -1)
4891 %res3 = fadd <8 x float> %res, %res1
4892 %res4 = fadd <8 x float> %res3, %res2
4893 ret <8 x float> %res4
4896 declare <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float>, i32, <4 x float>, i8)
4898 define <4 x float>@test_int_x86_avx512_mask_vpermil_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
4899 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_128:
4901 ; CHECK-NEXT: movzbl %dil, %eax
4902 ; CHECK-NEXT: kmovw %eax, %k1
4903 ; CHECK-NEXT: vpermilps $22, %xmm0, %xmm1 {%k1}
4904 ; CHECK-NEXT: vpermilps $22, %xmm0, %xmm2 {%k1} {z}
4905 ; CHECK-NEXT: vpermilps $22, %xmm0, %xmm0
4906 ; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1
4907 ; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
4909 %res = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 %x3)
4910 %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> zeroinitializer, i8 %x3)
4911 %res2 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 -1)
4912 %res3 = fadd <4 x float> %res, %res1
4913 %res4 = fadd <4 x float> %res2, %res3
4914 ret <4 x float> %res4
4917 declare <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8)
4919 define <4 x double>@test_int_x86_avx512_mask_vpermilvar_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
4920 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_256:
4922 ; CHECK-NEXT: movzbl %dil, %eax
4923 ; CHECK-NEXT: kmovw %eax, %k1
4924 ; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm2 {%k1}
4925 ; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm3 {%k1} {z}
4926 ; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm0
4927 ; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm1
4928 ; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
4930 %res = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
4931 %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3)
4932 %res2 = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
4933 %res3 = fadd <4 x double> %res, %res1
4934 %res4 = fadd <4 x double> %res2, %res3
4935 ret <4 x double> %res4
4938 declare <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8)
4940 define <2 x double>@test_int_x86_avx512_mask_vpermilvar_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) {
4941 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_128:
4943 ; CHECK-NEXT: movzbl %dil, %eax
4944 ; CHECK-NEXT: kmovw %eax, %k1
4945 ; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm2 {%k1}
4946 ; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm3 {%k1} {z}
4947 ; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm0
4948 ; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm1
4949 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
4951 %res = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3)
4952 %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> zeroinitializer, i8 %x3)
4953 %res2 = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1)
4954 %res3 = fadd <2 x double> %res, %res1
4955 %res4 = fadd <2 x double> %res3, %res2
4956 ret <2 x double> %res4
4959 declare <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8)
4961 define <8 x float>@test_int_x86_avx512_mask_vpermilvar_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
4962 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_256:
4964 ; CHECK-NEXT: movzbl %dil, %eax
4965 ; CHECK-NEXT: kmovw %eax, %k1
4966 ; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm2 {%k1}
4967 ; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm3 {%k1} {z}
4968 ; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm0
4969 ; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm1
4970 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
4972 %res = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
4973 %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3)
4974 %res2 = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
4975 %res3 = fadd <8 x float> %res, %res1
4976 %res4 = fadd <8 x float> %res3, %res2
4977 ret <8 x float> %res4
4980 declare <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8)
4982 define <4 x float>@test_int_x86_avx512_mask_vpermilvar_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) {
4983 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_128:
4985 ; CHECK-NEXT: movzbl %dil, %eax
4986 ; CHECK-NEXT: kmovw %eax, %k1
4987 ; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm2 {%k1}
4988 ; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm3 {%k1} {z}
4989 ; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm0
4990 ; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm1
4991 ; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
4993 %res = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3)
4994 %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> zeroinitializer, i8 %x3)
4995 %res2 = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1)
4996 %res3 = fadd <4 x float> %res, %res1
4997 %res4 = fadd <4 x float> %res2, %res3
4998 ret <4 x float> %res4
5001 declare <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float>, <4 x float>, i32, <8 x float>, i8)
5003 define <8 x float>@test_int_x86_avx512_mask_insertf32x4_256(<8 x float> %x0, <4 x float> %x1, <8 x float> %x3, i8 %x4) {
5004 ; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x4_256:
5006 ; CHECK-NEXT: movzbl %dil, %eax
5007 ; CHECK-NEXT: kmovw %eax, %k1
5008 ; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm2 {%k1}
5009 ; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm3 {%k1} {z}
5010 ; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
5011 ; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
5012 ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
5014 %res = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 %x4)
5015 %res1 = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 -1)
5016 %res2 = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> zeroinitializer, i8 %x4)
5017 %res3 = fadd <8 x float> %res, %res1
5018 %res4 = fadd <8 x float> %res2, %res3
5019 ret <8 x float> %res4
5022 declare <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32>, <4 x i32>, i32, <8 x i32>, i8)
5024 define <8 x i32>@test_int_x86_avx512_mask_inserti32x4_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x3, i8 %x4) {
5025 ; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x4_256:
5027 ; CHECK-NEXT: movzbl %dil, %eax
5028 ; CHECK-NEXT: kmovw %eax, %k1
5029 ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm2 {%k1}
5030 ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm3 {%k1} {z}
5031 ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
5032 ; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
5033 ; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0
5036 %res = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 %x4)
5037 %res1 = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 -1)
5038 %res2 = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> zeroinitializer, i8 %x4)
5039 %res3 = add <8 x i32> %res, %res1
5040 %res4 = add <8 x i32> %res2, %res3
5044 declare <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8)
5046 define <4 x i32>@test_int_x86_avx512_mask_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
5047 ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_128:
5049 ; CHECK-NEXT: movzbl %dil, %eax
5050 ; CHECK-NEXT: kmovw %eax, %k1
5051 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
5052 ; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 {%k1}
5053 ; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0
5054 ; CHECK-NEXT: vpaddd %xmm0, %xmm3, %xmm0
5056 %res = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
5057 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1)
5058 %res2 = add <4 x i32> %res, %res1
5062 declare <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8)
5064 define <4 x i32>@test_int_x86_avx512_maskz_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
5065 ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_128:
5067 ; CHECK-NEXT: movzbl %dil, %eax
5068 ; CHECK-NEXT: kmovw %eax, %k1
5069 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
5070 ; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 {%k1} {z}
5071 ; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0
5072 ; CHECK-NEXT: vpaddd %xmm0, %xmm3, %xmm0
5074 %res = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
5075 %res1 = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1)
5076 %res2 = add <4 x i32> %res, %res1
5080 declare <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8)
5082 define <8 x i32>@test_int_x86_avx512_mask_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
5083 ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_256:
5085 ; CHECK-NEXT: movzbl %dil, %eax
5086 ; CHECK-NEXT: kmovw %eax, %k1
5087 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
5088 ; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 {%k1}
5089 ; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0
5090 ; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0
5092 %res = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
5093 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1)
5094 %res2 = add <8 x i32> %res, %res1
5098 declare <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8)
5100 define <8 x i32>@test_int_x86_avx512_maskz_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
5101 ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_256:
5103 ; CHECK-NEXT: movzbl %dil, %eax
5104 ; CHECK-NEXT: kmovw %eax, %k1
5105 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
5106 ; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 {%k1} {z}
5107 ; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0
5108 ; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0
5110 %res = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
5111 %res1 = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1)
5112 %res2 = add <8 x i32> %res, %res1
5116 declare <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8)
5118 define <2 x i64>@test_int_x86_avx512_mask_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
5119 ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_128:
5121 ; CHECK-NEXT: movzbl %dil, %eax
5122 ; CHECK-NEXT: kmovw %eax, %k1
5123 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
5124 ; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 {%k1}
5125 ; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0
5126 ; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0
5128 %res = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
5129 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
5130 %res2 = add <2 x i64> %res, %res1
5134 declare <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8)
5136 define <2 x i64>@test_int_x86_avx512_maskz_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
5137 ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_128:
5139 ; CHECK-NEXT: movzbl %dil, %eax
5140 ; CHECK-NEXT: kmovw %eax, %k1
5141 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
5142 ; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 {%k1} {z}
5143 ; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0
5144 ; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0
5146 %res = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
5147 %res1 = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
5148 %res2 = add <2 x i64> %res, %res1
5152 declare <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)
5154 define <4 x i64>@test_int_x86_avx512_mask_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
5155 ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_256:
5157 ; CHECK-NEXT: movzbl %dil, %eax
5158 ; CHECK-NEXT: kmovw %eax, %k1
5159 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
5160 ; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 {%k1}
5161 ; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0
5162 ; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0
5164 %res = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
5165 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
5166 %res2 = add <4 x i64> %res, %res1
5170 declare <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)
5172 define <4 x i64>@test_int_x86_avx512_maskz_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
5173 ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_256:
5175 ; CHECK-NEXT: movzbl %dil, %eax
5176 ; CHECK-NEXT: kmovw %eax, %k1
5177 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
5178 ; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 {%k1} {z}
5179 ; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0
5180 ; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0
5182 %res = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
5183 %res1 = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
5184 %res2 = add <4 x i64> %res, %res1
5188 declare <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32>, <8 x i32>, i8)
5190 define <8 x i32>@test_int_x86_avx512_pbroadcastd_256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask) {
5191 ; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_256:
5193 ; CHECK-NEXT: movzbl %dil, %eax
5194 ; CHECK-NEXT: kmovw %eax, %k1
5195 ; CHECK-NEXT: vpbroadcastd %xmm0, %ymm1 {%k1}
5196 ; CHECK-NEXT: vpbroadcastd %xmm0, %ymm2 {%k1} {z}
5197 ; CHECK-NEXT: vpbroadcastd %xmm0, %ymm0
5198 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
5199 ; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
5201 %res = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> %x1, i8 -1)
5202 %res1 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask)
5203 %res2 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %mask)
5204 %res3 = add <8 x i32> %res, %res1
5205 %res4 = add <8 x i32> %res2, %res3
5209 declare <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32>, <4 x i32>, i8)
5211 define <4 x i32>@test_int_x86_avx512_pbroadcastd_128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) {
5212 ; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_128:
5214 ; CHECK-NEXT: movzbl %dil, %eax
5215 ; CHECK-NEXT: kmovw %eax, %k1
5216 ; CHECK-NEXT: vpbroadcastd %xmm0, %xmm1 {%k1}
5217 ; CHECK-NEXT: vpbroadcastd %xmm0, %xmm2 {%k1} {z}
5218 ; CHECK-NEXT: vpbroadcastd %xmm0, %xmm0
5219 ; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
5220 ; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
5222 %res = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
5223 %res1 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask)
5224 %res2 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %mask)
5225 %res3 = add <4 x i32> %res, %res1
5226 %res4 = add <4 x i32> %res2, %res3
5230 declare <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64>, <4 x i64>, i8)
5232 define <4 x i64>@test_int_x86_avx512_pbroadcastq_256(<2 x i64> %x0, <4 x i64> %x1, i8 %mask) {
5233 ; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_256:
5235 ; CHECK-NEXT: movzbl %dil, %eax
5236 ; CHECK-NEXT: kmovw %eax, %k1
5237 ; CHECK-NEXT: vpbroadcastq %xmm0, %ymm1 {%k1}
5238 ; CHECK-NEXT: vpbroadcastq %xmm0, %ymm2 {%k1} {z}
5239 ; CHECK-NEXT: vpbroadcastq %xmm0, %ymm0
5240 ; CHECK-NEXT: vpaddq %ymm1, %ymm0, %ymm0
5241 ; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0
5243 %res = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1,i8 -1)
5244 %res1 = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1,i8 %mask)
5245 %res2 = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> zeroinitializer,i8 %mask)
5246 %res3 = add <4 x i64> %res, %res1
5247 %res4 = add <4 x i64> %res2, %res3
5251 declare <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64>, <2 x i64>, i8)
5253 define <2 x i64>@test_int_x86_avx512_pbroadcastq_128(<2 x i64> %x0, <2 x i64> %x1, i8 %mask) {
5254 ; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_128:
5256 ; CHECK-NEXT: movzbl %dil, %eax
5257 ; CHECK-NEXT: kmovw %eax, %k1
5258 ; CHECK-NEXT: vpbroadcastq %xmm0, %xmm1 {%k1}
5259 ; CHECK-NEXT: vpbroadcastq %xmm0, %xmm2 {%k1} {z}
5260 ; CHECK-NEXT: vpbroadcastq %xmm0, %xmm0
5261 ; CHECK-NEXT: vpaddq %xmm1, %xmm0, %xmm0
5262 ; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0
5264 %res = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1,i8 -1)
5265 %res1 = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1,i8 %mask)
5266 %res2 = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> zeroinitializer,i8 %mask)
5267 %res3 = add <2 x i64> %res, %res1
5268 %res4 = add <2 x i64> %res2, %res3
5272 define <4 x float> @test_x86_vcvtph2ps_128(<8 x i16> %a0) {
5273 ; CHECK: test_x86_vcvtph2ps_128
5274 ; CHECK: vcvtph2ps %xmm0, %xmm0
5275 %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> zeroinitializer, i8 -1)
5276 ret <4 x float> %res
5279 define <4 x float> @test_x86_vcvtph2ps_128_rrk(<8 x i16> %a0,<4 x float> %a1, i8 %mask) {
5280 ; CHECK: test_x86_vcvtph2ps_128_rrk
5281 ; CHECK: vcvtph2ps %xmm0, %xmm1 {%k1}
5282 %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> %a1, i8 %mask)
5283 ret <4 x float> %res
5287 define <4 x float> @test_x86_vcvtph2ps_128_rrkz(<8 x i16> %a0, i8 %mask) {
5288 ; CHECK: test_x86_vcvtph2ps_128_rrkz
5289 ; CHECK: vcvtph2ps %xmm0, %xmm0 {%k1} {z}
5290 %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> zeroinitializer, i8 %mask)
5291 ret <4 x float> %res
5294 declare <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16>, <4 x float>, i8) nounwind readonly
5296 define <8 x float> @test_x86_vcvtph2ps_256(<8 x i16> %a0) {
5297 ; CHECK: test_x86_vcvtph2ps_256
5298 ; CHECK: vcvtph2ps %xmm0, %ymm0
5299 %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> zeroinitializer, i8 -1)
5300 ret <8 x float> %res
5303 define <8 x float> @test_x86_vcvtph2ps_256_rrk(<8 x i16> %a0,<8 x float> %a1, i8 %mask) {
5304 ; CHECK: test_x86_vcvtph2ps_256_rrk
5305 ; CHECK: vcvtph2ps %xmm0, %ymm1 {%k1}
5306 %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> %a1, i8 %mask)
5307 ret <8 x float> %res
5310 define <8 x float> @test_x86_vcvtph2ps_256_rrkz(<8 x i16> %a0, i8 %mask) {
5311 ; CHECK: test_x86_vcvtph2ps_256_rrkz
5312 ; CHECK: vcvtph2ps %xmm0, %ymm0 {%k1} {z}
5313 %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> zeroinitializer, i8 %mask)
5314 ret <8 x float> %res
5317 declare <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16>, <8 x float>, i8) nounwind readonly
5319 define <8 x i16> @test_x86_vcvtps2ph_128(<4 x float> %a0) {
5320 ; CHECK: test_x86_vcvtps2ph_128
5321 ; CHECK: vcvtps2ph $2, %xmm0, %xmm0
5322 %res = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 -1)
5327 declare <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float>, i32, <8 x i16>, i8) nounwind readonly
5329 define <8 x i16> @test_x86_vcvtps2ph_256(<8 x float> %a0) {
5330 ; CHECK: test_x86_vcvtps2ph_256
5331 ; CHECK: vcvtps2ph $2, %ymm0, %xmm0
5332 %res = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 -1)
5336 declare <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float>, i32, <8 x i16>, i8) nounwind readonly