1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx --show-mc-encoding| FileCheck %s
; Byte equality compare on <64 x i8> with an all-ones mask (i64 -1);
; the expected instruction writes %k0 without a {%k1} write-mask.
2 define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) {
3 ; CHECK-LABEL: test_pcmpeq_b
4 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 ##
5 %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
; Byte equality compare on <64 x i8> with a caller-supplied i64 %mask;
; the expected instruction carries the {%k1} write-mask.
10 define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
11 ; CHECK-LABEL: test_mask_pcmpeq_b
12 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} ##
13 %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
17 declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64)
; Word equality compare on <32 x i16> with an all-ones mask (i32 -1);
; the expected instruction writes %k0 without a {%k1} write-mask.
19 define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) {
20 ; CHECK-LABEL: test_pcmpeq_w
21 ; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 ##
22 %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
; Word equality compare on <32 x i16> with a caller-supplied i32 %mask;
; the expected instruction carries the {%k1} write-mask.
26 define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
27 ; CHECK-LABEL: test_mask_pcmpeq_w
28 ; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} ##
29 %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
33 declare i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16>, <32 x i16>, i32)
; Byte greater-than compare on <64 x i8> with an all-ones mask (i64 -1);
; the expected instruction writes %k0 without a {%k1} write-mask.
35 define i64 @test_pcmpgt_b(<64 x i8> %a, <64 x i8> %b) {
36 ; CHECK-LABEL: test_pcmpgt_b
37 ; CHECK: vpcmpgtb %zmm1, %zmm0, %k0 ##
38 %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
; Byte greater-than compare on <64 x i8> with a caller-supplied i64 %mask;
; the expected instruction carries the {%k1} write-mask.
42 define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
43 ; CHECK-LABEL: test_mask_pcmpgt_b
44 ; CHECK: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} ##
45 %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
49 declare i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8>, <64 x i8>, i64)
; Word greater-than compare on <32 x i16> with an all-ones mask (i32 -1);
; the expected instruction writes %k0 without a {%k1} write-mask.
51 define i32 @test_pcmpgt_w(<32 x i16> %a, <32 x i16> %b) {
52 ; CHECK-LABEL: test_pcmpgt_w
53 ; CHECK: vpcmpgtw %zmm1, %zmm0, %k0 ##
54 %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
; Word greater-than compare on <32 x i16> with a caller-supplied i32 %mask;
; the expected instruction carries the {%k1} write-mask.
58 define i32 @test_mask_pcmpgt_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
59 ; CHECK-LABEL: test_mask_pcmpgt_w
60 ; CHECK: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} ##
61 %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
65 declare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32)
; Calls the mask.cmp.b.512 intrinsic once per predicate immediate (0 through 7)
; with an all-ones mask (i64 -1) and stores each i64 result in successive lanes
; of an <8 x i64>. One instruction is expected per predicate, in call order.
67 define <8 x i64> @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
68 ; CHECK-LABEL: test_cmp_b_512
69 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 ##
70 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
71 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
72 ; CHECK: vpcmpltb %zmm1, %zmm0, %k0 ##
73 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
74 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
75 ; CHECK: vpcmpleb %zmm1, %zmm0, %k0 ##
76 %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
77 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
78 ; CHECK: vpcmpunordb %zmm1, %zmm0, %k0 ##
79 %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
80 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
81 ; CHECK: vpcmpneqb %zmm1, %zmm0, %k0 ##
82 %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
83 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
84 ; CHECK: vpcmpnltb %zmm1, %zmm0, %k0 ##
85 %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
86 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
87 ; CHECK: vpcmpnleb %zmm1, %zmm0, %k0 ##
88 %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
89 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
90 ; CHECK: vpcmpordb %zmm1, %zmm0, %k0 ##
91 %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
92 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
; Calls the mask.cmp.b.512 intrinsic once per predicate immediate (0 through 7)
; with a caller-supplied i64 %mask and stores each result in successive lanes
; of an <8 x i64>. Every expected instruction carries the {%k1} write-mask.
96 define <8 x i64> @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
97 ; CHECK-LABEL: test_mask_cmp_b_512
98 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} ##
99 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
100 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
101 ; CHECK: vpcmpltb %zmm1, %zmm0, %k0 {%k1} ##
102 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
103 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
104 ; CHECK: vpcmpleb %zmm1, %zmm0, %k0 {%k1} ##
105 %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
106 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
107 ; CHECK: vpcmpunordb %zmm1, %zmm0, %k0 {%k1} ##
108 %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
109 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
110 ; CHECK: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} ##
111 %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
112 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
113 ; CHECK: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} ##
114 %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
115 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
116 ; CHECK: vpcmpnleb %zmm1, %zmm0, %k0 {%k1} ##
117 %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
118 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
119 ; CHECK: vpcmpordb %zmm1, %zmm0, %k0 {%k1} ##
120 %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
121 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
125 declare i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
; Calls the mask.ucmp.b.512 intrinsic once per predicate immediate (0 through 7)
; with an all-ones mask (i64 -1) and stores each i64 result in successive lanes
; of an <8 x i64>. The expected mnemonics are the "u"-suffixed compare forms.
127 define <8 x i64> @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
128 ; CHECK-LABEL: test_ucmp_b_512
129 ; CHECK: vpcmpequb %zmm1, %zmm0, %k0 ##
130 %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
131 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
132 ; CHECK: vpcmpltub %zmm1, %zmm0, %k0 ##
133 %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
134 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
135 ; CHECK: vpcmpleub %zmm1, %zmm0, %k0 ##
136 %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
137 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
138 ; CHECK: vpcmpunordub %zmm1, %zmm0, %k0 ##
139 %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
140 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
141 ; CHECK: vpcmpnequb %zmm1, %zmm0, %k0 ##
142 %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
143 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
144 ; CHECK: vpcmpnltub %zmm1, %zmm0, %k0 ##
145 %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
146 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
147 ; CHECK: vpcmpnleub %zmm1, %zmm0, %k0 ##
148 %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
149 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
150 ; CHECK: vpcmpordub %zmm1, %zmm0, %k0 ##
151 %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
152 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
; Calls the mask.ucmp.b.512 intrinsic once per predicate immediate (0 through 7)
; with a caller-supplied i64 %mask and stores each result in successive lanes
; of an <8 x i64>. Every expected instruction carries the {%k1} write-mask.
; The label expectation below uses this function's full name so that it can
; match the emitted symbol.
156 define <8 x i64> @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
157 ; CHECK-LABEL: test_mask_x86_avx512_ucmp_b_512
158 ; CHECK: vpcmpequb %zmm1, %zmm0, %k0 {%k1} ##
159 %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
160 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
161 ; CHECK: vpcmpltub %zmm1, %zmm0, %k0 {%k1} ##
162 %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
163 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
164 ; CHECK: vpcmpleub %zmm1, %zmm0, %k0 {%k1} ##
165 %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
166 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
167 ; CHECK: vpcmpunordub %zmm1, %zmm0, %k0 {%k1} ##
168 %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
169 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
170 ; CHECK: vpcmpnequb %zmm1, %zmm0, %k0 {%k1} ##
171 %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
172 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
173 ; CHECK: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} ##
174 %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
175 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
176 ; CHECK: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} ##
177 %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
178 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
179 ; CHECK: vpcmpordub %zmm1, %zmm0, %k0 {%k1} ##
180 %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
181 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
185 declare i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
; Calls the mask.cmp.w.512 intrinsic once per predicate immediate (0 through 7)
; with an all-ones mask (i32 -1) and stores each i32 result in successive lanes
; of an <8 x i32>. One instruction is expected per predicate, in call order.
187 define <8 x i32> @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
188 ; CHECK-LABEL: test_cmp_w_512
189 ; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 ##
190 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
191 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
192 ; CHECK: vpcmpltw %zmm1, %zmm0, %k0 ##
193 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
194 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
195 ; CHECK: vpcmplew %zmm1, %zmm0, %k0 ##
196 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
197 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
198 ; CHECK: vpcmpunordw %zmm1, %zmm0, %k0 ##
199 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
200 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
201 ; CHECK: vpcmpneqw %zmm1, %zmm0, %k0 ##
202 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
203 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
204 ; CHECK: vpcmpnltw %zmm1, %zmm0, %k0 ##
205 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
206 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
207 ; CHECK: vpcmpnlew %zmm1, %zmm0, %k0 ##
208 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
209 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
210 ; CHECK: vpcmpordw %zmm1, %zmm0, %k0 ##
211 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
212 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
; Calls the mask.cmp.w.512 intrinsic once per predicate immediate (0 through 7)
; with a caller-supplied i32 %mask and stores each result in successive lanes
; of an <8 x i32>. Every expected instruction carries the {%k1} write-mask.
216 define <8 x i32> @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
217 ; CHECK-LABEL: test_mask_cmp_w_512
218 ; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} ##
219 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
220 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
221 ; CHECK: vpcmpltw %zmm1, %zmm0, %k0 {%k1} ##
222 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
223 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
224 ; CHECK: vpcmplew %zmm1, %zmm0, %k0 {%k1} ##
225 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
226 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
227 ; CHECK: vpcmpunordw %zmm1, %zmm0, %k0 {%k1} ##
228 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
229 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
230 ; CHECK: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} ##
231 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
232 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
233 ; CHECK: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} ##
234 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
235 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
236 ; CHECK: vpcmpnlew %zmm1, %zmm0, %k0 {%k1} ##
237 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
238 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
239 ; CHECK: vpcmpordw %zmm1, %zmm0, %k0 {%k1} ##
240 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
241 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
245 declare i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
; Calls the mask.ucmp.w.512 intrinsic once per predicate immediate (0 through 7)
; with an all-ones mask (i32 -1) and stores each i32 result in successive lanes
; of an <8 x i32>. The expected mnemonics are the "u"-suffixed compare forms.
247 define <8 x i32> @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
248 ; CHECK-LABEL: test_ucmp_w_512
249 ; CHECK: vpcmpequw %zmm1, %zmm0, %k0 ##
250 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
251 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
252 ; CHECK: vpcmpltuw %zmm1, %zmm0, %k0 ##
253 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
254 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
255 ; CHECK: vpcmpleuw %zmm1, %zmm0, %k0 ##
256 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
257 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
258 ; CHECK: vpcmpunorduw %zmm1, %zmm0, %k0 ##
259 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
260 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
261 ; CHECK: vpcmpnequw %zmm1, %zmm0, %k0 ##
262 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
263 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
264 ; CHECK: vpcmpnltuw %zmm1, %zmm0, %k0 ##
265 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
266 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
267 ; CHECK: vpcmpnleuw %zmm1, %zmm0, %k0 ##
268 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
269 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
270 ; CHECK: vpcmporduw %zmm1, %zmm0, %k0 ##
271 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
272 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
; Calls the mask.ucmp.w.512 intrinsic once per predicate immediate (0 through 7)
; with a caller-supplied i32 %mask and stores each result in successive lanes
; of an <8 x i32>. Every expected instruction carries the {%k1} write-mask.
276 define <8 x i32> @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
277 ; CHECK-LABEL: test_mask_ucmp_w_512
278 ; CHECK: vpcmpequw %zmm1, %zmm0, %k0 {%k1} ##
279 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
280 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
281 ; CHECK: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} ##
282 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
283 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
284 ; CHECK: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} ##
285 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
286 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
287 ; CHECK: vpcmpunorduw %zmm1, %zmm0, %k0 {%k1} ##
288 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
289 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
290 ; CHECK: vpcmpnequw %zmm1, %zmm0, %k0 {%k1} ##
291 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
292 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
293 ; CHECK: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} ##
294 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
295 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
296 ; CHECK: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} ##
297 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
298 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
299 ; CHECK: vpcmporduw %zmm1, %zmm0, %k0 {%k1} ##
300 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
301 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
305 declare i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
; Calls the mask.blend.b.256 intrinsic on two <32 x i8> vectors with the i32
; mask passed as the first function argument (%a0).
; NOTE(review): only the label expectation is visible here; confirm whether an
; instruction-level expectation exists on lines outside this view.
307 ; CHECK-LABEL: test_x86_mask_blend_b_256
309 define <32 x i8> @test_x86_mask_blend_b_256(i32 %a0, <32 x i8> %a1, <32 x i8> %a2) {
310 %res = call <32 x i8> @llvm.x86.avx512.mask.blend.b.256(<32 x i8> %a1, <32 x i8> %a2, i32 %a0) ; <<32 x i8>> [#uses=1]
313 declare <32 x i8> @llvm.x86.avx512.mask.blend.b.256(<32 x i8>, <32 x i8>, i32) nounwind readonly
; Calls the mask.blend.w.256 intrinsic on two <16 x i16> vectors with the i16
; mask passed as the first function argument (%mask).
; NOTE(review): only the label expectation is visible here; confirm whether an
; instruction-level expectation exists on lines outside this view.
315 ; CHECK-LABEL: test_x86_mask_blend_w_256
316 define <16 x i16> @test_x86_mask_blend_w_256(i16 %mask, <16 x i16> %a1, <16 x i16> %a2) {
318 %res = call <16 x i16> @llvm.x86.avx512.mask.blend.w.256(<16 x i16> %a1, <16 x i16> %a2, i16 %mask) ; <<16 x i16>> [#uses=1]
321 declare <16 x i16> @llvm.x86.avx512.mask.blend.w.256(<16 x i16>, <16 x i16>, i16) nounwind readonly
; Calls the mask.blend.b.512 intrinsic on two <64 x i8> vectors with the i64
; mask passed as the first function argument (%a0).
; NOTE(review): only the label expectation is visible here; confirm whether an
; instruction-level expectation exists on lines outside this view.
323 ; CHECK-LABEL: test_x86_mask_blend_b_512
325 define <64 x i8> @test_x86_mask_blend_b_512(i64 %a0, <64 x i8> %a1, <64 x i8> %a2) {
326 %res = call <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8> %a1, <64 x i8> %a2, i64 %a0) ; <<64 x i8>> [#uses=1]
329 declare <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8>, <64 x i8>, i64) nounwind readonly
; Calls the mask.blend.w.512 intrinsic on two <32 x i16> vectors with the i32
; mask passed as the first function argument (%mask).
; NOTE(review): only the label expectation is visible here; confirm whether an
; instruction-level expectation exists on lines outside this view.
331 ; CHECK-LABEL: test_x86_mask_blend_w_512
332 define <32 x i16> @test_x86_mask_blend_w_512(i32 %mask, <32 x i16> %a1, <32 x i16> %a2) {
334 %res = call <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16> %a1, <32 x i16> %a2, i32 %mask) ; <<32 x i16>> [#uses=1]
337 declare <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16>, <32 x i16>, i32) nounwind readonly
; Calls the mask.blend.b.128 intrinsic on two <16 x i8> vectors with the i16
; mask passed as the first function argument (%a0).
; NOTE(review): only the label expectation is visible here; confirm whether an
; instruction-level expectation exists on lines outside this view.
339 ; CHECK-LABEL: test_x86_mask_blend_b_128
341 define <16 x i8> @test_x86_mask_blend_b_128(i16 %a0, <16 x i8> %a1, <16 x i8> %a2) {
342 %res = call <16 x i8> @llvm.x86.avx512.mask.blend.b.128(<16 x i8> %a1, <16 x i8> %a2, i16 %a0) ; <<16 x i8>> [#uses=1]
345 declare <16 x i8> @llvm.x86.avx512.mask.blend.b.128(<16 x i8>, <16 x i8>, i16) nounwind readonly
; Calls the mask.blend.w.128 intrinsic on two <8 x i16> vectors with the i8
; mask passed as the first function argument (%mask).
; NOTE(review): only the label expectation is visible here; confirm whether an
; instruction-level expectation exists on lines outside this view.
347 ; CHECK-LABEL: test_x86_mask_blend_w_128
348 define <8 x i16> @test_x86_mask_blend_w_128(i8 %mask, <8 x i16> %a1, <8 x i16> %a2) {
350 %res = call <8 x i16> @llvm.x86.avx512.mask.blend.w.128(<8 x i16> %a1, <8 x i16> %a2, i8 %mask) ; <<8 x i16>> [#uses=1]
353 declare <8 x i16> @llvm.x86.avx512.mask.blend.w.128(<8 x i16>, <8 x i16>, i8) nounwind readonly
; packssdw, register-register form: both <16 x i32> operands are function
; arguments, the pass-through is zeroinitializer and the mask is all-ones
; (i32 -1). The expected instruction is unmasked, with a pinned encoding.
355 define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
356 ;CHECK-LABEL: test_mask_packs_epi32_rr_512
357 ;CHECK: vpackssdw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x6b,0xc1]
358 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
; packssdw, register-register form with merge masking: %passThru and %mask are
; passed to the intrinsic, and the expected instruction writes %zmm2 under {%k1}.
362 define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
363 ;CHECK-LABEL: test_mask_packs_epi32_rrk_512
364 ;CHECK: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1]
365 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
; packssdw, register-register form with zero masking: the pass-through is
; zeroinitializer with a caller-supplied %mask, and the expected instruction
; carries {%k1} {z}.
369 define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
370 ;CHECK-LABEL: test_mask_packs_epi32_rrkz_512
371 ;CHECK: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1]
372 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
; packssdw, register-memory form: the second operand is loaded from %ptr_b and
; the mask is all-ones. The expected instruction takes the (%rdi) memory
; operand directly.
376 define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
377 ;CHECK-LABEL: test_mask_packs_epi32_rm_512
378 ;CHECK: vpackssdw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x07]
379 %b = load <16 x i32>, <16 x i32>* %ptr_b
380 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
; packssdw, register-memory form with merge masking: the second operand is
; loaded from %ptr_b, %passThru and %mask are passed to the intrinsic, and the
; expected instruction writes %zmm1 under {%k1}.
384 define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
385 ;CHECK-LABEL: test_mask_packs_epi32_rmk_512
386 ;CHECK: vpackssdw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x0f]
387 %b = load <16 x i32>, <16 x i32>* %ptr_b
388 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
; packssdw, register-memory form with zero masking: the second operand is
; loaded from %ptr_b, the pass-through is zeroinitializer with %mask, and the
; expected instruction carries {%k1} {z}.
392 define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
393 ;CHECK-LABEL: test_mask_packs_epi32_rmkz_512
394 ;CHECK: vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x07]
395 %b = load <16 x i32>, <16 x i32>* %ptr_b
396 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
; packssdw, broadcast-memory form: a scalar i32 is loaded from %ptr_b and
; splatted to all 16 lanes, and the mask is all-ones. The expected instruction
; uses the (%rdi){1to16} broadcast operand.
400 define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
401 ;CHECK-LABEL: test_mask_packs_epi32_rmb_512
402 ;CHECK: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x07]
403 %q = load i32, i32* %ptr_b
; Splat %q: insert it into lane 0, then shuffle with an all-zero index mask.
404 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
405 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
406 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
; packssdw, broadcast-memory form with merge masking: a scalar i32 loaded from
; %ptr_b is splatted, %passThru and %mask are passed to the intrinsic, and the
; expected instruction uses (%rdi){1to16} and writes %zmm1 under {%k1}.
410 define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
411 ;CHECK-LABEL: test_mask_packs_epi32_rmbk_512
412 ;CHECK: vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x0f]
413 %q = load i32, i32* %ptr_b
; Splat %q: insert it into lane 0, then shuffle with an all-zero index mask.
414 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
415 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
416 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
; packssdw, broadcast-memory form with zero masking: a scalar i32 loaded from
; %ptr_b is splatted, the pass-through is zeroinitializer with %mask, and the
; expected instruction uses (%rdi){1to16} with {%k1} {z}.
420 define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
421 ;CHECK-LABEL: test_mask_packs_epi32_rmbkz_512
422 ;CHECK: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x07]
423 %q = load i32, i32* %ptr_b
; Splat %q: insert it into lane 0, then shuffle with an all-zero index mask.
424 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
425 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
426 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
430 declare <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
; packsswb, register-register form: both <32 x i16> operands are function
; arguments, the pass-through is zeroinitializer and the mask is all-ones
; (i64 -1). The expected instruction is unmasked, with a pinned encoding.
432 define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
433 ;CHECK-LABEL: test_mask_packs_epi16_rr_512
434 ;CHECK: vpacksswb %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x63,0xc1]
435 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
; packsswb, register-register form with merge masking: %passThru and the i64
; %mask are passed to the intrinsic, and the expected instruction writes %zmm2
; under {%k1}.
439 define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
440 ;CHECK-LABEL: test_mask_packs_epi16_rrk_512
441 ;CHECK: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x63,0xd1]
442 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
; packsswb, register-register form with zero masking: the pass-through is
; zeroinitializer with a caller-supplied i64 %mask, and the expected
; instruction carries {%k1} {z}.
446 define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
447 ;CHECK-LABEL: test_mask_packs_epi16_rrkz_512
448 ;CHECK: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x63,0xc1]
449 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
; packsswb, register-memory form: the second operand is loaded from %ptr_b and
; the mask is all-ones. The expected instruction takes the (%rdi) memory
; operand directly.
453 define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
454 ;CHECK-LABEL: test_mask_packs_epi16_rm_512
455 ;CHECK: vpacksswb (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x63,0x07]
456 %b = load <32 x i16>, <32 x i16>* %ptr_b
457 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
; packsswb, register-memory form with merge masking: the second operand is
; loaded from %ptr_b, %passThru and %mask are passed to the intrinsic, and the
; expected instruction writes %zmm1 under {%k1}.
461 define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
462 ;CHECK-LABEL: test_mask_packs_epi16_rmk_512
463 ;CHECK: vpacksswb (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x63,0x0f]
464 %b = load <32 x i16>, <32 x i16>* %ptr_b
465 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
; packsswb, register-memory form with zero masking: the second operand is
; loaded from %ptr_b, the pass-through is zeroinitializer with %mask, and the
; expected instruction carries {%k1} {z}.
469 define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
470 ;CHECK-LABEL: test_mask_packs_epi16_rmkz_512
471 ;CHECK: vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x63,0x07]
472 %b = load <32 x i16>, <32 x i16>* %ptr_b
473 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
477 declare <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
; packusdw, register-register form: both <16 x i32> operands are function
; arguments, the pass-through is zeroinitializer and the mask is all-ones
; (i32 -1). The expected instruction is unmasked.
480 define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
481 ;CHECK-LABEL: test_mask_packus_epi32_rr_512
482 ;CHECK: vpackusdw %zmm1, %zmm0, %zmm0
483 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
; packusdw, register-register form with merge masking: %passThru and %mask are
; passed to the intrinsic, and the expected instruction writes %zmm2 under {%k1}.
487 define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
488 ;CHECK-LABEL: test_mask_packus_epi32_rrk_512
489 ;CHECK: vpackusdw %zmm1, %zmm0, %zmm2 {%k1}
490 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
; packusdw, register-register form with zero masking: the pass-through is
; zeroinitializer with a caller-supplied %mask, and the expected instruction
; carries {%k1} {z}.
494 define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
495 ;CHECK-LABEL: test_mask_packus_epi32_rrkz_512
496 ;CHECK: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z}
497 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
; packusdw, register-memory form: the second operand is loaded from %ptr_b and
; the mask is all-ones. The expected instruction takes the (%rdi) memory
; operand directly.
501 define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
502 ;CHECK-LABEL: test_mask_packus_epi32_rm_512
503 ;CHECK: vpackusdw (%rdi), %zmm0, %zmm0
504 %b = load <16 x i32>, <16 x i32>* %ptr_b
505 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
; packusdw, register-memory form with merge masking: the second operand is
; loaded from %ptr_b, %passThru and %mask are passed to the intrinsic, and the
; expected instruction writes %zmm1 under {%k1}.
509 define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
510 ;CHECK-LABEL: test_mask_packus_epi32_rmk_512
511 ;CHECK: vpackusdw (%rdi), %zmm0, %zmm1 {%k1}
512 %b = load <16 x i32>, <16 x i32>* %ptr_b
513 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
; packusdw, register-memory form with zero masking: the second operand is
; loaded from %ptr_b, the pass-through is zeroinitializer with %mask, and the
; expected instruction carries {%k1} {z}.
517 define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
518 ;CHECK-LABEL: test_mask_packus_epi32_rmkz_512
519 ;CHECK: vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z}
520 %b = load <16 x i32>, <16 x i32>* %ptr_b
521 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
; packusdw, broadcast-memory form: a scalar i32 is loaded from %ptr_b and
; splatted to all 16 lanes, and the mask is all-ones. The expected instruction
; uses the (%rdi){1to16} broadcast operand.
525 define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
526 ;CHECK-LABEL: test_mask_packus_epi32_rmb_512
527 ;CHECK: vpackusdw (%rdi){1to16}, %zmm0, %zmm0
528 %q = load i32, i32* %ptr_b
; Splat %q: insert it into lane 0, then shuffle with an all-zero index mask.
529 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
530 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
531 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
; packusdw, broadcast-memory form with merge masking: a scalar i32 loaded from
; %ptr_b is splatted, %passThru and %mask are passed to the intrinsic, and the
; expected instruction uses (%rdi){1to16} and writes %zmm1 under {%k1}.
535 define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
536 ;CHECK-LABEL: test_mask_packus_epi32_rmbk_512
537 ;CHECK: vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1}
538 %q = load i32, i32* %ptr_b
; Splat %q: insert it into lane 0, then shuffle with an all-zero index mask.
539 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
540 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
541 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
; packusdw, broadcast-memory form with zero masking: a scalar i32 loaded from
; %ptr_b is splatted, the pass-through is zeroinitializer with %mask, and the
; expected instruction uses (%rdi){1to16} with {%k1} {z}.
545 define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
546 ;CHECK-LABEL: test_mask_packus_epi32_rmbkz_512
547 ;CHECK: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
548 %q = load i32, i32* %ptr_b
; Splat %q: insert it into lane 0, then shuffle with an all-zero index mask.
549 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
550 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
551 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
555 declare <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
; packuswb, register-register form: both <32 x i16> operands are function
; arguments, the pass-through is zeroinitializer and the mask is all-ones
; (i64 -1). The expected instruction is unmasked.
557 define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
558 ;CHECK-LABEL: test_mask_packus_epi16_rr_512
559 ;CHECK: vpackuswb %zmm1, %zmm0, %zmm0
560 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
; packuswb, register-register form with merge masking: %passThru and the i64
; %mask are passed to the intrinsic, and the expected instruction writes %zmm2
; under {%k1}.
564 define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
565 ;CHECK-LABEL: test_mask_packus_epi16_rrk_512
566 ;CHECK: vpackuswb %zmm1, %zmm0, %zmm2 {%k1}
567 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
; packuswb, register-register form with zero masking: the pass-through is
; zeroinitializer with a caller-supplied i64 %mask, and the expected
; instruction carries {%k1} {z}.
571 define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
572 ;CHECK-LABEL: test_mask_packus_epi16_rrkz_512
573 ;CHECK: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z}
574 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
; packuswb, register-memory form: the second operand is loaded from %ptr_b and
; the mask is all-ones. The expected instruction takes the (%rdi) memory
; operand directly.
578 define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
579 ;CHECK-LABEL: test_mask_packus_epi16_rm_512
580 ;CHECK: vpackuswb (%rdi), %zmm0, %zmm0
581 %b = load <32 x i16>, <32 x i16>* %ptr_b
582 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
; packuswb, register-memory form with merge masking: the second operand is
; loaded from %ptr_b, %passThru and %mask are passed to the intrinsic, and the
; expected instruction writes %zmm1 under {%k1}.
586 define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
587 ;CHECK-LABEL: test_mask_packus_epi16_rmk_512
588 ;CHECK: vpackuswb (%rdi), %zmm0, %zmm1 {%k1}
589 %b = load <32 x i16>, <32 x i16>* %ptr_b
590 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
; packuswb, register-memory form with zero masking: the second operand is
; loaded from %ptr_b, the pass-through is zeroinitializer with %mask, and the
; expected instruction carries {%k1} {z}.
594 define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
595 ;CHECK-LABEL: test_mask_packus_epi16_rmkz_512
596 ;CHECK: vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z}
597 %b = load <32 x i16>, <32 x i16>* %ptr_b
598 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
602 declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
; padds.w, register-register form: both <32 x i16> operands are function
; arguments, the pass-through is zeroinitializer and the mask is all-ones
; (i32 -1). The expected instruction is an unmasked vpaddsw.
604 define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
605 ;CHECK-LABEL: test_mask_adds_epi16_rr_512
606 ;CHECK: vpaddsw %zmm1, %zmm0, %zmm0
607 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; padds.w, register-register form with merge masking: %passThru and %mask are
; passed to the intrinsic, and the expected instruction writes %zmm2 under {%k1}.
611 define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
612 ;CHECK-LABEL: test_mask_adds_epi16_rrk_512
613 ;CHECK: vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
614 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; Register-register call with a zeroinitializer passthru and a variable mask.
; The expected instruction carries both the {%k1} and {z} annotations.
618 define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
619 ;CHECK-LABEL: test_mask_adds_epi16_rrkz_512
620 ;CHECK: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}
621 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
; Second source is loaded from %ptr_b and the mask is the constant -1.
; The expected instruction takes its second source directly from (%rdi).
625 define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
626 ;CHECK-LABEL: test_mask_adds_epi16_rm_512
627 ;CHECK: vpaddsw (%rdi), %zmm0, %zmm0
628 %b = load <32 x i16>, <32 x i16>* %ptr_b
629 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; Second source is loaded from %ptr_b; %passThru and %mask are forwarded to the
; intrinsic. The expected instruction reads (%rdi) and writes %zmm1 under {%k1}.
633 define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
634 ;CHECK-LABEL: test_mask_adds_epi16_rmk_512
635 ;CHECK: vpaddsw (%rdi), %zmm0, %zmm1 {%k1}
636 %b = load <32 x i16>, <32 x i16>* %ptr_b
637 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; Second source is loaded from %ptr_b; the passthru is zeroinitializer and the
; mask is variable. The expected instruction reads (%rdi) with {%k1} {z}.
641 define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
642 ;CHECK-LABEL: test_mask_adds_epi16_rmkz_512
643 ;CHECK: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z}
644 %b = load <32 x i16>, <32 x i16>* %ptr_b
645 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
; Intrinsic exercised by the adds_epi16 tests: two <32 x i16> sources, a
; <32 x i16> value passed as passthru by the callers, and an i32 mask.
649 declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Register-register call with a zeroinitializer passthru and the constant mask -1.
; The expected instruction is the three-register vpsubsw form.
651 define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
652 ;CHECK-LABEL: test_mask_subs_epi16_rr_512
653 ;CHECK: vpsubsw %zmm1, %zmm0, %zmm0
654 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; Register-register call that forwards %passThru and %mask to the intrinsic.
; The expected instruction writes %zmm2 under {%k1}.
658 define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
659 ;CHECK-LABEL: test_mask_subs_epi16_rrk_512
660 ;CHECK: vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
661 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; Register-register call with a zeroinitializer passthru and a variable mask.
; The expected instruction carries both the {%k1} and {z} annotations.
665 define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
666 ;CHECK-LABEL: test_mask_subs_epi16_rrkz_512
667 ;CHECK: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
668 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
; Second source is loaded from %ptr_b and the mask is the constant -1.
; The expected instruction takes its second source directly from (%rdi).
672 define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
673 ;CHECK-LABEL: test_mask_subs_epi16_rm_512
674 ;CHECK: vpsubsw (%rdi), %zmm0, %zmm0
675 %b = load <32 x i16>, <32 x i16>* %ptr_b
676 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; Second source is loaded from %ptr_b; %passThru and %mask are forwarded to the
; intrinsic. The expected instruction reads (%rdi) and writes %zmm1 under {%k1}.
680 define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
681 ;CHECK-LABEL: test_mask_subs_epi16_rmk_512
682 ;CHECK: vpsubsw (%rdi), %zmm0, %zmm1 {%k1}
683 %b = load <32 x i16>, <32 x i16>* %ptr_b
684 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; Second source is loaded from %ptr_b; the passthru is zeroinitializer and the
; mask is variable. The expected instruction reads (%rdi) with {%k1} {z}.
688 define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
689 ;CHECK-LABEL: test_mask_subs_epi16_rmkz_512
690 ;CHECK: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z}
691 %b = load <32 x i16>, <32 x i16>* %ptr_b
692 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
; Intrinsic exercised by the subs_epi16 tests: two <32 x i16> sources, a
; <32 x i16> value passed as passthru by the callers, and an i32 mask.
696 declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Register-register call with a zeroinitializer passthru and the constant mask -1.
; The expected instruction is the three-register vpaddusw form.
698 define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
699 ;CHECK-LABEL: test_mask_adds_epu16_rr_512
700 ;CHECK: vpaddusw %zmm1, %zmm0, %zmm0
701 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; Register-register call that forwards %passThru and %mask to the intrinsic.
; The expected instruction writes %zmm2 under {%k1}.
705 define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
706 ;CHECK-LABEL: test_mask_adds_epu16_rrk_512
707 ;CHECK: vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
708 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; Register-register call with a zeroinitializer passthru and a variable mask.
; The expected instruction carries both the {%k1} and {z} annotations.
712 define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
713 ;CHECK-LABEL: test_mask_adds_epu16_rrkz_512
714 ;CHECK: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
715 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
; Second source is loaded from %ptr_b and the mask is the constant -1.
; The expected instruction takes its second source directly from (%rdi).
719 define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
720 ;CHECK-LABEL: test_mask_adds_epu16_rm_512
721 ;CHECK: vpaddusw (%rdi), %zmm0, %zmm0
722 %b = load <32 x i16>, <32 x i16>* %ptr_b
723 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; Second source is loaded from %ptr_b; %passThru and %mask are forwarded to the
; intrinsic. The expected instruction reads (%rdi) and writes %zmm1 under {%k1}.
727 define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
728 ;CHECK-LABEL: test_mask_adds_epu16_rmk_512
729 ;CHECK: vpaddusw (%rdi), %zmm0, %zmm1 {%k1}
730 %b = load <32 x i16>, <32 x i16>* %ptr_b
731 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; Second source is loaded from %ptr_b; the passthru is zeroinitializer and the
; mask is variable. The expected instruction reads (%rdi) with {%k1} {z}.
735 define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
736 ;CHECK-LABEL: test_mask_adds_epu16_rmkz_512
737 ;CHECK: vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z}
738 %b = load <32 x i16>, <32 x i16>* %ptr_b
739 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
; Intrinsic exercised by the adds_epu16 tests: two <32 x i16> sources, a
; <32 x i16> value passed as passthru by the callers, and an i32 mask.
743 declare <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Register-register call with a zeroinitializer passthru and the constant mask -1.
; The expected instruction is the three-register vpsubusw form.
745 define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
746 ;CHECK-LABEL: test_mask_subs_epu16_rr_512
747 ;CHECK: vpsubusw %zmm1, %zmm0, %zmm0
748 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; Register-register call that forwards %passThru and %mask to the intrinsic.
; The expected instruction writes %zmm2 under {%k1}.
752 define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
753 ;CHECK-LABEL: test_mask_subs_epu16_rrk_512
754 ;CHECK: vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
755 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; Register-register call with a zeroinitializer passthru and a variable mask.
; The expected instruction carries both the {%k1} and {z} annotations.
759 define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
760 ;CHECK-LABEL: test_mask_subs_epu16_rrkz_512
761 ;CHECK: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
762 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
; Second source is loaded from %ptr_b and the mask is the constant -1.
; The expected instruction takes its second source directly from (%rdi).
766 define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
767 ;CHECK-LABEL: test_mask_subs_epu16_rm_512
768 ;CHECK: vpsubusw (%rdi), %zmm0, %zmm0
769 %b = load <32 x i16>, <32 x i16>* %ptr_b
770 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; Second source is loaded from %ptr_b; %passThru and %mask are forwarded to the
; intrinsic. The expected instruction reads (%rdi) and writes %zmm1 under {%k1}.
774 define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
775 ;CHECK-LABEL: test_mask_subs_epu16_rmk_512
776 ;CHECK: vpsubusw (%rdi), %zmm0, %zmm1 {%k1}
777 %b = load <32 x i16>, <32 x i16>* %ptr_b
778 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; Second source is loaded from %ptr_b; the passthru is zeroinitializer and the
; mask is variable. The expected instruction reads (%rdi) with {%k1} {z}.
782 define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
783 ;CHECK-LABEL: test_mask_subs_epu16_rmkz_512
784 ;CHECK: vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z}
785 %b = load <32 x i16>, <32 x i16>* %ptr_b
786 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
; Intrinsic exercised by the subs_epu16 tests: two <32 x i16> sources, a
; <32 x i16> value passed as passthru by the callers, and an i32 mask.
790 declare <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; pmaxs.b.512 test: the intrinsic is called once with the variable mask %x3 and
; once with the constant mask -1, and the two results are added together.
; The visible expectation only requires a vpmaxsb line with a zmm operand.
792 declare <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
794 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_b_512
796 ; CHECK: vpmaxsb %zmm
798 define <64 x i8>@test_int_x86_avx512_mask_pmaxs_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
799 %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
800 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
801 %res2 = add <64 x i8> %res, %res1
; pmaxs.w.512 test: the intrinsic is called once with the variable mask %x3 and
; once with the constant mask -1, and the two results are added together.
; The visible expectation only requires a vpmaxsw line with a zmm operand.
805 declare <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
807 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_w_512
809 ; CHECK: vpmaxsw %zmm
811 define <32 x i16>@test_int_x86_avx512_mask_pmaxs_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
812 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
813 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
814 %res2 = add <32 x i16> %res, %res1
; pmaxu.b.512 test: the intrinsic is called once with the variable mask %x3 and
; once with the constant mask -1, and the two results are added together.
; The visible expectation only requires a vpmaxub line with a zmm operand.
818 declare <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
820 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_b_512
822 ; CHECK: vpmaxub %zmm
824 define <64 x i8>@test_int_x86_avx512_mask_pmaxu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
825 %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
826 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
827 %res2 = add <64 x i8> %res, %res1
; pmaxu.w.512 test: the intrinsic is called once with the variable mask %x3 and
; once with the constant mask -1, and the two results are added together.
; The visible expectation only requires a vpmaxuw line with a zmm operand.
831 declare <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
833 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_w_512
835 ; CHECK: vpmaxuw %zmm
837 define <32 x i16>@test_int_x86_avx512_mask_pmaxu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
838 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
839 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
840 %res2 = add <32 x i16> %res, %res1
; pmins.b.512 test: the intrinsic is called once with the variable mask %x3 and
; once with the constant mask -1, and the two results are added together.
; The visible expectation only requires a vpminsb line with a zmm operand.
844 declare <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
846 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_b_512
848 ; CHECK: vpminsb %zmm
850 define <64 x i8>@test_int_x86_avx512_mask_pmins_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
851 %res = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
852 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
853 %res2 = add <64 x i8> %res, %res1
; pmins.w.512 test: the intrinsic is called once with the variable mask %x3 and
; once with the constant mask -1, and the two results are added together.
; The visible expectation only requires a vpminsw line with a zmm operand.
857 declare <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
859 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_w_512
861 ; CHECK: vpminsw %zmm
863 define <32 x i16>@test_int_x86_avx512_mask_pmins_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
864 %res = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
865 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
866 %res2 = add <32 x i16> %res, %res1
; pminu.b.512 test: the intrinsic is called once with the variable mask %x3 and
; once with the constant mask -1, and the two results are added together.
; The visible expectation only requires a vpminub line with a zmm operand.
870 declare <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
872 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_b_512
874 ; CHECK: vpminub %zmm
876 define <64 x i8>@test_int_x86_avx512_mask_pminu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
877 %res = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
878 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
879 %res2 = add <64 x i8> %res, %res1
; pminu.w.512 test: the intrinsic is called once with the variable mask %x3 and
; once with the constant mask -1, and the two results are added together.
; The visible expectation only requires a vpminuw line with a zmm operand.
883 declare <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
885 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_w_512
887 ; CHECK: vpminuw %zmm
889 define <32 x i16>@test_int_x86_avx512_mask_pminu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
890 %res = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
891 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
892 %res2 = add <32 x i16> %res, %res1
; mask.vpermt2var.hi.512 test: the intrinsic is called with the variable mask
; %x3 and again with the constant mask -1; the two results are added.
; The expectation is a vpermt2w line with a zmm operand followed by {%k1}.
896 declare <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
898 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_hi_512
901 ; CHECK: vpermt2w %zmm{{.*}}{%k1}
902 define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
903 %res = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
904 %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
905 %res2 = add <32 x i16> %res, %res1
; maskz.vpermt2var.hi.512 test: the intrinsic is called with the variable mask
; %x3 and again with the constant mask -1; the two results are added.
; The expectation is a vpermt2w line with a zmm operand followed by {%k1} {z}.
909 declare <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
911 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_hi_512
914 ; CHECK: vpermt2w %zmm{{.*}}{%k1} {z}
915 define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
916 %res = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
917 %res1 = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
918 %res2 = add <32 x i16> %res, %res1
; mask.vpermi2var.hi.512 test: the intrinsic is called with the variable mask
; %x3 and again with the constant mask -1; the two results are added.
; The expectation is a vpermi2w line with a zmm operand followed by {%k1}.
922 declare <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
924 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_hi_512
927 ; CHECK: vpermi2w %zmm{{.*}}{%k1}
928 define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
929 %res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
930 %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
931 %res2 = add <32 x i16> %res, %res1
; pavg.b.512 test: the intrinsic is called once with the variable mask %x3 and
; once with the constant mask -1, and the two results are added together.
; NOTE(review): only the label directive is visible here; confirm that an
; instruction expectation (presumably for vpavgb) follows the label in the file.
935 declare <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
937 ; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_b_512
941 define <64 x i8>@test_int_x86_avx512_mask_pavg_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
942 %res = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
943 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
944 %res2 = add <64 x i8> %res, %res1
; pavg.w.512 test: the intrinsic is called once with the variable mask %x3 and
; once with the constant mask -1, and the two results are added together.
; NOTE(review): only the label directive is visible here; confirm that an
; instruction expectation (presumably for vpavgw) follows the label in the file.
948 declare <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
950 ; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_w_512
954 define <32 x i16>@test_int_x86_avx512_mask_pavg_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
955 %res = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
956 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
957 %res2 = add <32 x i16> %res, %res1
; pshuf.b.512 test: the intrinsic is called once with the variable mask %x3 and
; once with the constant mask -1, and the two results are added together.
; The expectation is a vpshufb line with a zmm operand followed by {%k1}.
961 declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
963 ; CHECK-LABEL: @test_int_x86_avx512_mask_pshuf_b_512
966 ; CHECK: vpshufb %zmm{{.*}}{%k1}
967 define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
968 %res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
969 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
970 %res2 = add <64 x i8> %res, %res1
; pabs.w.512 test: the intrinsic takes two <32 x i16> operands and an i32 mask.
; It is called with the variable mask %x2 and with the constant mask -1, and the
; two results are added. The expectation is a vpabsw line followed by {%k1}.
; NOTE(review): the second vector operand is presumably the passthru - confirm.
974 declare <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16>, <32 x i16>, i32)
976 ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_w_512
979 ; CHECK: vpabsw{{.*}}{%k1}
980 define <32 x i16>@test_int_x86_avx512_mask_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
981 %res = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
982 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1)
983 %res2 = add <32 x i16> %res, %res1
; pabs.b.512 test: the intrinsic takes two <64 x i8> operands and an i64 mask.
; It is called with the variable mask %x2 and with the constant mask -1, and the
; two results are added. The expectation is a vpabsb line followed by {%k1}.
; NOTE(review): the second vector operand is presumably the passthru - confirm.
987 declare <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8>, <64 x i8>, i64)
989 ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_b_512
992 ; CHECK: vpabsb{{.*}}{%k1}
993 define <64 x i8>@test_int_x86_avx512_mask_pabs_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) {
994 %res = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
995 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1)
996 %res2 = add <64 x i8> %res, %res1
; pmulhu.w.512 test: the intrinsic is called with the variable mask %x3 and with
; the constant mask -1; the sum of the two results is returned.
; The expectation matches a vpmulhuw line whose encoding comment (emitted because
; the RUN line passes --show-mc-encoding) starts with byte 0x62.
1000 declare <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1002 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmulhu_w_512
1006 ; CHECK: vpmulhuw {{.*}}encoding: [0x62
1007 define <32 x i16>@test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1008 %res = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1009 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1010 %res2 = add <32 x i16> %res, %res1
1011 ret <32 x i16> %res2
; pmulh.w.512 test: the intrinsic is called with the variable mask %x3 and with
; the constant mask -1; the sum of the two results is returned.
; The expectation matches a vpmulhw line whose encoding comment (emitted because
; the RUN line passes --show-mc-encoding) starts with byte 0x62.
1014 declare <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1016 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmulh_w_512
1020 ; CHECK: vpmulhw {{.*}}encoding: [0x62
1021 define <32 x i16>@test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1022 %res = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1023 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1024 %res2 = add <32 x i16> %res, %res1
1025 ret <32 x i16> %res2