1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw --show-mc-encoding| FileCheck %s
3 define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) {
4 ; CHECK-LABEL: test_pcmpeq_b
5 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 ##
6 %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
10 define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
11 ; CHECK-LABEL: test_mask_pcmpeq_b
12 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} ##
13 %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
17 declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64)
19 define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) {
20 ; CHECK-LABEL: test_pcmpeq_w
21 ; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 ##
22 %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
26 define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
27 ; CHECK-LABEL: test_mask_pcmpeq_w
28 ; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} ##
29 %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
33 declare i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16>, <32 x i16>, i32)
35 define i64 @test_pcmpgt_b(<64 x i8> %a, <64 x i8> %b) {
36 ; CHECK-LABEL: test_pcmpgt_b
37 ; CHECK: vpcmpgtb %zmm1, %zmm0, %k0 ##
38 %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
42 define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
43 ; CHECK-LABEL: test_mask_pcmpgt_b
44 ; CHECK: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} ##
45 %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
49 declare i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8>, <64 x i8>, i64)
51 define i32 @test_pcmpgt_w(<32 x i16> %a, <32 x i16> %b) {
52 ; CHECK-LABEL: test_pcmpgt_w
53 ; CHECK: vpcmpgtw %zmm1, %zmm0, %k0 ##
54 %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
58 define i32 @test_mask_pcmpgt_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
59 ; CHECK-LABEL: test_mask_pcmpgt_w
60 ; CHECK: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} ##
61 %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
65 declare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32)
67 define <8 x i64> @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
68 ; CHECK-LABEL: test_cmp_b_512
69 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 ##
70 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
71 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
72 ; CHECK: vpcmpltb %zmm1, %zmm0, %k0 ##
73 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
74 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
75 ; CHECK: vpcmpleb %zmm1, %zmm0, %k0 ##
76 %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
77 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
78 ; CHECK: vpcmpunordb %zmm1, %zmm0, %k0 ##
79 %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
80 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
81 ; CHECK: vpcmpneqb %zmm1, %zmm0, %k0 ##
82 %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
83 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
84 ; CHECK: vpcmpnltb %zmm1, %zmm0, %k0 ##
85 %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
86 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
87 ; CHECK: vpcmpnleb %zmm1, %zmm0, %k0 ##
88 %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
89 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
90 ; CHECK: vpcmpordb %zmm1, %zmm0, %k0 ##
91 %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
92 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
96 define <8 x i64> @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
97 ; CHECK-LABEL: test_mask_cmp_b_512
98 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} ##
99 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
100 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
101 ; CHECK: vpcmpltb %zmm1, %zmm0, %k0 {%k1} ##
102 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
103 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
104 ; CHECK: vpcmpleb %zmm1, %zmm0, %k0 {%k1} ##
105 %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
106 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
107 ; CHECK: vpcmpunordb %zmm1, %zmm0, %k0 {%k1} ##
108 %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
109 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
110 ; CHECK: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} ##
111 %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
112 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
113 ; CHECK: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} ##
114 %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
115 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
116 ; CHECK: vpcmpnleb %zmm1, %zmm0, %k0 {%k1} ##
117 %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
118 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
119 ; CHECK: vpcmpordb %zmm1, %zmm0, %k0 {%k1} ##
120 %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
121 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
125 declare i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
127 define <8 x i64> @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
128 ; CHECK-LABEL: test_ucmp_b_512
129 ; CHECK: vpcmpequb %zmm1, %zmm0, %k0 ##
130 %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
131 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
132 ; CHECK: vpcmpltub %zmm1, %zmm0, %k0 ##
133 %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
134 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
135 ; CHECK: vpcmpleub %zmm1, %zmm0, %k0 ##
136 %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
137 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
138 ; CHECK: vpcmpunordub %zmm1, %zmm0, %k0 ##
139 %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
140 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
141 ; CHECK: vpcmpnequb %zmm1, %zmm0, %k0 ##
142 %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
143 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
144 ; CHECK: vpcmpnltub %zmm1, %zmm0, %k0 ##
145 %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
146 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
147 ; CHECK: vpcmpnleub %zmm1, %zmm0, %k0 ##
148 %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
149 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
150 ; CHECK: vpcmpordub %zmm1, %zmm0, %k0 ##
151 %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
152 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
156 define <8 x i64> @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
157 ; CHECK-LABEL: test_mask_x86_avx512_ucmp_b_512
158 ; CHECK: vpcmpequb %zmm1, %zmm0, %k0 {%k1} ##
159 %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
160 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
161 ; CHECK: vpcmpltub %zmm1, %zmm0, %k0 {%k1} ##
162 %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
163 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
164 ; CHECK: vpcmpleub %zmm1, %zmm0, %k0 {%k1} ##
165 %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
166 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
167 ; CHECK: vpcmpunordub %zmm1, %zmm0, %k0 {%k1} ##
168 %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
169 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
170 ; CHECK: vpcmpnequb %zmm1, %zmm0, %k0 {%k1} ##
171 %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
172 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
173 ; CHECK: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} ##
174 %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
175 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
176 ; CHECK: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} ##
177 %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
178 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
179 ; CHECK: vpcmpordub %zmm1, %zmm0, %k0 {%k1} ##
180 %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
181 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
185 declare i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
187 define <8 x i32> @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
188 ; CHECK-LABEL: test_cmp_w_512
189 ; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 ##
190 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
191 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
192 ; CHECK: vpcmpltw %zmm1, %zmm0, %k0 ##
193 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
194 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
195 ; CHECK: vpcmplew %zmm1, %zmm0, %k0 ##
196 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
197 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
198 ; CHECK: vpcmpunordw %zmm1, %zmm0, %k0 ##
199 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
200 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
201 ; CHECK: vpcmpneqw %zmm1, %zmm0, %k0 ##
202 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
203 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
204 ; CHECK: vpcmpnltw %zmm1, %zmm0, %k0 ##
205 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
206 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
207 ; CHECK: vpcmpnlew %zmm1, %zmm0, %k0 ##
208 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
209 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
210 ; CHECK: vpcmpordw %zmm1, %zmm0, %k0 ##
211 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
212 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
216 define <8 x i32> @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
217 ; CHECK-LABEL: test_mask_cmp_w_512
218 ; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} ##
219 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
220 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
221 ; CHECK: vpcmpltw %zmm1, %zmm0, %k0 {%k1} ##
222 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
223 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
224 ; CHECK: vpcmplew %zmm1, %zmm0, %k0 {%k1} ##
225 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
226 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
227 ; CHECK: vpcmpunordw %zmm1, %zmm0, %k0 {%k1} ##
228 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
229 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
230 ; CHECK: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} ##
231 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
232 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
233 ; CHECK: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} ##
234 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
235 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
236 ; CHECK: vpcmpnlew %zmm1, %zmm0, %k0 {%k1} ##
237 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
238 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
239 ; CHECK: vpcmpordw %zmm1, %zmm0, %k0 {%k1} ##
240 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
241 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
245 declare i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
247 define <8 x i32> @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
248 ; CHECK-LABEL: test_ucmp_w_512
249 ; CHECK: vpcmpequw %zmm1, %zmm0, %k0 ##
250 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
251 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
252 ; CHECK: vpcmpltuw %zmm1, %zmm0, %k0 ##
253 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
254 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
255 ; CHECK: vpcmpleuw %zmm1, %zmm0, %k0 ##
256 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
257 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
258 ; CHECK: vpcmpunorduw %zmm1, %zmm0, %k0 ##
259 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
260 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
261 ; CHECK: vpcmpnequw %zmm1, %zmm0, %k0 ##
262 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
263 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
264 ; CHECK: vpcmpnltuw %zmm1, %zmm0, %k0 ##
265 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
266 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
267 ; CHECK: vpcmpnleuw %zmm1, %zmm0, %k0 ##
268 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
269 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
270 ; CHECK: vpcmporduw %zmm1, %zmm0, %k0 ##
271 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
272 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
276 define <8 x i32> @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
277 ; CHECK-LABEL: test_mask_ucmp_w_512
278 ; CHECK: vpcmpequw %zmm1, %zmm0, %k0 {%k1} ##
279 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
280 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
281 ; CHECK: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} ##
282 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
283 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
284 ; CHECK: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} ##
285 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
286 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
287 ; CHECK: vpcmpunorduw %zmm1, %zmm0, %k0 {%k1} ##
288 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
289 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
290 ; CHECK: vpcmpnequw %zmm1, %zmm0, %k0 {%k1} ##
291 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
292 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
293 ; CHECK: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} ##
294 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
295 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
296 ; CHECK: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} ##
297 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
298 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
299 ; CHECK: vpcmporduw %zmm1, %zmm0, %k0 {%k1} ##
300 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
301 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
305 declare i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
307 declare <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8>, <64 x i8>, i64) nounwind readonly
309 ; CHECK-LABEL: test_x86_mask_blend_w_512
310 define <32 x i16> @test_x86_mask_blend_w_512(i32 %mask, <32 x i16> %a1, <32 x i16> %a2) {
312 %res = call <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16> %a1, <32 x i16> %a2, i32 %mask) ; <<32 x i16>> [#uses=1]
315 declare <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16>, <32 x i16>, i32) nounwind readonly
317 ; CHECK-LABEL: test_x86_mask_blend_b_512
319 define <64 x i8> @test_x86_mask_blend_b_512(i64 %a0, <64 x i8> %a1, <64 x i8> %a2) {
320 %res = call <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8> %a1, <64 x i8> %a2, i64 %a0) ; <<64 x i8>> [#uses=1]
324 define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
325 ;CHECK-LABEL: test_mask_packs_epi32_rr_512
326 ;CHECK: vpackssdw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x6b,0xc1]
327 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
331 define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
332 ;CHECK-LABEL: test_mask_packs_epi32_rrk_512
333 ;CHECK: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1]
334 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
338 define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
339 ;CHECK-LABEL: test_mask_packs_epi32_rrkz_512
340 ;CHECK: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1]
341 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
345 define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
346 ;CHECK-LABEL: test_mask_packs_epi32_rm_512
347 ;CHECK: vpackssdw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x07]
348 %b = load <16 x i32>, <16 x i32>* %ptr_b
349 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
353 define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
354 ;CHECK-LABEL: test_mask_packs_epi32_rmk_512
355 ;CHECK: vpackssdw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x0f]
356 %b = load <16 x i32>, <16 x i32>* %ptr_b
357 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
361 define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
362 ;CHECK-LABEL: test_mask_packs_epi32_rmkz_512
363 ;CHECK: vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x07]
364 %b = load <16 x i32>, <16 x i32>* %ptr_b
365 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
369 define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
370 ;CHECK-LABEL: test_mask_packs_epi32_rmb_512
371 ;CHECK: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x07]
372 %q = load i32, i32* %ptr_b
373 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
374 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
375 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
379 define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
380 ;CHECK-LABEL: test_mask_packs_epi32_rmbk_512
381 ;CHECK: vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x0f]
382 %q = load i32, i32* %ptr_b
383 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
384 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
385 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
389 define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
390 ;CHECK-LABEL: test_mask_packs_epi32_rmbkz_512
391 ;CHECK: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x07]
392 %q = load i32, i32* %ptr_b
393 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
394 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
395 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
399 declare <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
401 define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
402 ;CHECK-LABEL: test_mask_packs_epi16_rr_512
403 ;CHECK: vpacksswb %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x63,0xc1]
404 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
408 define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
409 ;CHECK-LABEL: test_mask_packs_epi16_rrk_512
410 ;CHECK: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x63,0xd1]
411 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
415 define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
416 ;CHECK-LABEL: test_mask_packs_epi16_rrkz_512
417 ;CHECK: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x63,0xc1]
418 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
422 define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
423 ;CHECK-LABEL: test_mask_packs_epi16_rm_512
424 ;CHECK: vpacksswb (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x63,0x07]
425 %b = load <32 x i16>, <32 x i16>* %ptr_b
426 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
430 define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
431 ;CHECK-LABEL: test_mask_packs_epi16_rmk_512
432 ;CHECK: vpacksswb (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x63,0x0f]
433 %b = load <32 x i16>, <32 x i16>* %ptr_b
434 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
438 define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
439 ;CHECK-LABEL: test_mask_packs_epi16_rmkz_512
440 ;CHECK: vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x63,0x07]
441 %b = load <32 x i16>, <32 x i16>* %ptr_b
442 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
446 declare <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
449 define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
450 ;CHECK-LABEL: test_mask_packus_epi32_rr_512
451 ;CHECK: vpackusdw %zmm1, %zmm0, %zmm0
452 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
456 define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
457 ;CHECK-LABEL: test_mask_packus_epi32_rrk_512
458 ;CHECK: vpackusdw %zmm1, %zmm0, %zmm2 {%k1}
459 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
463 define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
464 ;CHECK-LABEL: test_mask_packus_epi32_rrkz_512
465 ;CHECK: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z}
466 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
470 define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
471 ;CHECK-LABEL: test_mask_packus_epi32_rm_512
472 ;CHECK: vpackusdw (%rdi), %zmm0, %zmm0
473 %b = load <16 x i32>, <16 x i32>* %ptr_b
474 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
478 define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
479 ;CHECK-LABEL: test_mask_packus_epi32_rmk_512
480 ;CHECK: vpackusdw (%rdi), %zmm0, %zmm1 {%k1}
481 %b = load <16 x i32>, <16 x i32>* %ptr_b
482 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
486 define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
487 ;CHECK-LABEL: test_mask_packus_epi32_rmkz_512
488 ;CHECK: vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z}
489 %b = load <16 x i32>, <16 x i32>* %ptr_b
490 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
494 define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
495 ;CHECK-LABEL: test_mask_packus_epi32_rmb_512
496 ;CHECK: vpackusdw (%rdi){1to16}, %zmm0, %zmm0
497 %q = load i32, i32* %ptr_b
498 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
499 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
500 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
504 define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
505 ;CHECK-LABEL: test_mask_packus_epi32_rmbk_512
506 ;CHECK: vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1}
507 %q = load i32, i32* %ptr_b
508 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
509 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
510 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
514 define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
515 ;CHECK-LABEL: test_mask_packus_epi32_rmbkz_512
516 ;CHECK: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
517 %q = load i32, i32* %ptr_b
518 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
519 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
520 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
524 declare <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
526 define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
527 ;CHECK-LABEL: test_mask_packus_epi16_rr_512
528 ;CHECK: vpackuswb %zmm1, %zmm0, %zmm0
529 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
533 define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
534 ;CHECK-LABEL: test_mask_packus_epi16_rrk_512
535 ;CHECK: vpackuswb %zmm1, %zmm0, %zmm2 {%k1}
536 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
540 define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
541 ;CHECK-LABEL: test_mask_packus_epi16_rrkz_512
542 ;CHECK: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z}
543 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
547 define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
548 ;CHECK-LABEL: test_mask_packus_epi16_rm_512
549 ;CHECK: vpackuswb (%rdi), %zmm0, %zmm0
550 %b = load <32 x i16>, <32 x i16>* %ptr_b
551 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
555 define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
556 ;CHECK-LABEL: test_mask_packus_epi16_rmk_512
557 ;CHECK: vpackuswb (%rdi), %zmm0, %zmm1 {%k1}
558 %b = load <32 x i16>, <32 x i16>* %ptr_b
559 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
563 define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
564 ;CHECK-LABEL: test_mask_packus_epi16_rmkz_512
565 ;CHECK: vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z}
566 %b = load <32 x i16>, <32 x i16>* %ptr_b
567 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
571 declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
573 define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
574 ;CHECK-LABEL: test_mask_adds_epi16_rr_512
575 ;CHECK: vpaddsw %zmm1, %zmm0, %zmm0
576 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
580 define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
581 ;CHECK-LABEL: test_mask_adds_epi16_rrk_512
582 ;CHECK: vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
583 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
587 define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
588 ;CHECK-LABEL: test_mask_adds_epi16_rrkz_512
589 ;CHECK: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}
590 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
594 define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
595 ;CHECK-LABEL: test_mask_adds_epi16_rm_512
596 ;CHECK: vpaddsw (%rdi), %zmm0, %zmm0
597 %b = load <32 x i16>, <32 x i16>* %ptr_b
598 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
602 define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
603 ;CHECK-LABEL: test_mask_adds_epi16_rmk_512
604 ;CHECK: vpaddsw (%rdi), %zmm0, %zmm1 {%k1}
605 %b = load <32 x i16>, <32 x i16>* %ptr_b
606 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; padds.w.512 where the second operand is loaded from %ptr_b, with a
; zeroinitializer pass-through and a variable %mask. The expected vpaddsw
; reads (%rdi) and carries {%k1} {z}.
610 define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
611 ;CHECK-LABEL: test_mask_adds_epi16_rmkz_512
612 ;CHECK: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z}
613 %b = load <32 x i16>, <32 x i16>* %ptr_b
614 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
618 declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; psubs.w.512 with register operands, an all-ones mask (i32 -1) and a
; zeroinitializer pass-through. The expected vpsubsw has no {%k} annotation.
620 define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
621 ;CHECK-LABEL: test_mask_subs_epi16_rr_512
622 ;CHECK: vpsubsw %zmm1, %zmm0, %zmm0
623 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; psubs.w.512 with register operands, a caller-supplied %passThru and a
; variable %mask. The expected vpsubsw writes %zmm2 under write-mask {%k1}.
627 define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
628 ;CHECK-LABEL: test_mask_subs_epi16_rrk_512
629 ;CHECK: vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
630 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; psubs.w.512 with register operands, a zeroinitializer pass-through and a
; variable %mask. The expected vpsubsw carries {%k1} {z}.
634 define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
635 ;CHECK-LABEL: test_mask_subs_epi16_rrkz_512
636 ;CHECK: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
637 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
; psubs.w.512 where the second operand is loaded from %ptr_b; all-ones mask
; and zeroinitializer pass-through. The expected vpsubsw uses (%rdi) directly
; as an operand and has no mask annotation.
641 define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
642 ;CHECK-LABEL: test_mask_subs_epi16_rm_512
643 ;CHECK: vpsubsw (%rdi), %zmm0, %zmm0
644 %b = load <32 x i16>, <32 x i16>* %ptr_b
645 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; psubs.w.512 where the second operand is loaded from %ptr_b, with a
; caller-supplied %passThru and a variable %mask. The expected vpsubsw reads
; (%rdi) and writes %zmm1 under {%k1}.
649 define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
650 ;CHECK-LABEL: test_mask_subs_epi16_rmk_512
651 ;CHECK: vpsubsw (%rdi), %zmm0, %zmm1 {%k1}
652 %b = load <32 x i16>, <32 x i16>* %ptr_b
653 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; psubs.w.512 where the second operand is loaded from %ptr_b, with a
; zeroinitializer pass-through and a variable %mask. The expected vpsubsw
; reads (%rdi) and carries {%k1} {z}.
657 define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
658 ;CHECK-LABEL: test_mask_subs_epi16_rmkz_512
659 ;CHECK: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z}
660 %b = load <32 x i16>, <32 x i16>* %ptr_b
661 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
665 declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; paddus.w.512 with register operands, an all-ones mask (i32 -1) and a
; zeroinitializer pass-through. The expected vpaddusw has no {%k} annotation.
667 define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
668 ;CHECK-LABEL: test_mask_adds_epu16_rr_512
669 ;CHECK: vpaddusw %zmm1, %zmm0, %zmm0
670 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; paddus.w.512 with register operands, a caller-supplied %passThru and a
; variable %mask. The expected vpaddusw writes %zmm2 under write-mask {%k1}.
674 define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
675 ;CHECK-LABEL: test_mask_adds_epu16_rrk_512
676 ;CHECK: vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
677 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; paddus.w.512 with register operands, a zeroinitializer pass-through and a
; variable %mask. The expected vpaddusw carries {%k1} {z}.
681 define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
682 ;CHECK-LABEL: test_mask_adds_epu16_rrkz_512
683 ;CHECK: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
684 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
; paddus.w.512 where the second operand is loaded from %ptr_b; all-ones mask
; and zeroinitializer pass-through. The expected vpaddusw uses (%rdi) directly
; as an operand and has no mask annotation.
688 define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
689 ;CHECK-LABEL: test_mask_adds_epu16_rm_512
690 ;CHECK: vpaddusw (%rdi), %zmm0, %zmm0
691 %b = load <32 x i16>, <32 x i16>* %ptr_b
692 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; paddus.w.512 where the second operand is loaded from %ptr_b, with a
; caller-supplied %passThru and a variable %mask. The expected vpaddusw reads
; (%rdi) and writes %zmm1 under {%k1}.
696 define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
697 ;CHECK-LABEL: test_mask_adds_epu16_rmk_512
698 ;CHECK: vpaddusw (%rdi), %zmm0, %zmm1 {%k1}
699 %b = load <32 x i16>, <32 x i16>* %ptr_b
700 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; paddus.w.512 where the second operand is loaded from %ptr_b, with a
; zeroinitializer pass-through and a variable %mask. The expected vpaddusw
; reads (%rdi) and carries {%k1} {z}.
704 define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
705 ;CHECK-LABEL: test_mask_adds_epu16_rmkz_512
706 ;CHECK: vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z}
707 %b = load <32 x i16>, <32 x i16>* %ptr_b
708 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
712 declare <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; psubus.w.512 with register operands, an all-ones mask (i32 -1) and a
; zeroinitializer pass-through. The expected vpsubusw has no {%k} annotation.
714 define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
715 ;CHECK-LABEL: test_mask_subs_epu16_rr_512
716 ;CHECK: vpsubusw %zmm1, %zmm0, %zmm0
717 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; psubus.w.512 with register operands, a caller-supplied %passThru and a
; variable %mask. The expected vpsubusw writes %zmm2 under write-mask {%k1}.
721 define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
722 ;CHECK-LABEL: test_mask_subs_epu16_rrk_512
723 ;CHECK: vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
724 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; psubus.w.512 with register operands, a zeroinitializer pass-through and a
; variable %mask. The expected vpsubusw carries {%k1} {z}.
728 define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
729 ;CHECK-LABEL: test_mask_subs_epu16_rrkz_512
730 ;CHECK: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
731 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
; psubus.w.512 where the second operand is loaded from %ptr_b; all-ones mask
; and zeroinitializer pass-through. The expected vpsubusw uses (%rdi) directly
; as an operand and has no mask annotation.
735 define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
736 ;CHECK-LABEL: test_mask_subs_epu16_rm_512
737 ;CHECK: vpsubusw (%rdi), %zmm0, %zmm0
738 %b = load <32 x i16>, <32 x i16>* %ptr_b
739 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; psubus.w.512 where the second operand is loaded from %ptr_b, with a
; caller-supplied %passThru and a variable %mask. The expected vpsubusw reads
; (%rdi) and writes %zmm1 under {%k1}.
743 define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
744 ;CHECK-LABEL: test_mask_subs_epu16_rmk_512
745 ;CHECK: vpsubusw (%rdi), %zmm0, %zmm1 {%k1}
746 %b = load <32 x i16>, <32 x i16>* %ptr_b
747 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; psubus.w.512 where the second operand is loaded from %ptr_b, with a
; zeroinitializer pass-through and a variable %mask. The expected vpsubusw
; reads (%rdi) and carries {%k1} {z}.
751 define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
752 ;CHECK-LABEL: test_mask_subs_epu16_rmkz_512
753 ;CHECK: vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z}
754 %b = load <32 x i16>, <32 x i16>* %ptr_b
755 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
759 declare <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; pmaxs.b.512: the intrinsic is called twice on the same operands, once with
; the variable mask %x3 and once with an all-ones mask (i64 -1), and the two
; results are added. The visible expectation only requires a vpmaxsb with a
; zmm operand after the function label.
761 declare <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
763 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_b_512
765 ; CHECK: vpmaxsb %zmm
767 define <64 x i8>@test_int_x86_avx512_mask_pmaxs_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
768 %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
769 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
770 %res2 = add <64 x i8> %res, %res1
; pmaxs.w.512: the intrinsic is called twice on the same operands, once with
; the variable mask %x3 and once with an all-ones mask (i32 -1), and the two
; results are added. The visible expectation only requires a vpmaxsw with a
; zmm operand after the function label.
774 declare <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
776 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_w_512
778 ; CHECK: vpmaxsw %zmm
780 define <32 x i16>@test_int_x86_avx512_mask_pmaxs_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
781 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
782 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
783 %res2 = add <32 x i16> %res, %res1
; pmaxu.b.512: the intrinsic is called twice on the same operands, once with
; the variable mask %x3 and once with an all-ones mask (i64 -1), and the two
; results are added. The visible expectation only requires a vpmaxub with a
; zmm operand after the function label.
787 declare <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
789 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_b_512
791 ; CHECK: vpmaxub %zmm
793 define <64 x i8>@test_int_x86_avx512_mask_pmaxu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
794 %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
795 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
796 %res2 = add <64 x i8> %res, %res1
; pmaxu.w.512: the intrinsic is called twice on the same operands, once with
; the variable mask %x3 and once with an all-ones mask (i32 -1), and the two
; results are added. The visible expectation only requires a vpmaxuw with a
; zmm operand after the function label.
800 declare <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
802 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_w_512
804 ; CHECK: vpmaxuw %zmm
806 define <32 x i16>@test_int_x86_avx512_mask_pmaxu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
807 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
808 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
809 %res2 = add <32 x i16> %res, %res1
; pmins.b.512: the intrinsic is called twice on the same operands, once with
; the variable mask %x3 and once with an all-ones mask (i64 -1), and the two
; results are added. The visible expectation only requires a vpminsb with a
; zmm operand after the function label.
813 declare <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
815 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_b_512
817 ; CHECK: vpminsb %zmm
819 define <64 x i8>@test_int_x86_avx512_mask_pmins_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
820 %res = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
821 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
822 %res2 = add <64 x i8> %res, %res1
; pmins.w.512: the intrinsic is called twice on the same operands, once with
; the variable mask %x3 and once with an all-ones mask (i32 -1), and the two
; results are added. The visible expectation only requires a vpminsw with a
; zmm operand after the function label.
826 declare <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
828 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_w_512
830 ; CHECK: vpminsw %zmm
832 define <32 x i16>@test_int_x86_avx512_mask_pmins_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
833 %res = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
834 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
835 %res2 = add <32 x i16> %res, %res1
; pminu.b.512: the intrinsic is called twice on the same operands, once with
; the variable mask %x3 and once with an all-ones mask (i64 -1), and the two
; results are added. The visible expectation only requires a vpminub with a
; zmm operand after the function label.
839 declare <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
841 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_b_512
843 ; CHECK: vpminub %zmm
845 define <64 x i8>@test_int_x86_avx512_mask_pminu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
846 %res = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
847 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
848 %res2 = add <64 x i8> %res, %res1
; pminu.w.512: the intrinsic is called twice on the same operands, once with
; the variable mask %x3 and once with an all-ones mask (i32 -1), and the two
; results are added. The visible expectation only requires a vpminuw with a
; zmm operand after the function label.
852 declare <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
854 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_w_512
856 ; CHECK: vpminuw %zmm
858 define <32 x i16>@test_int_x86_avx512_mask_pminu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
859 %res = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
860 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
861 %res2 = add <32 x i16> %res, %res1
; mask.vpermt2var.hi.512: called once with the variable mask %x3 and once
; with an all-ones mask (i32 -1); the two results are added. The visible
; expectation requires a vpermt2w on a zmm operand with a {%k1} write-mask on
; the same output line.
865 declare <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
867 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_hi_512
870 ; CHECK: vpermt2w %zmm{{.*}}{%k1}
871 define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
872 %res = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
873 %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
874 %res2 = add <32 x i16> %res, %res1
; maskz.vpermt2var.hi.512: called once with the variable mask %x3 and once
; with an all-ones mask (i32 -1); the two results are added. The visible
; expectation requires a vpermt2w on a zmm operand carrying {%k1} {z} on the
; same output line.
878 declare <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
880 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_hi_512
883 ; CHECK: vpermt2w %zmm{{.*}}{%k1} {z}
884 define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
885 %res = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
886 %res1 = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
887 %res2 = add <32 x i16> %res, %res1
; mask.vpermi2var.hi.512: called once with the variable mask %x3 and once
; with an all-ones mask (i32 -1); the two results are added. The visible
; expectation requires a vpermi2w on a zmm operand with a {%k1} write-mask on
; the same output line.
891 declare <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
893 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_hi_512
896 ; CHECK: vpermi2w %zmm{{.*}}{%k1}
897 define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
898 %res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
899 %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
900 %res2 = add <32 x i16> %res, %res1
; pavg.b.512: called once with the variable mask %x3 and once with an
; all-ones mask (i64 -1); the two results are added.
; NOTE(review): only the function-label expectation is visible in this span;
; confirm that instruction-level expectations exist in the full file.
904 declare <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
906 ; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_b_512
910 define <64 x i8>@test_int_x86_avx512_mask_pavg_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
911 %res = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
912 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
913 %res2 = add <64 x i8> %res, %res1
; pavg.w.512: called once with the variable mask %x3 and once with an
; all-ones mask (i32 -1); the two results are added.
; NOTE(review): only the function-label expectation is visible in this span;
; confirm that instruction-level expectations exist in the full file.
917 declare <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
919 ; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_w_512
923 define <32 x i16>@test_int_x86_avx512_mask_pavg_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
924 %res = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
925 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
926 %res2 = add <32 x i16> %res, %res1
; pshuf.b.512: called once with the variable mask %x3 and once with an
; all-ones mask (i64 -1); the two results are added. The visible expectation
; requires a vpshufb on a zmm operand with a {%k1} write-mask on the same
; output line.
930 declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
932 ; CHECK-LABEL: @test_int_x86_avx512_mask_pshuf_b_512
935 ; CHECK: vpshufb %zmm{{.*}}{%k1}
936 define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
937 %res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
938 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
939 %res2 = add <64 x i8> %res, %res1
; pabs.w.512 takes one source vector (%x0), a second vector argument (%x1)
; and a mask. It is called once with the variable mask %x2 and once with an
; all-ones mask (i32 -1); the two results are added. The visible expectation
; requires a vpabsw with a {%k1} write-mask on the same output line.
943 declare <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16>, <32 x i16>, i32)
945 ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_w_512
948 ; CHECK: vpabsw{{.*}}{%k1}
949 define <32 x i16>@test_int_x86_avx512_mask_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
950 %res = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
951 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1)
952 %res2 = add <32 x i16> %res, %res1
; pabs.b.512 takes one source vector (%x0), a second vector argument (%x1)
; and a mask. It is called once with the variable mask %x2 and once with an
; all-ones mask (i64 -1); the two results are added. The visible expectation
; requires a vpabsb with a {%k1} write-mask on the same output line.
956 declare <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8>, <64 x i8>, i64)
958 ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_b_512
961 ; CHECK: vpabsb{{.*}}{%k1}
962 define <64 x i8>@test_int_x86_avx512_mask_pabs_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) {
963 %res = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
964 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1)
965 %res2 = add <64 x i8> %res, %res1
; pmulhu.w.512: called once with the variable mask %x3 and once with an
; all-ones mask (i32 -1); the two results are added. The visible expectation
; requires a vpmulhuw whose printed encoding starts with byte 0x62 (the RUN
; line enables --show-mc-encoding).
969 declare <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
971 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmulhu_w_512
975 ; CHECK: vpmulhuw {{.*}}encoding: [0x62
976 define <32 x i16>@test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
977 %res = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
978 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
979 %res2 = add <32 x i16> %res, %res1
; pmulh.w.512: called once with the variable mask %x3 and once with an
; all-ones mask (i32 -1); the two results are added. The visible expectation
; requires a vpmulhw whose printed encoding starts with byte 0x62 (the RUN
; line enables --show-mc-encoding).
983 declare <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
985 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmulh_w_512
989 ; CHECK: vpmulhw {{.*}}encoding: [0x62
990 define <32 x i16>@test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
991 %res = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
992 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
993 %res2 = add <32 x i16> %res, %res1
; pmul.hr.sw.512: called once with the variable mask %x3 and once with an
; all-ones mask (i32 -1); the sum of the two results is returned. The visible
; expectation requires a vpmulhrsw whose printed encoding starts with byte
; 0x62 (the RUN line enables --show-mc-encoding).
997 declare <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
999 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmulhr_sw_512
1003 ; CHECK: vpmulhrsw {{.*}}encoding: [0x62
1004 define <32 x i16>@test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1005 %res = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1006 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1007 %res2 = add <32 x i16> %res, %res1
1008 ret <32 x i16> %res2
; pmov.wb.512 (<32 x i16> source, <32 x i8> result) is called three times:
;   %res0: pass-through %x1, all-ones mask (i32 -1)
;   %res1: pass-through %x1, variable mask %x2
;   %res2: zeroinitializer pass-through, variable mask %x2
; The three results are added. The expectations require three vpmovwb
; instructions on consecutive output lines: merge-masked into %ymm1,
; zero-masked into %ymm2, and unmasked into %ymm0.
1011 declare <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16>, <32 x i8>, i32)
1013 define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
1014 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
1015 ; CHECK: vpmovwb %zmm0, %ymm1 {%k1}
1016 ; CHECK-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z}
1017 ; CHECK-NEXT: vpmovwb %zmm0, %ymm0
1018 %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
1019 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
1020 %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
1021 %res3 = add <32 x i8> %res0, %res1
1022 %res4 = add <32 x i8> %res3, %res2
; pmov.wb.mem.512 (void, takes a pointer) is called twice on the same %ptr
; and source: first with an all-ones mask (i32 -1), then with the variable
; mask %x2. The expectations require a vpmovwb to (%rdi) with no mask
; annotation, followed later by one to (%rdi) under {%k1}.
1026 declare void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16>, i32)
1028 define void @test_int_x86_avx512_mask_pmov_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
1029 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
1030 ; CHECK: vpmovwb %zmm0, (%rdi)
1031 ; CHECK: vpmovwb %zmm0, (%rdi) {%k1}
1032 call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
1033 call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
; pmovs.wb.512 (<32 x i16> source, <32 x i8> result) is called three times:
;   %res0: pass-through %x1, all-ones mask (i32 -1)
;   %res1: pass-through %x1, variable mask %x2
;   %res2: zeroinitializer pass-through, variable mask %x2
; The three results are added. The expectations require three vpmovswb
; instructions on consecutive output lines: merge-masked into %ymm1,
; zero-masked into %ymm2, and unmasked into %ymm0.
1037 declare <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16>, <32 x i8>, i32)
1039 define <32 x i8>@test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
1040 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
1041 ; CHECK: vpmovswb %zmm0, %ymm1 {%k1}
1042 ; CHECK-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z}
1043 ; CHECK-NEXT: vpmovswb %zmm0, %ymm0
1044 %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
1045 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
1046 %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
1047 %res3 = add <32 x i8> %res0, %res1
1048 %res4 = add <32 x i8> %res3, %res2
; pmovs.wb.mem.512 (void, takes a pointer) is called twice on the same %ptr
; and source: first with an all-ones mask (i32 -1), then with the variable
; mask %x2. The expectations require a vpmovswb to (%rdi) with no mask
; annotation, followed later by one to (%rdi) under {%k1}.
1052 declare void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16>, i32)
1054 define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
1055 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
1056 ; CHECK: vpmovswb %zmm0, (%rdi)
1057 ; CHECK: vpmovswb %zmm0, (%rdi) {%k1}
1058 call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
1059 call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
; pmovus.wb.512 (<32 x i16> source, <32 x i8> result) is called three times:
;   %res0: pass-through %x1, all-ones mask (i32 -1)
;   %res1: pass-through %x1, variable mask %x2
;   %res2: zeroinitializer pass-through, variable mask %x2
; The three results are added. The expectations require three vpmovuswb
; instructions on consecutive output lines: merge-masked into %ymm1,
; zero-masked into %ymm2, and unmasked into %ymm0.
1063 declare <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16>, <32 x i8>, i32)
1065 define <32 x i8>@test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
1066 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
1067 ; CHECK: vpmovuswb %zmm0, %ymm1 {%k1}
1068 ; CHECK-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z}
1069 ; CHECK-NEXT: vpmovuswb %zmm0, %ymm0
1070 %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
1071 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
1072 %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
1073 %res3 = add <32 x i8> %res0, %res1
1074 %res4 = add <32 x i8> %res3, %res2
; pmovus.wb.mem.512 (void, takes a pointer) is called twice on the same %ptr
; and source: first with an all-ones mask (i32 -1), then with the variable
; mask %x2. The expectations require a vpmovuswb to (%rdi) with no mask
; annotation, followed later by one to (%rdi) under {%k1}.
1078 declare void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16>, i32)
1080 define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
1081 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
1082 ; CHECK: vpmovuswb %zmm0, (%rdi)
1083 ; CHECK: vpmovuswb %zmm0, (%rdi) {%k1}
1084 call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
1085 call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
; pmaddubs.w.512 takes two <64 x i8> sources and yields <32 x i16>. It is
; called once with pass-through %x2 and the variable mask %x3, and once with
; an all-ones mask (i32 -1); the sum of the two results is returned.
; The expectations require, on consecutive output lines: a kmovd of %edi into
; %k1 (with its exact encoding bytes), a masked vpmaddubsw into %zmm2, an
; unmasked vpmaddubsw into %zmm0, and a vpaddw combining the two.
1089 declare <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8>, <64 x i8>, <32 x i16>, i32)
1091 define <32 x i16>@test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) {
1092 ; CHECK-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
1094 ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
1095 ; CHECK-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1}
1096 ; CHECK-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0
1097 ; CHECK-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1099 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3)
1100 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 -1)
1101 %res2 = add <32 x i16> %res, %res1
1102 ret <32 x i16> %res2
; pmaddw.d.512 takes two <32 x i16> sources and yields <16 x i32>, so its
; mask is i16. It is called once with pass-through %x2 and the variable mask
; %x3, and once with an all-ones mask (i16 -1); the sum of the two results is
; returned.
; The expectations require, on consecutive output lines: a kmovw of %edi into
; %k1, a masked vpmaddwd into %zmm2, an unmasked vpmaddwd into %zmm0, and a
; vpaddd combining the two.
1105 declare <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16>, <32 x i16>, <16 x i32>, i16)
1107 define <16 x i32>@test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) {
1108 ; CHECK-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
1110 ; CHECK-NEXT: kmovw %edi, %k1
1111 ; CHECK-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1}
1112 ; CHECK-NEXT: vpmaddwd %zmm1, %zmm0, %zmm0
1113 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
1115 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3)
1116 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 -1)
1117 %res2 = add <16 x i32> %res, %res1
1118 ret <16 x i32> %res2