1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw --show-mc-encoding| FileCheck %s
3 define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) {
4 ; CHECK-LABEL: test_pcmpeq_b
5 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 ##
6 %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
10 define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
11 ; CHECK-LABEL: test_mask_pcmpeq_b
12 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} ##
13 %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
17 declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64)
19 define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) {
20 ; CHECK-LABEL: test_pcmpeq_w
21 ; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 ##
22 %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
26 define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
27 ; CHECK-LABEL: test_mask_pcmpeq_w
28 ; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} ##
29 %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
33 declare i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16>, <32 x i16>, i32)
35 define i64 @test_pcmpgt_b(<64 x i8> %a, <64 x i8> %b) {
36 ; CHECK-LABEL: test_pcmpgt_b
37 ; CHECK: vpcmpgtb %zmm1, %zmm0, %k0 ##
38 %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
42 define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
43 ; CHECK-LABEL: test_mask_pcmpgt_b
44 ; CHECK: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} ##
45 %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
49 declare i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8>, <64 x i8>, i64)
51 define i32 @test_pcmpgt_w(<32 x i16> %a, <32 x i16> %b) {
52 ; CHECK-LABEL: test_pcmpgt_w
53 ; CHECK: vpcmpgtw %zmm1, %zmm0, %k0 ##
54 %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
58 define i32 @test_mask_pcmpgt_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
59 ; CHECK-LABEL: test_mask_pcmpgt_w
60 ; CHECK: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} ##
61 %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
65 declare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32)
67 define <8 x i64> @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
68 ; CHECK_LABEL: test_cmp_b_512
69 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 ##
70 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
71 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
72 ; CHECK: vpcmpltb %zmm1, %zmm0, %k0 ##
73 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
74 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
75 ; CHECK: vpcmpleb %zmm1, %zmm0, %k0 ##
76 %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
77 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
78 ; CHECK: vpcmpunordb %zmm1, %zmm0, %k0 ##
79 %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
80 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
81 ; CHECK: vpcmpneqb %zmm1, %zmm0, %k0 ##
82 %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
83 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
84 ; CHECK: vpcmpnltb %zmm1, %zmm0, %k0 ##
85 %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
86 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
87 ; CHECK: vpcmpnleb %zmm1, %zmm0, %k0 ##
88 %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
89 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
90 ; CHECK: vpcmpordb %zmm1, %zmm0, %k0 ##
91 %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
92 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
96 define <8 x i64> @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
97 ; CHECK_LABEL: test_mask_cmp_b_512
98 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} ##
99 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
100 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
101 ; CHECK: vpcmpltb %zmm1, %zmm0, %k0 {%k1} ##
102 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
103 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
104 ; CHECK: vpcmpleb %zmm1, %zmm0, %k0 {%k1} ##
105 %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
106 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
107 ; CHECK: vpcmpunordb %zmm1, %zmm0, %k0 {%k1} ##
108 %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
109 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
110 ; CHECK: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} ##
111 %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
112 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
113 ; CHECK: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} ##
114 %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
115 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
116 ; CHECK: vpcmpnleb %zmm1, %zmm0, %k0 {%k1} ##
117 %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
118 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
119 ; CHECK: vpcmpordb %zmm1, %zmm0, %k0 {%k1} ##
120 %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
121 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
125 declare i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
127 define <8 x i64> @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
128 ; CHECK_LABEL: test_ucmp_b_512
129 ; CHECK: vpcmpequb %zmm1, %zmm0, %k0 ##
130 %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
131 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
132 ; CHECK: vpcmpltub %zmm1, %zmm0, %k0 ##
133 %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
134 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
135 ; CHECK: vpcmpleub %zmm1, %zmm0, %k0 ##
136 %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
137 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
138 ; CHECK: vpcmpunordub %zmm1, %zmm0, %k0 ##
139 %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
140 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
141 ; CHECK: vpcmpnequb %zmm1, %zmm0, %k0 ##
142 %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
143 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
144 ; CHECK: vpcmpnltub %zmm1, %zmm0, %k0 ##
145 %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
146 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
147 ; CHECK: vpcmpnleub %zmm1, %zmm0, %k0 ##
148 %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
149 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
150 ; CHECK: vpcmpordub %zmm1, %zmm0, %k0 ##
151 %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
152 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
156 define <8 x i64> @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
157 ; CHECK_LABEL: test_mask_ucmp_b_512
158 ; CHECK: vpcmpequb %zmm1, %zmm0, %k0 {%k1} ##
159 %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
160 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
161 ; CHECK: vpcmpltub %zmm1, %zmm0, %k0 {%k1} ##
162 %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
163 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
164 ; CHECK: vpcmpleub %zmm1, %zmm0, %k0 {%k1} ##
165 %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
166 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
167 ; CHECK: vpcmpunordub %zmm1, %zmm0, %k0 {%k1} ##
168 %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
169 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
170 ; CHECK: vpcmpnequb %zmm1, %zmm0, %k0 {%k1} ##
171 %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
172 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
173 ; CHECK: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} ##
174 %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
175 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
176 ; CHECK: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} ##
177 %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
178 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
179 ; CHECK: vpcmpordub %zmm1, %zmm0, %k0 {%k1} ##
180 %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
181 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
185 declare i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
187 define <8 x i32> @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
188 ; CHECK_LABEL: test_cmp_w_512
189 ; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 ##
190 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
191 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
192 ; CHECK: vpcmpltw %zmm1, %zmm0, %k0 ##
193 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
194 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
195 ; CHECK: vpcmplew %zmm1, %zmm0, %k0 ##
196 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
197 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
198 ; CHECK: vpcmpunordw %zmm1, %zmm0, %k0 ##
199 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
200 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
201 ; CHECK: vpcmpneqw %zmm1, %zmm0, %k0 ##
202 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
203 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
204 ; CHECK: vpcmpnltw %zmm1, %zmm0, %k0 ##
205 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
206 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
207 ; CHECK: vpcmpnlew %zmm1, %zmm0, %k0 ##
208 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
209 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
210 ; CHECK: vpcmpordw %zmm1, %zmm0, %k0 ##
211 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
212 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
216 define <8 x i32> @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
217 ; CHECK_LABEL: test_mask_cmp_w_512
218 ; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} ##
219 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
220 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
221 ; CHECK: vpcmpltw %zmm1, %zmm0, %k0 {%k1} ##
222 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
223 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
224 ; CHECK: vpcmplew %zmm1, %zmm0, %k0 {%k1} ##
225 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
226 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
227 ; CHECK: vpcmpunordw %zmm1, %zmm0, %k0 {%k1} ##
228 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
229 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
230 ; CHECK: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} ##
231 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
232 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
233 ; CHECK: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} ##
234 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
235 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
236 ; CHECK: vpcmpnlew %zmm1, %zmm0, %k0 {%k1} ##
237 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
238 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
239 ; CHECK: vpcmpordw %zmm1, %zmm0, %k0 {%k1} ##
240 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
241 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
245 declare i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
247 define <8 x i32> @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
248 ; CHECK_LABEL: test_ucmp_w_512
249 ; CHECK: vpcmpequw %zmm1, %zmm0, %k0 ##
250 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
251 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
252 ; CHECK: vpcmpltuw %zmm1, %zmm0, %k0 ##
253 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
254 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
255 ; CHECK: vpcmpleuw %zmm1, %zmm0, %k0 ##
256 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
257 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
258 ; CHECK: vpcmpunorduw %zmm1, %zmm0, %k0 ##
259 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
260 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
261 ; CHECK: vpcmpnequw %zmm1, %zmm0, %k0 ##
262 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
263 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
264 ; CHECK: vpcmpnltuw %zmm1, %zmm0, %k0 ##
265 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
266 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
267 ; CHECK: vpcmpnleuw %zmm1, %zmm0, %k0 ##
268 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
269 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
270 ; CHECK: vpcmporduw %zmm1, %zmm0, %k0 ##
271 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
272 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
276 define <8 x i32> @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
277 ; CHECK_LABEL: test_mask_ucmp_w_512
278 ; CHECK: vpcmpequw %zmm1, %zmm0, %k0 {%k1} ##
279 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
280 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
281 ; CHECK: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} ##
282 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
283 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
284 ; CHECK: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} ##
285 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
286 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
287 ; CHECK: vpcmpunorduw %zmm1, %zmm0, %k0 {%k1} ##
288 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
289 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
290 ; CHECK: vpcmpnequw %zmm1, %zmm0, %k0 {%k1} ##
291 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
292 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
293 ; CHECK: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} ##
294 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
295 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
296 ; CHECK: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} ##
297 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
298 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
299 ; CHECK: vpcmporduw %zmm1, %zmm0, %k0 {%k1} ##
300 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
301 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
305 declare i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
307 declare <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8>, <64 x i8>, i64) nounwind readonly
309 ; CHECK-LABEL: test_x86_mask_blend_w_512
310 define <32 x i16> @test_x86_mask_blend_w_512(i32 %mask, <32 x i16> %a1, <32 x i16> %a2) {
312 %res = call <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16> %a1, <32 x i16> %a2, i32 %mask) ; <<32 x i16>> [#uses=1]
315 declare <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16>, <32 x i16>, i32) nounwind readonly
317 ; CHECK-LABEL: test_x86_mask_blend_b_512
319 define <64 x i8> @test_x86_mask_blend_b_512(i64 %a0, <64 x i8> %a1, <64 x i8> %a2) {
320 %res = call <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8> %a1, <64 x i8> %a2, i64 %a0) ; <<64 x i8>> [#uses=1]
324 define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
325 ;CHECK-LABEL: test_mask_packs_epi32_rr_512
326 ;CHECK: vpackssdw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x6b,0xc1]
327 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
331 define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
332 ;CHECK-LABEL: test_mask_packs_epi32_rrk_512
333 ;CHECK: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1]
334 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
338 define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
339 ;CHECK-LABEL: test_mask_packs_epi32_rrkz_512
340 ;CHECK: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1]
341 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
345 define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
346 ;CHECK-LABEL: test_mask_packs_epi32_rm_512
347 ;CHECK: vpackssdw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x07]
348 %b = load <16 x i32>, <16 x i32>* %ptr_b
349 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
353 define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
354 ;CHECK-LABEL: test_mask_packs_epi32_rmk_512
355 ;CHECK: vpackssdw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x0f]
356 %b = load <16 x i32>, <16 x i32>* %ptr_b
357 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
361 define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
362 ;CHECK-LABEL: test_mask_packs_epi32_rmkz_512
363 ;CHECK: vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x07]
364 %b = load <16 x i32>, <16 x i32>* %ptr_b
365 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
369 define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
370 ;CHECK-LABEL: test_mask_packs_epi32_rmb_512
371 ;CHECK: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x07]
372 %q = load i32, i32* %ptr_b
373 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
374 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
375 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
379 define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
380 ;CHECK-LABEL: test_mask_packs_epi32_rmbk_512
381 ;CHECK: vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x0f]
382 %q = load i32, i32* %ptr_b
383 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
384 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
385 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
389 define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
390 ;CHECK-LABEL: test_mask_packs_epi32_rmbkz_512
391 ;CHECK: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x07]
392 %q = load i32, i32* %ptr_b
393 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
394 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
395 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
399 declare <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
401 define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
402 ;CHECK-LABEL: test_mask_packs_epi16_rr_512
403 ;CHECK: vpacksswb %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x63,0xc1]
404 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
408 define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
409 ;CHECK-LABEL: test_mask_packs_epi16_rrk_512
410 ;CHECK: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x63,0xd1]
411 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
415 define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
416 ;CHECK-LABEL: test_mask_packs_epi16_rrkz_512
417 ;CHECK: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x63,0xc1]
418 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
422 define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
423 ;CHECK-LABEL: test_mask_packs_epi16_rm_512
424 ;CHECK: vpacksswb (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x63,0x07]
425 %b = load <32 x i16>, <32 x i16>* %ptr_b
426 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
430 define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
431 ;CHECK-LABEL: test_mask_packs_epi16_rmk_512
432 ;CHECK: vpacksswb (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x63,0x0f]
433 %b = load <32 x i16>, <32 x i16>* %ptr_b
434 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
438 define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
439 ;CHECK-LABEL: test_mask_packs_epi16_rmkz_512
440 ;CHECK: vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x63,0x07]
441 %b = load <32 x i16>, <32 x i16>* %ptr_b
442 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
446 declare <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
449 define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
450 ;CHECK-LABEL: test_mask_packus_epi32_rr_512
451 ;CHECK: vpackusdw %zmm1, %zmm0, %zmm0
452 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
456 define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
457 ;CHECK-LABEL: test_mask_packus_epi32_rrk_512
458 ;CHECK: vpackusdw %zmm1, %zmm0, %zmm2 {%k1}
459 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
463 define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
464 ;CHECK-LABEL: test_mask_packus_epi32_rrkz_512
465 ;CHECK: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z}
466 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
470 define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
471 ;CHECK-LABEL: test_mask_packus_epi32_rm_512
472 ;CHECK: vpackusdw (%rdi), %zmm0, %zmm0
473 %b = load <16 x i32>, <16 x i32>* %ptr_b
474 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
478 define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
479 ;CHECK-LABEL: test_mask_packus_epi32_rmk_512
480 ;CHECK: vpackusdw (%rdi), %zmm0, %zmm1 {%k1}
481 %b = load <16 x i32>, <16 x i32>* %ptr_b
482 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
486 define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
487 ;CHECK-LABEL: test_mask_packus_epi32_rmkz_512
488 ;CHECK: vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z}
489 %b = load <16 x i32>, <16 x i32>* %ptr_b
490 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
494 define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
495 ;CHECK-LABEL: test_mask_packus_epi32_rmb_512
496 ;CHECK: vpackusdw (%rdi){1to16}, %zmm0, %zmm0
497 %q = load i32, i32* %ptr_b
498 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
499 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
500 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
504 define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
505 ;CHECK-LABEL: test_mask_packus_epi32_rmbk_512
506 ;CHECK: vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1}
507 %q = load i32, i32* %ptr_b
508 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
509 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
510 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
514 define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
515 ;CHECK-LABEL: test_mask_packus_epi32_rmbkz_512
516 ;CHECK: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
517 %q = load i32, i32* %ptr_b
518 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
519 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
520 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
524 declare <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
526 define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
527 ;CHECK-LABEL: test_mask_packus_epi16_rr_512
528 ;CHECK: vpackuswb %zmm1, %zmm0, %zmm0
529 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
533 define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
534 ;CHECK-LABEL: test_mask_packus_epi16_rrk_512
535 ;CHECK: vpackuswb %zmm1, %zmm0, %zmm2 {%k1}
536 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
540 define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
541 ;CHECK-LABEL: test_mask_packus_epi16_rrkz_512
542 ;CHECK: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z}
543 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
547 define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
548 ;CHECK-LABEL: test_mask_packus_epi16_rm_512
549 ;CHECK: vpackuswb (%rdi), %zmm0, %zmm0
550 %b = load <32 x i16>, <32 x i16>* %ptr_b
551 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
555 define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
556 ;CHECK-LABEL: test_mask_packus_epi16_rmk_512
557 ;CHECK: vpackuswb (%rdi), %zmm0, %zmm1 {%k1}
558 %b = load <32 x i16>, <32 x i16>* %ptr_b
559 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
563 define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
564 ;CHECK-LABEL: test_mask_packus_epi16_rmkz_512
565 ;CHECK: vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z}
566 %b = load <32 x i16>, <32 x i16>* %ptr_b
567 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
571 declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
573 define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
574 ;CHECK-LABEL: test_mask_adds_epi16_rr_512
575 ;CHECK: vpaddsw %zmm1, %zmm0, %zmm0
576 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
580 define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
581 ;CHECK-LABEL: test_mask_adds_epi16_rrk_512
582 ;CHECK: vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
583 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
587 define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
588 ;CHECK-LABEL: test_mask_adds_epi16_rrkz_512
589 ;CHECK: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}
590 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
594 define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
595 ;CHECK-LABEL: test_mask_adds_epi16_rm_512
596 ;CHECK: vpaddsw (%rdi), %zmm0, %zmm0
597 %b = load <32 x i16>, <32 x i16>* %ptr_b
598 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
602 define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
603 ;CHECK-LABEL: test_mask_adds_epi16_rmk_512
604 ;CHECK: vpaddsw (%rdi), %zmm0, %zmm1 {%k1}
605 %b = load <32 x i16>, <32 x i16>* %ptr_b
606 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
610 define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
611 ;CHECK-LABEL: test_mask_adds_epi16_rmkz_512
612 ;CHECK: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z}
613 %b = load <32 x i16>, <32 x i16>* %ptr_b
614 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
618 declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
620 define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
621 ;CHECK-LABEL: test_mask_subs_epi16_rr_512
622 ;CHECK: vpsubsw %zmm1, %zmm0, %zmm0
623 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
627 define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
628 ;CHECK-LABEL: test_mask_subs_epi16_rrk_512
629 ;CHECK: vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
630 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
634 define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
635 ;CHECK-LABEL: test_mask_subs_epi16_rrkz_512
636 ;CHECK: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
637 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
641 define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
642 ;CHECK-LABEL: test_mask_subs_epi16_rm_512
643 ;CHECK: vpsubsw (%rdi), %zmm0, %zmm0
644 %b = load <32 x i16>, <32 x i16>* %ptr_b
645 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
649 define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
650 ;CHECK-LABEL: test_mask_subs_epi16_rmk_512
651 ;CHECK: vpsubsw (%rdi), %zmm0, %zmm1 {%k1}
652 %b = load <32 x i16>, <32 x i16>* %ptr_b
653 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
657 define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
658 ;CHECK-LABEL: test_mask_subs_epi16_rmkz_512
659 ;CHECK: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z}
660 %b = load <32 x i16>, <32 x i16>* %ptr_b
661 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
665 declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
667 define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
668 ;CHECK-LABEL: test_mask_adds_epu16_rr_512
669 ;CHECK: vpaddusw %zmm1, %zmm0, %zmm0
670 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
674 define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
675 ;CHECK-LABEL: test_mask_adds_epu16_rrk_512
676 ;CHECK: vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
677 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
681 define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
682 ;CHECK-LABEL: test_mask_adds_epu16_rrkz_512
683 ;CHECK: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
684 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
688 define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
689 ;CHECK-LABEL: test_mask_adds_epu16_rm_512
690 ;CHECK: vpaddusw (%rdi), %zmm0, %zmm0
691 %b = load <32 x i16>, <32 x i16>* %ptr_b
692 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
696 define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
697 ;CHECK-LABEL: test_mask_adds_epu16_rmk_512
698 ;CHECK: vpaddusw (%rdi), %zmm0, %zmm1 {%k1}
699 %b = load <32 x i16>, <32 x i16>* %ptr_b
700 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
704 define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
705 ;CHECK-LABEL: test_mask_adds_epu16_rmkz_512
706 ;CHECK: vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z}
707 %b = load <32 x i16>, <32 x i16>* %ptr_b
708 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
712 declare <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
714 define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
715 ;CHECK-LABEL: test_mask_subs_epu16_rr_512
716 ;CHECK: vpsubusw %zmm1, %zmm0, %zmm0
717 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
721 define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
722 ;CHECK-LABEL: test_mask_subs_epu16_rrk_512
723 ;CHECK: vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
724 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
728 define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
729 ;CHECK-LABEL: test_mask_subs_epu16_rrkz_512
730 ;CHECK: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
731 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
735 define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
736 ;CHECK-LABEL: test_mask_subs_epu16_rm_512
737 ;CHECK: vpsubusw (%rdi), %zmm0, %zmm0
738 %b = load <32 x i16>, <32 x i16>* %ptr_b
739 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
743 define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
744 ;CHECK-LABEL: test_mask_subs_epu16_rmk_512
745 ;CHECK: vpsubusw (%rdi), %zmm0, %zmm1 {%k1}
746 %b = load <32 x i16>, <32 x i16>* %ptr_b
747 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
751 define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
752 ;CHECK-LABEL: test_mask_subs_epu16_rmkz_512
753 ;CHECK: vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z}
754 %b = load <32 x i16>, <32 x i16>* %ptr_b
755 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
759 declare <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
761 declare <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
763 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_b_512
765 ; CHECK: vpmaxsb %zmm
767 define <64 x i8>@test_int_x86_avx512_mask_pmaxs_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
768 %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
769 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
770 %res2 = add <64 x i8> %res, %res1
774 declare <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
776 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_w_512
778 ; CHECK: vpmaxsw %zmm
780 define <32 x i16>@test_int_x86_avx512_mask_pmaxs_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
781 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
782 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
783 %res2 = add <32 x i16> %res, %res1
787 declare <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
789 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_b_512
791 ; CHECK: vpmaxub %zmm
793 define <64 x i8>@test_int_x86_avx512_mask_pmaxu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
794 %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
795 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
796 %res2 = add <64 x i8> %res, %res1
800 declare <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
802 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_w_512
804 ; CHECK: vpmaxuw %zmm
806 define <32 x i16>@test_int_x86_avx512_mask_pmaxu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
807 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
808 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
809 %res2 = add <32 x i16> %res, %res1
813 declare <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
815 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_b_512
817 ; CHECK: vpminsb %zmm
819 define <64 x i8>@test_int_x86_avx512_mask_pmins_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
820 %res = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
821 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
822 %res2 = add <64 x i8> %res, %res1
826 declare <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
828 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_w_512
830 ; CHECK: vpminsw %zmm
832 define <32 x i16>@test_int_x86_avx512_mask_pmins_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
833 %res = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
834 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
835 %res2 = add <32 x i16> %res, %res1
839 declare <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
841 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_b_512
843 ; CHECK: vpminub %zmm
845 define <64 x i8>@test_int_x86_avx512_mask_pminu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
846 %res = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
847 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
848 %res2 = add <64 x i8> %res, %res1
852 declare <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
854 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_w_512
856 ; CHECK: vpminuw %zmm
858 define <32 x i16>@test_int_x86_avx512_mask_pminu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
859 %res = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
860 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
861 %res2 = add <32 x i16> %res, %res1
865 declare <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
867 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_hi_512
870 ; CHECK: vpermt2w %zmm{{.*}}{%k1}
871 define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
872 %res = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
873 %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
874 %res2 = add <32 x i16> %res, %res1
878 declare <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
880 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_hi_512
883 ; CHECK: vpermt2w %zmm{{.*}}{%k1} {z}
884 define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
885 %res = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
886 %res1 = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
887 %res2 = add <32 x i16> %res, %res1
891 declare <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
893 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_hi_512
896 ; CHECK: vpermi2w %zmm{{.*}}{%k1}
897 define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
898 %res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
899 %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
900 %res2 = add <32 x i16> %res, %res1
904 declare <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
906 ; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_b_512
910 define <64 x i8>@test_int_x86_avx512_mask_pavg_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
911 %res = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
912 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
913 %res2 = add <64 x i8> %res, %res1
917 declare <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
919 ; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_w_512
923 define <32 x i16>@test_int_x86_avx512_mask_pavg_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
924 %res = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
925 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
926 %res2 = add <32 x i16> %res, %res1
930 declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
932 ; CHECK-LABEL: @test_int_x86_avx512_mask_pshuf_b_512
935 ; CHECK: vpshufb %zmm{{.*}}{%k1}
936 define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
937 %res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
938 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
939 %res2 = add <64 x i8> %res, %res1
943 declare <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16>, <32 x i16>, i32)
945 ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_w_512
948 ; CHECK: vpabsw{{.*}}{%k1}
949 define <32 x i16>@test_int_x86_avx512_mask_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
950 %res = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
951 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1)
952 %res2 = add <32 x i16> %res, %res1
956 declare <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8>, <64 x i8>, i64)
958 ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_b_512
961 ; CHECK: vpabsb{{.*}}{%k1}
962 define <64 x i8>@test_int_x86_avx512_mask_pabs_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) {
963 %res = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
964 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1)
965 %res2 = add <64 x i8> %res, %res1
969 declare <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
971 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmulhu_w_512
975 ; CHECK: vpmulhuw {{.*}}encoding: [0x62
976 define <32 x i16>@test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
977 %res = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
978 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
979 %res2 = add <32 x i16> %res, %res1
983 declare <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
985 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmulh_w_512
989 ; CHECK: vpmulhw {{.*}}encoding: [0x62
990 define <32 x i16>@test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
991 %res = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
992 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
993 %res2 = add <32 x i16> %res, %res1
997 declare <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
999 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmulhr_sw_512
1003 ; CHECK: vpmulhrsw {{.*}}encoding: [0x62
1004 define <32 x i16>@test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1005 %res = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1006 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1007 %res2 = add <32 x i16> %res, %res1
1008 ret <32 x i16> %res2
1011 declare <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16>, <32 x i8>, i32)
1013 define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
1014 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
1015 ; CHECK: vpmovwb %zmm0, %ymm1 {%k1}
1016 ; CHECK-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z}
1017 ; CHECK-NEXT: vpmovwb %zmm0, %ymm0
1018 %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
1019 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
1020 %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
1021 %res3 = add <32 x i8> %res0, %res1
1022 %res4 = add <32 x i8> %res3, %res2
1026 declare void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16>, i32)
1028 define void @test_int_x86_avx512_mask_pmov_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
1029 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
1030 ; CHECK: vpmovwb %zmm0, (%rdi)
1031 ; CHECK: vpmovwb %zmm0, (%rdi) {%k1}
1032 call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
1033 call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
1037 declare <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16>, <32 x i8>, i32)
1039 define <32 x i8>@test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
1040 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
1041 ; CHECK: vpmovswb %zmm0, %ymm1 {%k1}
1042 ; CHECK-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z}
1043 ; CHECK-NEXT: vpmovswb %zmm0, %ymm0
1044 %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
1045 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
1046 %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
1047 %res3 = add <32 x i8> %res0, %res1
1048 %res4 = add <32 x i8> %res3, %res2
1052 declare void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16>, i32)
1054 define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
1055 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
1056 ; CHECK: vpmovswb %zmm0, (%rdi)
1057 ; CHECK: vpmovswb %zmm0, (%rdi) {%k1}
1058 call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
1059 call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
1063 declare <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16>, <32 x i8>, i32)
1065 define <32 x i8>@test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
1066 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
1067 ; CHECK: vpmovuswb %zmm0, %ymm1 {%k1}
1068 ; CHECK-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z}
1069 ; CHECK-NEXT: vpmovuswb %zmm0, %ymm0
1070 %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
1071 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
1072 %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
1073 %res3 = add <32 x i8> %res0, %res1
1074 %res4 = add <32 x i8> %res3, %res2
1078 declare void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16>, i32)
1080 define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
1081 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
1082 ; CHECK: vpmovuswb %zmm0, (%rdi)
1083 ; CHECK: vpmovuswb %zmm0, (%rdi) {%k1}
1084 call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
1085 call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
1089 declare <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8>, <64 x i8>, <32 x i16>, i32)
1091 define <32 x i16>@test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) {
1092 ; CHECK-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
1094 ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
1095 ; CHECK-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1}
1096 ; CHECK-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0
1097 ; CHECK-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1099 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3)
1100 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 -1)
1101 %res2 = add <32 x i16> %res, %res1
1102 ret <32 x i16> %res2
1105 declare <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16>, <32 x i16>, <16 x i32>, i16)
1107 define <16 x i32>@test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) {
1108 ; CHECK-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
1110 ; CHECK-NEXT: kmovw %edi, %k1
1111 ; CHECK-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1}
1112 ; CHECK-NEXT: vpmaddwd %zmm1, %zmm0, %zmm0
1113 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
1115 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3)
1116 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 -1)
1117 %res2 = add <16 x i32> %res, %res1
1118 ret <16 x i32> %res2
1121 declare <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
1123 define <64 x i8>@test_int_x86_avx512_mask_punpckhb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
1124 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhb_w_512:
1126 ; CHECK-NEXT: kmovq %rdi, %k1
1127 ; CHECK-NEXT: vpunpckhbw %zmm1, %zmm0, %zmm2 {%k1}
1128 ; CHECK-NEXT: ## zmm2 = zmm2[8],k1[8],zmm2[9],k1[9],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[12],k1[12],zmm2[13],k1[13],zmm2[14],k1[14],zmm2[15],k1[15],zmm2[24],k1[24],zmm2[25],k1[25],zmm2[26],k1[26],zmm2[27],k1[27],zmm2[28],k1[28],zmm2[29],k1[29],zmm2[30],k1[30],zmm2[31],k1[31],zmm2[40],k1[40],zmm2[41],k1[41],zmm2[42],k1[42],zmm2[43],k1[43],zmm2[44],k1[44],zmm2[45],k1[45],zmm2[46],k1[46],zmm2[47],k1[47],zmm2[56],k1[56],zmm2[57],k1[57],zmm2[58],k1[58],zmm2[59],k1[59],zmm2[60],k1[60],zmm2[61],k1[61],zmm2[62],k1[62],zmm2[63],k1[63]
1129 ; CHECK-NEXT: vpunpckhbw %zmm1, %zmm0, %zmm0
1130 ; CHECK-NEXT: ## zmm0 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
1131 ; CHECK-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1133 %res = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
1134 %res1 = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
1135 %res2 = add <64 x i8> %res, %res1
1139 declare <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
1141 define <64 x i8>@test_int_x86_avx512_mask_punpcklb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
1142 ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklb_w_512:
1144 ; CHECK-NEXT: kmovq %rdi, %k1
1145 ; CHECK-NEXT: vpunpcklbw %zmm1, %zmm0, %zmm2 {%k1}
1146 ; CHECK-NEXT: ## zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[2],k1[2],zmm2[3],k1[3],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[16],k1[16],zmm2[17],k1[17],zmm2[18],k1[18],zmm2[19],k1[19],zmm2[20],k1[20],zmm2[21],k1[21],zmm2[22],k1[22],zmm2[23],k1[23],zmm2[32],k1[32],zmm2[33],k1[33],zmm2[34],k1[34],zmm2[35],k1[35],zmm2[36],k1[36],zmm2[37],k1[37],zmm2[38],k1[38],zmm2[39],k1[39],zmm2[48],k1[48],zmm2[49],k1[49],zmm2[50],k1[50],zmm2[51],k1[51],zmm2[52],k1[52],zmm2[53],k1[53],zmm2[54],k1[54],zmm2[55],k1[55]
1147 ; CHECK-NEXT: vpunpcklbw %zmm1, %zmm0, %zmm0
1148 ; CHECK-NEXT: ## zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
1149 ; CHECK-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1151 %res = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
1152 %res1 = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
1153 %res2 = add <64 x i8> %res, %res1
1157 declare <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1159 define <32 x i16>@test_int_x86_avx512_mask_punpckhw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1160 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhw_d_512:
1162 ; CHECK-NEXT: kmovd %edi, %k1
1163 ; CHECK-NEXT: vpunpckhwd %zmm1, %zmm0, %zmm2 {%k1}
1164 ; CHECK-NEXT: ## zmm2 = zmm2[4],k1[4],zmm2[5],k1[5],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[12],k1[12],zmm2[13],k1[13],zmm2[14],k1[14],zmm2[15],k1[15],zmm2[20],k1[20],zmm2[21],k1[21],zmm2[22],k1[22],zmm2[23],k1[23],zmm2[28],k1[28],zmm2[29],k1[29],zmm2[30],k1[30],zmm2[31],k1[31]
1165 ; CHECK-NEXT: vpunpckhwd %zmm1, %zmm0, %zmm0
1166 ; CHECK-NEXT: ## zmm0 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
1167 ; CHECK-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1169 %res = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1170 %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1171 %res2 = add <32 x i16> %res, %res1
1172 ret <32 x i16> %res2
1175 declare <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1177 define <32 x i16>@test_int_x86_avx512_mask_punpcklw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1178 ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklw_d_512:
1180 ; CHECK-NEXT: kmovd %edi, %k1
1181 ; CHECK-NEXT: vpunpcklwd %zmm1, %zmm0, %zmm2 {%k1}
1182 ; CHECK-NEXT: ## zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[2],k1[2],zmm2[3],k1[3],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[16],k1[16],zmm2[17],k1[17],zmm2[18],k1[18],zmm2[19],k1[19],zmm2[24],k1[24],zmm2[25],k1[25],zmm2[26],k1[26],zmm2[27],k1[27]
1183 ; CHECK-NEXT: vpunpcklwd %zmm1, %zmm0, %zmm0
1184 ; CHECK-NEXT: ## zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
1185 ; CHECK-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1187 %res = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1188 %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1189 %res2 = add <32 x i16> %res, %res1
1190 ret <32 x i16> %res2
1193 declare <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8>, <64 x i8>, i32, <64 x i8>, i64)
1195 define <64 x i8>@test_int_x86_avx512_mask_palignr_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x3, i64 %x4) {
1196 ; CHECK-LABEL: test_int_x86_avx512_mask_palignr_512:
1198 ; CHECK-NEXT: kmovq %rdi, %k1
1199 ; CHECK-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm2 {%k1}
1200 ; CHECK-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm3 {%k1} {z}
1201 ; CHECK-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm0
1202 ; CHECK-NEXT: vpaddb %zmm3, %zmm2, %zmm1
1203 ; CHECK-NEXT: vpaddb %zmm0, %zmm1, %zmm0
1205 %res = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 %x4)
1206 %res1 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> zeroinitializer, i64 %x4)
1207 %res2 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 -1)
1208 %res3 = add <64 x i8> %res, %res1
1209 %res4 = add <64 x i8> %res3, %res2
1213 declare <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8>, <64 x i8>, i32, <32 x i16>, i32)
1215 define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) {
1216 ; CHECK-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
1218 ; CHECK-NEXT: kmovd %edi, %k1
1219 ; CHECK-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1}
1220 ; CHECK-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm3 {%k1} {z}
1221 ; CHECK-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm0
1222 ; CHECK-NEXT: vpaddw %zmm3, %zmm2, %zmm1
1223 ; CHECK-NEXT: vpaddw %zmm0, %zmm1, %zmm0
1225 %res = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 %x4)
1226 %res1 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> zeroinitializer, i32 %x4)
1227 %res2 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 -1)
1228 %res3 = add <32 x i16> %res, %res1
1229 %res4 = add <32 x i16> %res3, %res2
1230 ret <32 x i16> %res4
1233 declare <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64>, i32)
1235 ; CHECK-LABEL: @test_int_x86_avx512_mask_psll_dq_512
1239 define <8 x i64>@test_int_x86_avx512_mask_psll_dq_512(<8 x i64> %x0) {
1240 %res = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 8)
1241 %res1 = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4)
1242 %res2 = add <8 x i64> %res, %res1
1246 declare <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64>, i32)
1248 ; CHECK-LABEL: @test_int_x86_avx512_mask_psrl_dq_512
1252 define <8 x i64>@test_int_x86_avx512_mask_psrl_dq_512(<8 x i64> %x0) {
1253 %res = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 8)
1254 %res1 = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4)
1255 %res2 = add <8 x i64> %res, %res1
1258 declare <64 x i8> @llvm.x86.avx512.psad.bw.512(<64 x i8>, <64 x i8>)
1260 ; CHECK-LABEL: @test_int_x86_avx512_mask_psadb_w_512
1262 ; CHECK: vpsadbw %zmm1
1263 ; CHECK: vpsadbw %zmm2
1264 define <64 x i8>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2){
1265 %res = call <64 x i8> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1)
1266 %res1 = call <64 x i8> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x2)
1267 %res2 = add <64 x i8> %res, %res1