1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding| FileCheck %s
5 define i32 @test_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b) {
6 ; CHECK-LABEL: test_pcmpeq_b_256
7 ; CHECK: vpcmpeqb %ymm1, %ymm0, %k0 ##
8 %res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1)
12 define i32 @test_mask_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
13 ; CHECK-LABEL: test_mask_pcmpeq_b_256
14 ; CHECK: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} ##
15 %res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
19 declare i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8>, <32 x i8>, i32)
21 define i16 @test_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b) {
22 ; CHECK-LABEL: test_pcmpeq_w_256
23 ; CHECK: vpcmpeqw %ymm1, %ymm0, %k0 ##
24 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
28 define i16 @test_mask_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
29 ; CHECK-LABEL: test_mask_pcmpeq_w_256
30 ; CHECK: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} ##
31 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
35 declare i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16>, <16 x i16>, i16)
37 define i32 @test_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b) {
38 ; CHECK-LABEL: test_pcmpgt_b_256
39 ; CHECK: vpcmpgtb %ymm1, %ymm0, %k0 ##
40 %res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1)
44 define i32 @test_mask_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
45 ; CHECK-LABEL: test_mask_pcmpgt_b_256
46 ; CHECK: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} ##
47 %res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
51 declare i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8>, <32 x i8>, i32)
53 define i16 @test_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b) {
54 ; CHECK-LABEL: test_pcmpgt_w_256
55 ; CHECK: vpcmpgtw %ymm1, %ymm0, %k0 ##
56 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
60 define i16 @test_mask_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
61 ; CHECK-LABEL: test_mask_pcmpgt_w_256
62 ; CHECK: vpcmpgtw %ymm1, %ymm0, %k0 {%k1} ##
63 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
67 declare i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16>, <16 x i16>, i16)
69 define <8 x i32> @test_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
70 ; CHECK_LABEL: test_cmp_b_256
71 ; CHECK: vpcmpeqb %ymm1, %ymm0, %k0 ##
72 %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
73 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
74 ; CHECK: vpcmpltb %ymm1, %ymm0, %k0 ##
75 %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 -1)
76 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
77 ; CHECK: vpcmpleb %ymm1, %ymm0, %k0 ##
78 %res2 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 -1)
79 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
80 ; CHECK: vpcmpunordb %ymm1, %ymm0, %k0 ##
81 %res3 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 -1)
82 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
83 ; CHECK: vpcmpneqb %ymm1, %ymm0, %k0 ##
84 %res4 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 -1)
85 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
86 ; CHECK: vpcmpnltb %ymm1, %ymm0, %k0 ##
87 %res5 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 -1)
88 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
89 ; CHECK: vpcmpnleb %ymm1, %ymm0, %k0 ##
90 %res6 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 -1)
91 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
92 ; CHECK: vpcmpordb %ymm1, %ymm0, %k0 ##
93 %res7 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 -1)
94 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
98 define <8 x i32> @test_mask_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) {
99 ; CHECK_LABEL: test_mask_cmp_b_256
100 ; CHECK: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} ##
101 %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
102 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
103 ; CHECK: vpcmpltb %ymm1, %ymm0, %k0 {%k1} ##
104 %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 %mask)
105 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
106 ; CHECK: vpcmpleb %ymm1, %ymm0, %k0 {%k1} ##
107 %res2 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 %mask)
108 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
109 ; CHECK: vpcmpunordb %ymm1, %ymm0, %k0 {%k1} ##
110 %res3 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 %mask)
111 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
112 ; CHECK: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ##
113 %res4 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 %mask)
114 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
115 ; CHECK: vpcmpnltb %ymm1, %ymm0, %k0 {%k1} ##
116 %res5 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 %mask)
117 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
118 ; CHECK: vpcmpnleb %ymm1, %ymm0, %k0 {%k1} ##
119 %res6 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 %mask)
120 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
121 ; CHECK: vpcmpordb %ymm1, %ymm0, %k0 {%k1} ##
122 %res7 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 %mask)
123 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
127 declare i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8>, <32 x i8>, i32, i32) nounwind readnone
129 define <8 x i32> @test_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
130 ; CHECK_LABEL: test_ucmp_b_256
131 ; CHECK: vpcmpequb %ymm1, %ymm0, %k0 ##
132 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
133 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
134 ; CHECK: vpcmpltub %ymm1, %ymm0, %k0 ##
135 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 -1)
136 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
137 ; CHECK: vpcmpleub %ymm1, %ymm0, %k0 ##
138 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 -1)
139 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
140 ; CHECK: vpcmpunordub %ymm1, %ymm0, %k0 ##
141 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 -1)
142 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
143 ; CHECK: vpcmpnequb %ymm1, %ymm0, %k0 ##
144 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 -1)
145 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
146 ; CHECK: vpcmpnltub %ymm1, %ymm0, %k0 ##
147 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 -1)
148 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
149 ; CHECK: vpcmpnleub %ymm1, %ymm0, %k0 ##
150 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 -1)
151 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
152 ; CHECK: vpcmpordub %ymm1, %ymm0, %k0 ##
153 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 -1)
154 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
158 define <8 x i32> @test_mask_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) {
159 ; CHECK_LABEL: test_mask_ucmp_b_256
160 ; CHECK: vpcmpequb %ymm1, %ymm0, %k0 {%k1} ##
161 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
162 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
163 ; CHECK: vpcmpltub %ymm1, %ymm0, %k0 {%k1} ##
164 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 %mask)
165 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
166 ; CHECK: vpcmpleub %ymm1, %ymm0, %k0 {%k1} ##
167 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 %mask)
168 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
169 ; CHECK: vpcmpunordub %ymm1, %ymm0, %k0 {%k1} ##
170 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 %mask)
171 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
172 ; CHECK: vpcmpnequb %ymm1, %ymm0, %k0 {%k1} ##
173 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 %mask)
174 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
175 ; CHECK: vpcmpnltub %ymm1, %ymm0, %k0 {%k1} ##
176 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 %mask)
177 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
178 ; CHECK: vpcmpnleub %ymm1, %ymm0, %k0 {%k1} ##
179 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 %mask)
180 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
181 ; CHECK: vpcmpordub %ymm1, %ymm0, %k0 {%k1} ##
182 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 %mask)
183 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
187 declare i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8>, <32 x i8>, i32, i32) nounwind readnone
189 define <8 x i16> @test_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1) {
190 ; CHECK_LABEL: test_cmp_w_256
191 ; CHECK: vpcmpeqw %ymm1, %ymm0, %k0 ##
192 %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1)
193 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
194 ; CHECK: vpcmpltw %ymm1, %ymm0, %k0 ##
195 %res1 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 -1)
196 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
197 ; CHECK: vpcmplew %ymm1, %ymm0, %k0 ##
198 %res2 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 -1)
199 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
200 ; CHECK: vpcmpunordw %ymm1, %ymm0, %k0 ##
201 %res3 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 -1)
202 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
203 ; CHECK: vpcmpneqw %ymm1, %ymm0, %k0 ##
204 %res4 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 -1)
205 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
206 ; CHECK: vpcmpnltw %ymm1, %ymm0, %k0 ##
207 %res5 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 -1)
208 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
209 ; CHECK: vpcmpnlew %ymm1, %ymm0, %k0 ##
210 %res6 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 -1)
211 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
212 ; CHECK: vpcmpordw %ymm1, %ymm0, %k0 ##
213 %res7 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 -1)
214 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
218 define <8 x i16> @test_mask_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) {
219 ; CHECK_LABEL: test_mask_cmp_w_256
220 ; CHECK: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} ##
221 %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask)
222 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
223 ; CHECK: vpcmpltw %ymm1, %ymm0, %k0 {%k1} ##
224 %res1 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 %mask)
225 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
226 ; CHECK: vpcmplew %ymm1, %ymm0, %k0 {%k1} ##
227 %res2 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 %mask)
228 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
229 ; CHECK: vpcmpunordw %ymm1, %ymm0, %k0 {%k1} ##
230 %res3 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 %mask)
231 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
232 ; CHECK: vpcmpneqw %ymm1, %ymm0, %k0 {%k1} ##
233 %res4 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 %mask)
234 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
235 ; CHECK: vpcmpnltw %ymm1, %ymm0, %k0 {%k1} ##
236 %res5 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 %mask)
237 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
238 ; CHECK: vpcmpnlew %ymm1, %ymm0, %k0 {%k1} ##
239 %res6 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 %mask)
240 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
241 ; CHECK: vpcmpordw %ymm1, %ymm0, %k0 {%k1} ##
242 %res7 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 %mask)
243 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
247 declare i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16>, <16 x i16>, i32, i16) nounwind readnone
249 define <8 x i16> @test_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1) {
250 ; CHECK_LABEL: test_ucmp_w_256
251 ; CHECK: vpcmpequw %ymm1, %ymm0, %k0 ##
252 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1)
253 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
254 ; CHECK: vpcmpltuw %ymm1, %ymm0, %k0 ##
255 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 -1)
256 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
257 ; CHECK: vpcmpleuw %ymm1, %ymm0, %k0 ##
258 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 -1)
259 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
260 ; CHECK: vpcmpunorduw %ymm1, %ymm0, %k0 ##
261 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 -1)
262 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
263 ; CHECK: vpcmpnequw %ymm1, %ymm0, %k0 ##
264 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 -1)
265 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
266 ; CHECK: vpcmpnltuw %ymm1, %ymm0, %k0 ##
267 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 -1)
268 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
269 ; CHECK: vpcmpnleuw %ymm1, %ymm0, %k0 ##
270 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 -1)
271 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
272 ; CHECK: vpcmporduw %ymm1, %ymm0, %k0 ##
273 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 -1)
274 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
278 define <8 x i16> @test_mask_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) {
279 ; CHECK_LABEL: test_mask_ucmp_w_256
280 ; CHECK: vpcmpequw %ymm1, %ymm0, %k0 {%k1} ##
281 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask)
282 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
283 ; CHECK: vpcmpltuw %ymm1, %ymm0, %k0 {%k1} ##
284 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 %mask)
285 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
286 ; CHECK: vpcmpleuw %ymm1, %ymm0, %k0 {%k1} ##
287 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 %mask)
288 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
289 ; CHECK: vpcmpunorduw %ymm1, %ymm0, %k0 {%k1} ##
290 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 %mask)
291 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
292 ; CHECK: vpcmpnequw %ymm1, %ymm0, %k0 {%k1} ##
293 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 %mask)
294 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
295 ; CHECK: vpcmpnltuw %ymm1, %ymm0, %k0 {%k1} ##
296 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 %mask)
297 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
298 ; CHECK: vpcmpnleuw %ymm1, %ymm0, %k0 {%k1} ##
299 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 %mask)
300 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
301 ; CHECK: vpcmporduw %ymm1, %ymm0, %k0 {%k1} ##
302 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 %mask)
303 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
307 declare i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16>, <16 x i16>, i32, i16) nounwind readnone
311 define i16 @test_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b) {
312 ; CHECK-LABEL: test_pcmpeq_b_128
313 ; CHECK: vpcmpeqb %xmm1, %xmm0, %k0 ##
314 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 -1)
318 define i16 @test_mask_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
319 ; CHECK-LABEL: test_mask_pcmpeq_b_128
320 ; CHECK: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} ##
321 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
325 declare i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8>, <16 x i8>, i16)
327 define i8 @test_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b) {
328 ; CHECK-LABEL: test_pcmpeq_w_128
329 ; CHECK: vpcmpeqw %xmm1, %xmm0, %k0 ##
330 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1)
334 define i8 @test_mask_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
335 ; CHECK-LABEL: test_mask_pcmpeq_w_128
336 ; CHECK: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} ##
337 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask)
341 declare i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16>, <8 x i16>, i8)
343 define i16 @test_pcmpgt_b_128(<16 x i8> %a, <16 x i8> %b) {
344 ; CHECK-LABEL: test_pcmpgt_b_128
345 ; CHECK: vpcmpgtb %xmm1, %xmm0, %k0 ##
346 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 -1)
350 define i16 @test_mask_pcmpgt_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
351 ; CHECK-LABEL: test_mask_pcmpgt_b_128
352 ; CHECK: vpcmpgtb %xmm1, %xmm0, %k0 {%k1} ##
353 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
357 declare i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8>, <16 x i8>, i16)
359 define i8 @test_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b) {
360 ; CHECK-LABEL: test_pcmpgt_w_128
361 ; CHECK: vpcmpgtw %xmm1, %xmm0, %k0 ##
362 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1)
366 define i8 @test_mask_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
367 ; CHECK-LABEL: test_mask_pcmpgt_w_128
368 ; CHECK: vpcmpgtw %xmm1, %xmm0, %k0 {%k1} ##
369 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask)
373 declare i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16>, <8 x i16>, i8)
375 define <8 x i16> @test_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1) {
376 ; CHECK_LABEL: test_cmp_b_128
377 ; CHECK: vpcmpeqb %xmm1, %xmm0, %k0 ##
378 %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1)
379 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
380 ; CHECK: vpcmpltb %xmm1, %xmm0, %k0 ##
381 %res1 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 -1)
382 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
383 ; CHECK: vpcmpleb %xmm1, %xmm0, %k0 ##
384 %res2 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 -1)
385 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
386 ; CHECK: vpcmpunordb %xmm1, %xmm0, %k0 ##
387 %res3 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 -1)
388 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
389 ; CHECK: vpcmpneqb %xmm1, %xmm0, %k0 ##
390 %res4 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 -1)
391 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
392 ; CHECK: vpcmpnltb %xmm1, %xmm0, %k0 ##
393 %res5 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 -1)
394 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
395 ; CHECK: vpcmpnleb %xmm1, %xmm0, %k0 ##
396 %res6 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 -1)
397 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
398 ; CHECK: vpcmpordb %xmm1, %xmm0, %k0 ##
399 %res7 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 -1)
400 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
404 define <8 x i16> @test_mask_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) {
405 ; CHECK_LABEL: test_mask_cmp_b_128
406 ; CHECK: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} ##
407 %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask)
408 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
409 ; CHECK: vpcmpltb %xmm1, %xmm0, %k0 {%k1} ##
410 %res1 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 %mask)
411 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
412 ; CHECK: vpcmpleb %xmm1, %xmm0, %k0 {%k1} ##
413 %res2 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 %mask)
414 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
415 ; CHECK: vpcmpunordb %xmm1, %xmm0, %k0 {%k1} ##
416 %res3 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 %mask)
417 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
418 ; CHECK: vpcmpneqb %xmm1, %xmm0, %k0 {%k1} ##
419 %res4 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 %mask)
420 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
421 ; CHECK: vpcmpnltb %xmm1, %xmm0, %k0 {%k1} ##
422 %res5 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 %mask)
423 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
424 ; CHECK: vpcmpnleb %xmm1, %xmm0, %k0 {%k1} ##
425 %res6 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 %mask)
426 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
427 ; CHECK: vpcmpordb %xmm1, %xmm0, %k0 {%k1} ##
428 %res7 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 %mask)
429 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
433 declare i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8>, <16 x i8>, i32, i16) nounwind readnone
435 define <8 x i16> @test_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1) {
436 ; CHECK_LABEL: test_ucmp_b_128
437 ; CHECK: vpcmpequb %xmm1, %xmm0, %k0 ##
438 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1)
439 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
440 ; CHECK: vpcmpltub %xmm1, %xmm0, %k0 ##
441 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 -1)
442 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
443 ; CHECK: vpcmpleub %xmm1, %xmm0, %k0 ##
444 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 -1)
445 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
446 ; CHECK: vpcmpunordub %xmm1, %xmm0, %k0 ##
447 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 -1)
448 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
449 ; CHECK: vpcmpnequb %xmm1, %xmm0, %k0 ##
450 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 -1)
451 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
452 ; CHECK: vpcmpnltub %xmm1, %xmm0, %k0 ##
453 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 -1)
454 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
455 ; CHECK: vpcmpnleub %xmm1, %xmm0, %k0 ##
456 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 -1)
457 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
458 ; CHECK: vpcmpordub %xmm1, %xmm0, %k0 ##
459 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 -1)
460 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
464 define <8 x i16> @test_mask_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) {
465 ; CHECK_LABEL: test_mask_ucmp_b_128
466 ; CHECK: vpcmpequb %xmm1, %xmm0, %k0 {%k1} ##
467 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask)
468 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
469 ; CHECK: vpcmpltub %xmm1, %xmm0, %k0 {%k1} ##
470 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 %mask)
471 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
472 ; CHECK: vpcmpleub %xmm1, %xmm0, %k0 {%k1} ##
473 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 %mask)
474 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
475 ; CHECK: vpcmpunordub %xmm1, %xmm0, %k0 {%k1} ##
476 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 %mask)
477 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
478 ; CHECK: vpcmpnequb %xmm1, %xmm0, %k0 {%k1} ##
479 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 %mask)
480 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
481 ; CHECK: vpcmpnltub %xmm1, %xmm0, %k0 {%k1} ##
482 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 %mask)
483 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
484 ; CHECK: vpcmpnleub %xmm1, %xmm0, %k0 {%k1} ##
485 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 %mask)
486 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
487 ; CHECK: vpcmpordub %xmm1, %xmm0, %k0 {%k1} ##
488 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 %mask)
489 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
493 declare i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8>, <16 x i8>, i32, i16) nounwind readnone
495 define <8 x i8> @test_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1) {
496 ; CHECK_LABEL: test_cmp_w_128
497 ; CHECK: vpcmpeqw %xmm1, %xmm0, %k0 ##
498 %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1)
499 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
500 ; CHECK: vpcmpltw %xmm1, %xmm0, %k0 ##
501 %res1 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 -1)
502 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
503 ; CHECK: vpcmplew %xmm1, %xmm0, %k0 ##
504 %res2 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 -1)
505 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
506 ; CHECK: vpcmpunordw %xmm1, %xmm0, %k0 ##
507 %res3 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 -1)
508 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
509 ; CHECK: vpcmpneqw %xmm1, %xmm0, %k0 ##
510 %res4 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 -1)
511 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
512 ; CHECK: vpcmpnltw %xmm1, %xmm0, %k0 ##
513 %res5 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 -1)
514 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
515 ; CHECK: vpcmpnlew %xmm1, %xmm0, %k0 ##
516 %res6 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 -1)
517 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
518 ; CHECK: vpcmpordw %xmm1, %xmm0, %k0 ##
519 %res7 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 -1)
520 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
524 define <8 x i8> @test_mask_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
525 ; CHECK_LABEL: test_mask_cmp_w_128
526 ; CHECK: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} ##
527 %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask)
528 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
529 ; CHECK: vpcmpltw %xmm1, %xmm0, %k0 {%k1} ##
530 %res1 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 %mask)
531 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
532 ; CHECK: vpcmplew %xmm1, %xmm0, %k0 {%k1} ##
533 %res2 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 %mask)
534 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
535 ; CHECK: vpcmpunordw %xmm1, %xmm0, %k0 {%k1} ##
536 %res3 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 %mask)
537 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
538 ; CHECK: vpcmpneqw %xmm1, %xmm0, %k0 {%k1} ##
539 %res4 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 %mask)
540 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
541 ; CHECK: vpcmpnltw %xmm1, %xmm0, %k0 {%k1} ##
542 %res5 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 %mask)
543 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
544 ; CHECK: vpcmpnlew %xmm1, %xmm0, %k0 {%k1} ##
545 %res6 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 %mask)
546 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
547 ; CHECK: vpcmpordw %xmm1, %xmm0, %k0 {%k1} ##
548 %res7 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 %mask)
549 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
553 declare i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone
555 define <8 x i8> @test_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1) {
556 ; CHECK_LABEL: test_ucmp_w_128
557 ; CHECK: vpcmpequw %xmm1, %xmm0, %k0 ##
558 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1)
559 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
560 ; CHECK: vpcmpltuw %xmm1, %xmm0, %k0 ##
561 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 -1)
562 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
563 ; CHECK: vpcmpleuw %xmm1, %xmm0, %k0 ##
564 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 -1)
565 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
566 ; CHECK: vpcmpunorduw %xmm1, %xmm0, %k0 ##
567 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 -1)
568 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
569 ; CHECK: vpcmpnequw %xmm1, %xmm0, %k0 ##
570 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 -1)
571 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
572 ; CHECK: vpcmpnltuw %xmm1, %xmm0, %k0 ##
573 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 -1)
574 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
575 ; CHECK: vpcmpnleuw %xmm1, %xmm0, %k0 ##
576 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 -1)
577 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
578 ; CHECK: vpcmporduw %xmm1, %xmm0, %k0 ##
579 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 -1)
580 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
584 define <8 x i8> @test_mask_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
585 ; CHECK_LABEL: test_mask_ucmp_w_128
586 ; CHECK: vpcmpequw %xmm1, %xmm0, %k0 {%k1} ##
587 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask)
588 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
589 ; CHECK: vpcmpltuw %xmm1, %xmm0, %k0 {%k1} ##
590 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 %mask)
591 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
592 ; CHECK: vpcmpleuw %xmm1, %xmm0, %k0 {%k1} ##
593 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 %mask)
594 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
595 ; CHECK: vpcmpunorduw %xmm1, %xmm0, %k0 {%k1} ##
596 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 %mask)
597 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
598 ; CHECK: vpcmpnequw %xmm1, %xmm0, %k0 {%k1} ##
599 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 %mask)
600 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
601 ; CHECK: vpcmpnltuw %xmm1, %xmm0, %k0 {%k1} ##
602 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 %mask)
603 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
604 ; CHECK: vpcmpnleuw %xmm1, %xmm0, %k0 {%k1} ##
605 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 %mask)
606 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
607 ; CHECK: vpcmporduw %xmm1, %xmm0, %k0 {%k1} ##
608 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 %mask)
609 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
613 declare i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone
615 declare <8 x float> @llvm.x86.fma.mask.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
617 define <8 x float> @test_mask_vfmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
618 ; CHECK-LABEL: test_mask_vfmadd256_ps
619 ; CHECK: vfmadd213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa8,0xc2]
620 %res = call <8 x float> @llvm.x86.fma.mask.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
624 declare <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
626 define <4 x float> @test_mask_vfmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
627 ; CHECK-LABEL: test_mask_vfmadd128_ps
628 ; CHECK: vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0xc2]
629 %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
633 declare <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
635 define <4 x double> @test_mask_fmadd256_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask) {
636 ; CHECK-LABEL: test_mask_fmadd256_pd:
637 ; CHECK: vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0xc2]
638 %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask)
639 ret <4 x double> %res
642 declare <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
644 define <2 x double> @test_mask_fmadd128_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
645 ; CHECK-LABEL: test_mask_fmadd128_pd:
646 ; CHECK: vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0xc2]
647 %res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask)
648 ret <2 x double> %res
651 declare <8 x float> @llvm.x86.fma.mask.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
653 define <8 x float> @test_mask_vfmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
654 ; CHECK-LABEL: test_mask_vfmsub256_ps
655 ; CHECK: vfmsub213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xaa,0xc2]
656 %res = call <8 x float> @llvm.x86.fma.mask.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
660 declare <4 x float> @llvm.x86.fma.mask.vfmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
662 define <4 x float> @test_mask_vfmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
663 ; CHECK-LABEL: test_mask_vfmsub128_ps
664 ; CHECK: vfmsub213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xaa,0xc2]
665 %res = call <4 x float> @llvm.x86.fma.mask.vfmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
669 declare <4 x double> @llvm.x86.fma.mask.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
671 define <4 x double> @test_mask_vfmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
672 ; CHECK-LABEL: test_mask_vfmsub256_pd
673 ; CHECK: vfmsub213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xaa,0xc2]
674 %res = call <4 x double> @llvm.x86.fma.mask.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
675 ret <4 x double> %res
678 declare <2 x double> @llvm.x86.fma.mask.vfmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
680 define <2 x double> @test_mask_vfmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
681 ; CHECK-LABEL: test_mask_vfmsub128_pd
682 ; CHECK: vfmsub213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xaa,0xc2]
683 %res = call <2 x double> @llvm.x86.fma.mask.vfmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
684 ret <2 x double> %res
687 declare <8 x float> @llvm.x86.fma.mask.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
689 define <8 x float> @test_mask_vfnmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
690 ; CHECK-LABEL: test_mask_vfnmadd256_ps
691 ; CHECK: vfnmadd213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xac,0xc2]
692 %res = call <8 x float> @llvm.x86.fma.mask.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
696 declare <4 x float> @llvm.x86.fma.mask.vfnmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
698 define <4 x float> @test_mask_vfnmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
699 ; CHECK-LABEL: test_mask_vfnmadd128_ps
700 ; CHECK: vfnmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xac,0xc2]
701 %res = call <4 x float> @llvm.x86.fma.mask.vfnmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
705 declare <4 x double> @llvm.x86.fma.mask.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
707 define <4 x double> @test_mask_vfnmadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
708 ; CHECK-LABEL: test_mask_vfnmadd256_pd
709 ; CHECK: vfnmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xac,0xc2]
710 %res = call <4 x double> @llvm.x86.fma.mask.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
711 ret <4 x double> %res
714 declare <2 x double> @llvm.x86.fma.mask.vfnmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
716 define <2 x double> @test_mask_vfnmadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
717 ; CHECK-LABEL: test_mask_vfnmadd128_pd
718 ; CHECK: vfnmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xac,0xc2]
719 %res = call <2 x double> @llvm.x86.fma.mask.vfnmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
720 ret <2 x double> %res
723 declare <8 x float> @llvm.x86.fma.mask.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
725 define <8 x float> @test_mask_vfnmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
726 ; CHECK-LABEL: test_mask_vfnmsub256_ps
727 ; CHECK: vfnmsub213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xae,0xc2]
728 %res = call <8 x float> @llvm.x86.fma.mask.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
732 declare <4 x float> @llvm.x86.fma.mask.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
734 define <4 x float> @test_mask_vfnmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
735 ; CHECK-LABEL: test_mask_vfnmsub128_ps
736 ; CHECK: vfnmsub213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xae,0xc2]
737 %res = call <4 x float> @llvm.x86.fma.mask.vfnmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
741 declare <4 x double> @llvm.x86.fma.mask.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
743 define <4 x double> @test_mask_vfnmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
744 ; CHECK-LABEL: test_mask_vfnmsub256_pd
745 ; CHECK: vfnmsub213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xae,0xc2]
746 %res = call <4 x double> @llvm.x86.fma.mask.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
747 ret <4 x double> %res
750 declare <2 x double> @llvm.x86.fma.mask.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
752 define <2 x double> @test_mask_vfnmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
753 ; CHECK-LABEL: test_mask_vfnmsub128_pd
754 ; CHECK: vfnmsub213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xae,0xc2]
755 %res = call <2 x double> @llvm.x86.fma.mask.vfnmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
756 ret <2 x double> %res
759 declare <8 x float> @llvm.x86.fma.mask.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
761 define <8 x float> @test_mask_fmaddsub256_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask) {
762 ; CHECK-LABEL: test_mask_fmaddsub256_ps:
763 ; CHECK: vfmaddsub213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa6,0xc2]
764 %res = call <8 x float> @llvm.x86.fma.mask.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask)
768 declare <4 x float> @llvm.x86.fma.mask.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
770 define <4 x float> @test_mask_fmaddsub128_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
771 ; CHECK-LABEL: test_mask_fmaddsub128_ps:
772 ; CHECK: vfmaddsub213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa6,0xc2]
773 %res = call <4 x float> @llvm.x86.fma.mask.vfmaddsub.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask)
777 declare <4 x double> @llvm.x86.fma.mask.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
779 define <4 x double> @test_mask_vfmaddsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
780 ; CHECK-LABEL: test_mask_vfmaddsub256_pd
781 ; CHECK: vfmaddsub213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa6,0xc2]
782 %res = call <4 x double> @llvm.x86.fma.mask.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
783 ret <4 x double> %res
786 declare <2 x double> @llvm.x86.fma.mask.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
788 define <2 x double> @test_mask_vfmaddsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
789 ; CHECK-LABEL: test_mask_vfmaddsub128_pd
790 ; CHECK: vfmaddsub213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa6,0xc2]
791 %res = call <2 x double> @llvm.x86.fma.mask.vfmaddsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
792 ret <2 x double> %res
795 declare <8 x float> @llvm.x86.fma.mask.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
797 define <8 x float> @test_mask_vfmsubadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
798 ; CHECK-LABEL: test_mask_vfmsubadd256_ps
799 ; CHECK: vfmsubadd213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa7,0xc2]
800 %res = call <8 x float> @llvm.x86.fma.mask.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
804 declare <4 x float> @llvm.x86.fma.mask.vfmsubadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
806 define <4 x float> @test_mask_vfmsubadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
807 ; CHECK-LABEL: test_mask_vfmsubadd128_ps
808 ; CHECK: vfmsubadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa7,0xc2]
809 %res = call <4 x float> @llvm.x86.fma.mask.vfmsubadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
813 declare <4 x double> @llvm.x86.fma.mask.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
815 define <4 x double> @test_mask_vfmsubadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
816 ; CHECK-LABEL: test_mask_vfmsubadd256_pd
817 ; CHECK: vfmsubadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa7,0xc2]
818 %res = call <4 x double> @llvm.x86.fma.mask.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
819 ret <4 x double> %res
821 declare <2 x double> @llvm.x86.fma.mask.vfmsubadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
823 define <2 x double> @test_mask_vfmsubadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
824 ; CHECK-LABEL: test_mask_vfmsubadd128_pd
825 ; CHECK: vfmsubadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa7,0xc2]
826 %res = call <2 x double> @llvm.x86.fma.mask.vfmsubadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
827 ret <2 x double> %res
830 define <2 x double> @test_mask_vfmsubadd128rm_pd(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2, i8 %mask) {
831 ; CHECK-LABEL: test_mask_vfmsubadd128rm_pd
832 ; CHECK: vfmsubadd213pd (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa7,0x07]
833 %a2 = load <2 x double>, <2 x double>* %ptr_a2
834 %res = call <2 x double> @llvm.x86.fma.mask.vfmsubadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
835 ret <2 x double> %res
837 declare <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
838 define <8 x double> @test_mask_vfmsubaddrm_pd(<8 x double> %a0, <8 x double> %a1, <8 x double>* %ptr_a2, i8 %mask) {
839 ; CHECK-LABEL: test_mask_vfmsubaddrm_pd
840 ; CHECK: vfmsubadd213pd (%rdi), %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xa7,0x07]
841 %a2 = load <8 x double>, <8 x double>* %ptr_a2, align 8
842 %res = call <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
843 ret <8 x double> %res
846 define <4 x float> @test_mask_vfmadd128_ps_r(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
847 ; CHECK-LABEL: test_mask_vfmadd128_ps_r
848 ; CHECK: vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0xc2]
849 %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
853 define <4 x float> @test_mask_vfmadd128_ps_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
854 ; CHECK-LABEL: test_mask_vfmadd128_ps_rz
855 ; CHECK: vfmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa8,0xc2]
856 %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
860 define <4 x float> @test_mask_vfmadd128_ps_rmk(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) {
861 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmk
862 ; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
863 %a2 = load <4 x float>, <4 x float>* %ptr_a2
864 %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
868 define <4 x float> @test_mask_vfmadd128_ps_rmka(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) {
869 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmka
870 ; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
871 %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 8
872 %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
876 define <4 x float> @test_mask_vfmadd128_ps_rmkz(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
877 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmkz
878 ; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0x07]
879 %a2 = load <4 x float>, <4 x float>* %ptr_a2
880 %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
884 define <4 x float> @test_mask_vfmadd128_ps_rmkza(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
885 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmkza
886 ; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0x07]
887 %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 4
888 %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
892 define <4 x float> @test_mask_vfmadd128_ps_rmb(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) {
893 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmb
894 ; CHECK: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
895 %q = load float, float* %ptr_a2
896 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
897 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
898 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
899 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
900 %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind
904 define <4 x float> @test_mask_vfmadd128_ps_rmba(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) {
905 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmba
906 ; CHECK: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
907 %q = load float, float* %ptr_a2, align 4
908 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
909 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
910 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
911 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
912 %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind
916 define <4 x float> @test_mask_vfmadd128_ps_rmbz(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) {
917 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmbz
918 ; CHECK: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
919 %q = load float, float* %ptr_a2
920 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
921 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
922 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
923 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
924 %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind
928 define <4 x float> @test_mask_vfmadd128_ps_rmbza(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) {
929 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmbza
930 ; CHECK: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
931 %q = load float, float* %ptr_a2, align 4
932 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
933 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
934 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
935 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
936 %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind
940 define <2 x double> @test_mask_vfmadd128_pd_r(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
941 ; CHECK-LABEL: test_mask_vfmadd128_pd_r
942 ; CHECK: vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0xc2]
943 %res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
944 ret <2 x double> %res
947 define <2 x double> @test_mask_vfmadd128_pd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
948 ; CHECK-LABEL: test_mask_vfmadd128_pd_rz
949 ; CHECK: vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa8,0xc2]
950 %res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
951 ret <2 x double> %res
954 define <2 x double> @test_mask_vfmadd128_pd_rmk(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2, i8 %mask) {
955 ; CHECK-LABEL: test_mask_vfmadd128_pd_rmk
956 ; CHECK: vfmadd213pd (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x07]
957 %a2 = load <2 x double>, <2 x double>* %ptr_a2
958 %res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
959 ret <2 x double> %res
962 define <2 x double> @test_mask_vfmadd128_pd_rmkz(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2) {
963 ; CHECK-LABEL: test_mask_vfmadd128_pd_rmkz
964 ; CHECK: vfmadd213pd (%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0x07]
965 %a2 = load <2 x double>, <2 x double>* %ptr_a2
966 %res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
967 ret <2 x double> %res
970 define <4 x double> @test_mask_vfmadd256_pd_r(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
971 ; CHECK-LABEL: test_mask_vfmadd256_pd_r
972 ; CHECK: vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0xc2]
973 %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
974 ret <4 x double> %res
977 define <4 x double> @test_mask_vfmadd256_pd_rz(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
978 ; CHECK-LABEL: test_mask_vfmadd256_pd_rz
979 ; CHECK: vfmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa8,0xc2]
980 %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
981 ret <4 x double> %res
984 define <4 x double> @test_mask_vfmadd256_pd_rmk(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2, i8 %mask) {
985 ; CHECK-LABEL: test_mask_vfmadd256_pd_rmk
986 ; CHECK: vfmadd213pd (%rdi), %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x07]
987 %a2 = load <4 x double>, <4 x double>* %ptr_a2
988 %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
989 ret <4 x double> %res
992 define <4 x double> @test_mask_vfmadd256_pd_rmkz(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2) {
993 ; CHECK-LABEL: test_mask_vfmadd256_pd_rmkz
994 ; CHECK: vfmadd213pd (%rdi), %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0xf5,0xa8,0x07]
995 %a2 = load <4 x double>, <4 x double>* %ptr_a2
996 %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
997 ret <4 x double> %res
999 define <8 x i16> @test_mask_add_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
1000 ;CHECK-LABEL: test_mask_add_epi16_rr_128
1001 ;CHECK: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc1]
1002 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
1006 define <8 x i16> @test_mask_add_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
1007 ;CHECK-LABEL: test_mask_add_epi16_rrk_128
1008 ;CHECK: vpaddw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfd,0xd1]
1009 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
1013 define <8 x i16> @test_mask_add_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
1014 ;CHECK-LABEL: test_mask_add_epi16_rrkz_128
1015 ;CHECK: vpaddw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfd,0xc1]
1016 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
1020 define <8 x i16> @test_mask_add_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
1021 ;CHECK-LABEL: test_mask_add_epi16_rm_128
1022 ;CHECK: vpaddw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0x07]
1023 %b = load <8 x i16>, <8 x i16>* %ptr_b
1024 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
1028 define <8 x i16> @test_mask_add_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
1029 ;CHECK-LABEL: test_mask_add_epi16_rmk_128
1030 ;CHECK: vpaddw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfd,0x0f]
1031 %b = load <8 x i16>, <8 x i16>* %ptr_b
1032 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
1036 define <8 x i16> @test_mask_add_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
1037 ;CHECK-LABEL: test_mask_add_epi16_rmkz_128
1038 ;CHECK: vpaddw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfd,0x07]
1039 %b = load <8 x i16>, <8 x i16>* %ptr_b
1040 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
1044 declare <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1046 define <16 x i16> @test_mask_add_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
1047 ;CHECK-LABEL: test_mask_add_epi16_rr_256
1048 ;CHECK: vpaddw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc1]
1049 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
1053 define <16 x i16> @test_mask_add_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
1054 ;CHECK-LABEL: test_mask_add_epi16_rrk_256
1055 ;CHECK: vpaddw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfd,0xd1]
1056 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
1060 define <16 x i16> @test_mask_add_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
1061 ;CHECK-LABEL: test_mask_add_epi16_rrkz_256
1062 ;CHECK: vpaddw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0xc1]
1063 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
1067 define <16 x i16> @test_mask_add_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
1068 ;CHECK-LABEL: test_mask_add_epi16_rm_256
1069 ;CHECK: vpaddw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0x07]
1070 %b = load <16 x i16>, <16 x i16>* %ptr_b
1071 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
1075 define <16 x i16> @test_mask_add_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
1076 ;CHECK-LABEL: test_mask_add_epi16_rmk_256
1077 ;CHECK: vpaddw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfd,0x0f]
1078 %b = load <16 x i16>, <16 x i16>* %ptr_b
1079 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
1083 define <16 x i16> @test_mask_add_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
1084 ;CHECK-LABEL: test_mask_add_epi16_rmkz_256
1085 ;CHECK: vpaddw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0x07]
1086 %b = load <16 x i16>, <16 x i16>* %ptr_b
1087 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
1091 declare <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1093 define <8 x i16> @test_mask_sub_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
1094 ;CHECK-LABEL: test_mask_sub_epi16_rr_128
1095 ;CHECK: vpsubw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf9,0xc1]
1096 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
1100 define <8 x i16> @test_mask_sub_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
1101 ;CHECK-LABEL: test_mask_sub_epi16_rrk_128
1102 ;CHECK: vpsubw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf9,0xd1]
1103 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
1107 define <8 x i16> @test_mask_sub_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
1108 ;CHECK-LABEL: test_mask_sub_epi16_rrkz_128
1109 ;CHECK: vpsubw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf9,0xc1]
1110 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
1114 define <8 x i16> @test_mask_sub_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
1115 ;CHECK-LABEL: test_mask_sub_epi16_rm_128
1116 ;CHECK: vpsubw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf9,0x07]
1117 %b = load <8 x i16>, <8 x i16>* %ptr_b
1118 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
1122 define <8 x i16> @test_mask_sub_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
1123 ;CHECK-LABEL: test_mask_sub_epi16_rmk_128
1124 ;CHECK: vpsubw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf9,0x0f]
1125 %b = load <8 x i16>, <8 x i16>* %ptr_b
1126 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
1130 define <8 x i16> @test_mask_sub_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
1131 ;CHECK-LABEL: test_mask_sub_epi16_rmkz_128
1132 ;CHECK: vpsubw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf9,0x07]
1133 %b = load <8 x i16>, <8 x i16>* %ptr_b
1134 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
1138 declare <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1140 define <16 x i16> @test_mask_sub_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
1141 ;CHECK-LABEL: test_mask_sub_epi16_rr_256
1142 ;CHECK: vpsubw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf9,0xc1]
1143 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
1147 define <16 x i16> @test_mask_sub_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
1148 ;CHECK-LABEL: test_mask_sub_epi16_rrk_256
1149 ;CHECK: vpsubw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf9,0xd1]
1150 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
1154 define <16 x i16> @test_mask_sub_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
1155 ;CHECK-LABEL: test_mask_sub_epi16_rrkz_256
1156 ;CHECK: vpsubw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0xc1]
1157 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
1161 define <16 x i16> @test_mask_sub_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
1162 ;CHECK-LABEL: test_mask_sub_epi16_rm_256
1163 ;CHECK: vpsubw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf9,0x07]
1164 %b = load <16 x i16>, <16 x i16>* %ptr_b
1165 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
1169 define <16 x i16> @test_mask_sub_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
1170 ;CHECK-LABEL: test_mask_sub_epi16_rmk_256
1171 ;CHECK: vpsubw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf9,0x0f]
1172 %b = load <16 x i16>, <16 x i16>* %ptr_b
1173 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
1177 define <16 x i16> @test_mask_sub_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
1178 ;CHECK-LABEL: test_mask_sub_epi16_rmkz_256
1179 ;CHECK: vpsubw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0x07]
1180 %b = load <16 x i16>, <16 x i16>* %ptr_b
1181 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
1185 declare <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1187 define <32 x i16> @test_mask_add_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1188 ;CHECK-LABEL: test_mask_add_epi16_rr_512
1189 ;CHECK: vpaddw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1]
1190 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1194 define <32 x i16> @test_mask_add_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
1195 ;CHECK-LABEL: test_mask_add_epi16_rrk_512
1196 ;CHECK: vpaddw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfd,0xd1]
1197 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1201 define <32 x i16> @test_mask_add_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
1202 ;CHECK-LABEL: test_mask_add_epi16_rrkz_512
1203 ;CHECK: vpaddw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0xc1]
1204 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1208 define <32 x i16> @test_mask_add_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1209 ;CHECK-LABEL: test_mask_add_epi16_rm_512
1210 ;CHECK: vpaddw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfd,0x07]
1211 %b = load <32 x i16>, <32 x i16>* %ptr_b
1212 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1216 define <32 x i16> @test_mask_add_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1217 ;CHECK-LABEL: test_mask_add_epi16_rmk_512
1218 ;CHECK: vpaddw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfd,0x0f]
1219 %b = load <32 x i16>, <32 x i16>* %ptr_b
1220 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1224 define <32 x i16> @test_mask_add_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
1225 ;CHECK-LABEL: test_mask_add_epi16_rmkz_512
1226 ;CHECK: vpaddw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0x07]
1227 %b = load <32 x i16>, <32 x i16>* %ptr_b
1228 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1232 declare <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1234 define <32 x i16> @test_mask_sub_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1235 ;CHECK-LABEL: test_mask_sub_epi16_rr_512
1236 ;CHECK: vpsubw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xf9,0xc1]
1237 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1241 define <32 x i16> @test_mask_sub_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
1242 ;CHECK-LABEL: test_mask_sub_epi16_rrk_512
1243 ;CHECK: vpsubw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf9,0xd1]
1244 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1248 define <32 x i16> @test_mask_sub_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
1249 ;CHECK-LABEL: test_mask_sub_epi16_rrkz_512
1250 ;CHECK: vpsubw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0xc1]
1251 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1255 define <32 x i16> @test_mask_sub_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1256 ;CHECK-LABEL: test_mask_sub_epi16_rm_512
1257 ;CHECK: vpsubw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xf9,0x07]
1258 %b = load <32 x i16>, <32 x i16>* %ptr_b
1259 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1263 define <32 x i16> @test_mask_sub_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1264 ;CHECK-LABEL: test_mask_sub_epi16_rmk_512
1265 ;CHECK: vpsubw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf9,0x0f]
1266 %b = load <32 x i16>, <32 x i16>* %ptr_b
1267 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1271 define <32 x i16> @test_mask_sub_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
1272 ;CHECK-LABEL: test_mask_sub_epi16_rmkz_512
1273 ;CHECK: vpsubw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0x07]
1274 %b = load <32 x i16>, <32 x i16>* %ptr_b
1275 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1279 declare <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1281 define <32 x i16> @test_mask_mullo_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1282 ;CHECK-LABEL: test_mask_mullo_epi16_rr_512
1283 ;CHECK: vpmullw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xd5,0xc1]
1284 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1288 define <32 x i16> @test_mask_mullo_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
1289 ;CHECK-LABEL: test_mask_mullo_epi16_rrk_512
1290 ;CHECK: vpmullw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd5,0xd1]
1291 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1295 define <32 x i16> @test_mask_mullo_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
1296 ;CHECK-LABEL: test_mask_mullo_epi16_rrkz_512
1297 ;CHECK: vpmullw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0xc1]
1298 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1302 define <32 x i16> @test_mask_mullo_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1303 ;CHECK-LABEL: test_mask_mullo_epi16_rm_512
1304 ;CHECK: vpmullw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xd5,0x07]
1305 %b = load <32 x i16>, <32 x i16>* %ptr_b
1306 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1310 define <32 x i16> @test_mask_mullo_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1311 ;CHECK-LABEL: test_mask_mullo_epi16_rmk_512
1312 ;CHECK: vpmullw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd5,0x0f]
1313 %b = load <32 x i16>, <32 x i16>* %ptr_b
1314 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1318 define <32 x i16> @test_mask_mullo_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
1319 ;CHECK-LABEL: test_mask_mullo_epi16_rmkz_512
1320 ;CHECK: vpmullw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0x07]
1321 %b = load <32 x i16>, <32 x i16>* %ptr_b
1322 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1326 declare <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1328 define <8 x i16> @test_mask_mullo_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
1329 ;CHECK-LABEL: test_mask_mullo_epi16_rr_128
1330 ;CHECK: vpmullw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd5,0xc1]
1331 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
1335 define <8 x i16> @test_mask_mullo_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
1336 ;CHECK-LABEL: test_mask_mullo_epi16_rrk_128
1337 ;CHECK: vpmullw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd5,0xd1]
1338 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
1342 define <8 x i16> @test_mask_mullo_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
1343 ;CHECK-LABEL: test_mask_mullo_epi16_rrkz_128
1344 ;CHECK: vpmullw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd5,0xc1]
1345 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
1349 define <8 x i16> @test_mask_mullo_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
1350 ;CHECK-LABEL: test_mask_mullo_epi16_rm_128
1351 ;CHECK: vpmullw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd5,0x07]
1352 %b = load <8 x i16>, <8 x i16>* %ptr_b
1353 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
1357 define <8 x i16> @test_mask_mullo_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
1358 ;CHECK-LABEL: test_mask_mullo_epi16_rmk_128
1359 ;CHECK: vpmullw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd5,0x0f]
1360 %b = load <8 x i16>, <8 x i16>* %ptr_b
1361 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
1365 define <8 x i16> @test_mask_mullo_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
1366 ;CHECK-LABEL: test_mask_mullo_epi16_rmkz_128
1367 ;CHECK: vpmullw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd5,0x07]
1368 %b = load <8 x i16>, <8 x i16>* %ptr_b
1369 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
1373 declare <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1375 define <16 x i16> @test_mask_mullo_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
1376 ;CHECK-LABEL: test_mask_mullo_epi16_rr_256
1377 ;CHECK: vpmullw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd5,0xc1]
1378 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
1382 define <16 x i16> @test_mask_mullo_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
1383 ;CHECK-LABEL: test_mask_mullo_epi16_rrk_256
1384 ;CHECK: vpmullw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd5,0xd1]
1385 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
1389 define <16 x i16> @test_mask_mullo_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
1390 ;CHECK-LABEL: test_mask_mullo_epi16_rrkz_256
1391 ;CHECK: vpmullw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0xc1]
1392 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
1396 define <16 x i16> @test_mask_mullo_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
1397 ;CHECK-LABEL: test_mask_mullo_epi16_rm_256
1398 ;CHECK: vpmullw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd5,0x07]
1399 %b = load <16 x i16>, <16 x i16>* %ptr_b
1400 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
1404 define <16 x i16> @test_mask_mullo_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
1405 ;CHECK-LABEL: test_mask_mullo_epi16_rmk_256
1406 ;CHECK: vpmullw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd5,0x0f]
1407 %b = load <16 x i16>, <16 x i16>* %ptr_b
1408 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
1412 define <16 x i16> @test_mask_mullo_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
1413 ;CHECK-LABEL: test_mask_mullo_epi16_rmkz_256
1414 ;CHECK: vpmullw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0x07]
1415 %b = load <16 x i16>, <16 x i16>* %ptr_b
1416 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
1420 declare <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1423 define <8 x i16> @test_mask_packs_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
1424 ;CHECK-LABEL: test_mask_packs_epi32_rr_128
1425 ;CHECK: vpackssdw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x6b,0xc1]
1426 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
1430 define <8 x i16> @test_mask_packs_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) {
1431 ;CHECK-LABEL: test_mask_packs_epi32_rrk_128
1432 ;CHECK: vpackssdw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1]
1433 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
1437 define <8 x i16> @test_mask_packs_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
1438 ;CHECK-LABEL: test_mask_packs_epi32_rrkz_128
1439 ;CHECK: vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1]
1440 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
1444 define <8 x i16> @test_mask_packs_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
1445 ;CHECK-LABEL: test_mask_packs_epi32_rm_128
1446 ;CHECK: vpackssdw (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x6b,0x07]
1447 %b = load <4 x i32>, <4 x i32>* %ptr_b
1448 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
1452 define <8 x i16> @test_mask_packs_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
1453 ;CHECK-LABEL: test_mask_packs_epi32_rmk_128
1454 ;CHECK: vpackssdw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x0f]
1455 %b = load <4 x i32>, <4 x i32>* %ptr_b
1456 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
1460 define <8 x i16> @test_mask_packs_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
1461 ;CHECK-LABEL: test_mask_packs_epi32_rmkz_128
1462 ;CHECK: vpackssdw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x07]
1463 %b = load <4 x i32>, <4 x i32>* %ptr_b
1464 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
1468 define <8 x i16> @test_mask_packs_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
1469 ;CHECK-LABEL: test_mask_packs_epi32_rmb_128
1470 ;CHECK: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x07]
1471 %q = load i32, i32* %ptr_b
1472 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
1473 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
1474 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
1478 define <8 x i16> @test_mask_packs_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) {
1479 ;CHECK-LABEL: test_mask_packs_epi32_rmbk_128
1480 ;CHECK: vpackssdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x0f]
1481 %q = load i32, i32* %ptr_b
1482 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
1483 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
1484 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
1488 define <8 x i16> @test_mask_packs_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
1489 ;CHECK-LABEL: test_mask_packs_epi32_rmbkz_128
1490 ;CHECK: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x07]
1491 %q = load i32, i32* %ptr_b
1492 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
1493 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
1494 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
1498 declare <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32>, <4 x i32>, <8 x i16>, i8)
1500 define <16 x i16> @test_mask_packs_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
1501 ;CHECK-LABEL: test_mask_packs_epi32_rr_256
1502 ;CHECK: vpackssdw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x6b,0xc1]
1503 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
1507 define <16 x i16> @test_mask_packs_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) {
1508 ;CHECK-LABEL: test_mask_packs_epi32_rrk_256
1509 ;CHECK: vpackssdw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1]
1510 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
1514 define <16 x i16> @test_mask_packs_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) {
1515 ;CHECK-LABEL: test_mask_packs_epi32_rrkz_256
1516 ;CHECK: vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1]
1517 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
1521 define <16 x i16> @test_mask_packs_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
1522 ;CHECK-LABEL: test_mask_packs_epi32_rm_256
1523 ;CHECK: vpackssdw (%rdi), %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x6b,0x07]
1524 %b = load <8 x i32>, <8 x i32>* %ptr_b
1525 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
1529 define <16 x i16> @test_mask_packs_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
1530 ;CHECK-LABEL: test_mask_packs_epi32_rmk_256
1531 ;CHECK: vpackssdw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x0f]
1532 %b = load <8 x i32>, <8 x i32>* %ptr_b
1533 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
1537 define <16 x i16> @test_mask_packs_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) {
1538 ;CHECK-LABEL: test_mask_packs_epi32_rmkz_256
1539 ;CHECK: vpackssdw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x07]
1540 %b = load <8 x i32>, <8 x i32>* %ptr_b
1541 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
1545 define <16 x i16> @test_mask_packs_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
1546 ;CHECK-LABEL: test_mask_packs_epi32_rmb_256
1547 ;CHECK: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x07]
1548 %q = load i32, i32* %ptr_b
1549 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
1550 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
1551 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
1555 define <16 x i16> @test_mask_packs_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) {
1556 ;CHECK-LABEL: test_mask_packs_epi32_rmbk_256
1557 ;CHECK: vpackssdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x0f]
1558 %q = load i32, i32* %ptr_b
1559 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
1560 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
1561 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
1565 define <16 x i16> @test_mask_packs_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) {
1566 ;CHECK-LABEL: test_mask_packs_epi32_rmbkz_256
1567 ;CHECK: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x07]
1568 %q = load i32, i32* %ptr_b
1569 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
1570 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
1571 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
1575 declare <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32>, <8 x i32>, <16 x i16>, i16)
1577 define <16 x i8> @test_mask_packs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
1578 ;CHECK-LABEL: test_mask_packs_epi16_rr_128
1579 ;CHECK: vpacksswb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc1]
1580 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
1584 define <16 x i8> @test_mask_packs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) {
1585 ;CHECK-LABEL: test_mask_packs_epi16_rrk_128
1586 ;CHECK: vpacksswb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x63,0xd1]
1587 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
1591 define <16 x i8> @test_mask_packs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) {
1592 ;CHECK-LABEL: test_mask_packs_epi16_rrkz_128
1593 ;CHECK: vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x63,0xc1]
1594 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
1598 define <16 x i8> @test_mask_packs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
1599 ;CHECK-LABEL: test_mask_packs_epi16_rm_128
1600 ;CHECK: vpacksswb (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0x07]
1601 %b = load <8 x i16>, <8 x i16>* %ptr_b
1602 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
1606 define <16 x i8> @test_mask_packs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
1607 ;CHECK-LABEL: test_mask_packs_epi16_rmk_128
1608 ;CHECK: vpacksswb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x63,0x0f]
1609 %b = load <8 x i16>, <8 x i16>* %ptr_b
1610 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
1614 define <16 x i8> @test_mask_packs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) {
1615 ;CHECK-LABEL: test_mask_packs_epi16_rmkz_128
1616 ;CHECK: vpacksswb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x63,0x07]
1617 %b = load <8 x i16>, <8 x i16>* %ptr_b
1618 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
1622 declare <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16>, <8 x i16>, <16 x i8>, i16)
1624 define <32 x i8> @test_mask_packs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
1625 ;CHECK-LABEL: test_mask_packs_epi16_rr_256
1626 ;CHECK: vpacksswb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x63,0xc1]
1627 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
1631 define <32 x i8> @test_mask_packs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) {
1632 ;CHECK-LABEL: test_mask_packs_epi16_rrk_256
1633 ;CHECK: vpacksswb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x63,0xd1]
1634 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
1638 define <32 x i8> @test_mask_packs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) {
1639 ;CHECK-LABEL: test_mask_packs_epi16_rrkz_256
1640 ;CHECK: vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x63,0xc1]
1641 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
1645 define <32 x i8> @test_mask_packs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
1646 ;CHECK-LABEL: test_mask_packs_epi16_rm_256
1647 ;CHECK: vpacksswb (%rdi), %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x63,0x07]
1648 %b = load <16 x i16>, <16 x i16>* %ptr_b
1649 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
1653 define <32 x i8> @test_mask_packs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
1654 ;CHECK-LABEL: test_mask_packs_epi16_rmk_256
1655 ;CHECK: vpacksswb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x63,0x0f]
1656 %b = load <16 x i16>, <16 x i16>* %ptr_b
1657 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
1661 define <32 x i8> @test_mask_packs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) {
1662 ;CHECK-LABEL: test_mask_packs_epi16_rmkz_256
1663 ;CHECK: vpacksswb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x63,0x07]
1664 %b = load <16 x i16>, <16 x i16>* %ptr_b
1665 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
1669 declare <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32)
1672 define <8 x i16> @test_mask_packus_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
1673 ;CHECK-LABEL: test_mask_packus_epi32_rr_128
1674 ;CHECK: vpackusdw %xmm1, %xmm0, %xmm0
1675 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
1679 define <8 x i16> @test_mask_packus_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) {
1680 ;CHECK-LABEL: test_mask_packus_epi32_rrk_128
1681 ;CHECK: vpackusdw %xmm1, %xmm0, %xmm2 {%k1}
1682 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
1686 define <8 x i16> @test_mask_packus_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
1687 ;CHECK-LABEL: test_mask_packus_epi32_rrkz_128
1688 ;CHECK: vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z}
1689 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
1693 define <8 x i16> @test_mask_packus_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
1694 ;CHECK-LABEL: test_mask_packus_epi32_rm_128
1695 ;CHECK: vpackusdw (%rdi), %xmm0, %xmm0
1696 %b = load <4 x i32>, <4 x i32>* %ptr_b
1697 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
1701 define <8 x i16> @test_mask_packus_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
1702 ;CHECK-LABEL: test_mask_packus_epi32_rmk_128
1703 ;CHECK: vpackusdw (%rdi), %xmm0, %xmm1 {%k1}
1704 %b = load <4 x i32>, <4 x i32>* %ptr_b
1705 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
1709 define <8 x i16> @test_mask_packus_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
1710 ;CHECK-LABEL: test_mask_packus_epi32_rmkz_128
1711 ;CHECK: vpackusdw (%rdi), %xmm0, %xmm0 {%k1} {z}
1712 %b = load <4 x i32>, <4 x i32>* %ptr_b
1713 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
1717 define <8 x i16> @test_mask_packus_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
1718 ;CHECK-LABEL: test_mask_packus_epi32_rmb_128
1719 ;CHECK: vpackusdw (%rdi){1to4}, %xmm0, %xmm0
1720 %q = load i32, i32* %ptr_b
1721 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
1722 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
1723 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
1727 define <8 x i16> @test_mask_packus_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) {
1728 ;CHECK-LABEL: test_mask_packus_epi32_rmbk_128
1729 ;CHECK: vpackusdw (%rdi){1to4}, %xmm0, %xmm1 {%k1}
1730 %q = load i32, i32* %ptr_b
1731 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
1732 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
1733 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
1737 define <8 x i16> @test_mask_packus_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
1738 ;CHECK-LABEL: test_mask_packus_epi32_rmbkz_128
1739 ;CHECK: vpackusdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z}
1740 %q = load i32, i32* %ptr_b
1741 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
1742 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
1743 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
1747 declare <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32>, <4 x i32>, <8 x i16>, i8)
1749 define <16 x i16> @test_mask_packus_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
1750 ;CHECK-LABEL: test_mask_packus_epi32_rr_256
1751 ;CHECK: vpackusdw %ymm1, %ymm0, %ymm0
1752 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
1756 define <16 x i16> @test_mask_packus_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) {
1757 ;CHECK-LABEL: test_mask_packus_epi32_rrk_256
1758 ;CHECK: vpackusdw %ymm1, %ymm0, %ymm2 {%k1}
1759 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
1763 define <16 x i16> @test_mask_packus_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) {
1764 ;CHECK-LABEL: test_mask_packus_epi32_rrkz_256
1765 ;CHECK: vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z}
1766 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
1770 define <16 x i16> @test_mask_packus_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
1771 ;CHECK-LABEL: test_mask_packus_epi32_rm_256
1772 ;CHECK: vpackusdw (%rdi), %ymm0, %ymm0
1773 %b = load <8 x i32>, <8 x i32>* %ptr_b
1774 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
1778 define <16 x i16> @test_mask_packus_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
1779 ;CHECK-LABEL: test_mask_packus_epi32_rmk_256
1780 ;CHECK: vpackusdw (%rdi), %ymm0, %ymm1 {%k1}
1781 %b = load <8 x i32>, <8 x i32>* %ptr_b
1782 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
1786 define <16 x i16> @test_mask_packus_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) {
1787 ;CHECK-LABEL: test_mask_packus_epi32_rmkz_256
1788 ;CHECK: vpackusdw (%rdi), %ymm0, %ymm0 {%k1} {z}
1789 %b = load <8 x i32>, <8 x i32>* %ptr_b
1790 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
1794 define <16 x i16> @test_mask_packus_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
1795 ;CHECK-LABEL: test_mask_packus_epi32_rmb_256
1796 ;CHECK: vpackusdw (%rdi){1to8}, %ymm0, %ymm0
1797 %q = load i32, i32* %ptr_b
1798 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
1799 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
1800 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
1804 define <16 x i16> @test_mask_packus_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) {
1805 ;CHECK-LABEL: test_mask_packus_epi32_rmbk_256
1806 ;CHECK: vpackusdw (%rdi){1to8}, %ymm0, %ymm1 {%k1}
1807 %q = load i32, i32* %ptr_b
1808 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
1809 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
1810 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
1814 define <16 x i16> @test_mask_packus_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) {
1815 ;CHECK-LABEL: test_mask_packus_epi32_rmbkz_256
1816 ;CHECK: vpackusdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z}
1817 %q = load i32, i32* %ptr_b
1818 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
1819 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
1820 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
1824 declare <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32>, <8 x i32>, <16 x i16>, i16)
1826 define <16 x i8> @test_mask_packus_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
1827 ;CHECK-LABEL: test_mask_packus_epi16_rr_128
1828 ;CHECK: vpackuswb %xmm1, %xmm0, %xmm0
1829 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
1833 define <16 x i8> @test_mask_packus_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) {
1834 ;CHECK-LABEL: test_mask_packus_epi16_rrk_128
1835 ;CHECK: vpackuswb %xmm1, %xmm0, %xmm2 {%k1}
1836 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
1840 define <16 x i8> @test_mask_packus_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) {
1841 ;CHECK-LABEL: test_mask_packus_epi16_rrkz_128
1842 ;CHECK: vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z}
1843 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
1847 define <16 x i8> @test_mask_packus_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
1848 ;CHECK-LABEL: test_mask_packus_epi16_rm_128
1849 ;CHECK: vpackuswb (%rdi), %xmm0, %xmm0
1850 %b = load <8 x i16>, <8 x i16>* %ptr_b
1851 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
1855 define <16 x i8> @test_mask_packus_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
1856 ;CHECK-LABEL: test_mask_packus_epi16_rmk_128
1857 ;CHECK: vpackuswb (%rdi), %xmm0, %xmm1 {%k1}
1858 %b = load <8 x i16>, <8 x i16>* %ptr_b
1859 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
1863 define <16 x i8> @test_mask_packus_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) {
1864 ;CHECK-LABEL: test_mask_packus_epi16_rmkz_128
1865 ;CHECK: vpackuswb (%rdi), %xmm0, %xmm0 {%k1} {z}
1866 %b = load <8 x i16>, <8 x i16>* %ptr_b
1867 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
1871 declare <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16>, <8 x i16>, <16 x i8>, i16)
1873 define <32 x i8> @test_mask_packus_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
1874 ;CHECK-LABEL: test_mask_packus_epi16_rr_256
1875 ;CHECK: vpackuswb %ymm1, %ymm0, %ymm0
1876 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
1880 define <32 x i8> @test_mask_packus_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) {
1881 ;CHECK-LABEL: test_mask_packus_epi16_rrk_256
1882 ;CHECK: vpackuswb %ymm1, %ymm0, %ymm2 {%k1}
1883 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
1887 define <32 x i8> @test_mask_packus_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) {
1888 ;CHECK-LABEL: test_mask_packus_epi16_rrkz_256
1889 ;CHECK: vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z}
1890 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
1894 define <32 x i8> @test_mask_packus_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
1895 ;CHECK-LABEL: test_mask_packus_epi16_rm_256
1896 ;CHECK: vpackuswb (%rdi), %ymm0, %ymm0
1897 %b = load <16 x i16>, <16 x i16>* %ptr_b
1898 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
1902 define <32 x i8> @test_mask_packus_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
1903 ;CHECK-LABEL: test_mask_packus_epi16_rmk_256
1904 ;CHECK: vpackuswb (%rdi), %ymm0, %ymm1 {%k1}
1905 %b = load <16 x i16>, <16 x i16>* %ptr_b
1906 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
1910 define <32 x i8> @test_mask_packus_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) {
1911 ;CHECK-LABEL: test_mask_packus_epi16_rmkz_256
1912 ;CHECK: vpackuswb (%rdi), %ymm0, %ymm0 {%k1} {z}
1913 %b = load <16 x i16>, <16 x i16>* %ptr_b
1914 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
1918 declare <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32)
1920 define <8 x i16> @test_mask_adds_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
1921 ;CHECK-LABEL: test_mask_adds_epi16_rr_128
1922 ;CHECK: vpaddsw %xmm1, %xmm0, %xmm0
1923 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
1927 define <8 x i16> @test_mask_adds_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
1928 ;CHECK-LABEL: test_mask_adds_epi16_rrk_128
1929 ;CHECK: vpaddsw %xmm1, %xmm0, %xmm2 {%k1}
1930 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
1934 define <8 x i16> @test_mask_adds_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
1935 ;CHECK-LABEL: test_mask_adds_epi16_rrkz_128
1936 ;CHECK: vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z}
1937 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
1941 define <8 x i16> @test_mask_adds_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
1942 ;CHECK-LABEL: test_mask_adds_epi16_rm_128
1943 ;CHECK: vpaddsw (%rdi), %xmm0, %xmm0
1944 %b = load <8 x i16>, <8 x i16>* %ptr_b
1945 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
1949 define <8 x i16> @test_mask_adds_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
1950 ;CHECK-LABEL: test_mask_adds_epi16_rmk_128
1951 ;CHECK: vpaddsw (%rdi), %xmm0, %xmm1 {%k1}
1952 %b = load <8 x i16>, <8 x i16>* %ptr_b
1953 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
1957 define <8 x i16> @test_mask_adds_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
1958 ;CHECK-LABEL: test_mask_adds_epi16_rmkz_128
1959 ;CHECK: vpaddsw (%rdi), %xmm0, %xmm0 {%k1} {z}
1960 %b = load <8 x i16>, <8 x i16>* %ptr_b
1961 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
1965 declare <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1967 define <16 x i16> @test_mask_adds_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
1968 ;CHECK-LABEL: test_mask_adds_epi16_rr_256
1969 ;CHECK: vpaddsw %ymm1, %ymm0, %ymm0
1970 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
1974 define <16 x i16> @test_mask_adds_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
1975 ;CHECK-LABEL: test_mask_adds_epi16_rrk_256
1976 ;CHECK: vpaddsw %ymm1, %ymm0, %ymm2 {%k1}
1977 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
1981 define <16 x i16> @test_mask_adds_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
1982 ;CHECK-LABEL: test_mask_adds_epi16_rrkz_256
1983 ;CHECK: vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z}
1984 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
1988 define <16 x i16> @test_mask_adds_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
1989 ;CHECK-LABEL: test_mask_adds_epi16_rm_256
1990 ;CHECK: vpaddsw (%rdi), %ymm0, %ymm0
1991 %b = load <16 x i16>, <16 x i16>* %ptr_b
1992 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
1996 define <16 x i16> @test_mask_adds_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
1997 ;CHECK-LABEL: test_mask_adds_epi16_rmk_256
1998 ;CHECK: vpaddsw (%rdi), %ymm0, %ymm1 {%k1}
1999 %b = load <16 x i16>, <16 x i16>* %ptr_b
2000 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
2004 define <16 x i16> @test_mask_adds_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
2005 ;CHECK-LABEL: test_mask_adds_epi16_rmkz_256
2006 ;CHECK: vpaddsw (%rdi), %ymm0, %ymm0 {%k1} {z}
2007 %b = load <16 x i16>, <16 x i16>* %ptr_b
2008 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
2012 declare <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2014 define <8 x i16> @test_mask_subs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
2015 ;CHECK-LABEL: test_mask_subs_epi16_rr_128
2016 ;CHECK: vpsubsw %xmm1, %xmm0, %xmm0
2017 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
2021 define <8 x i16> @test_mask_subs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
2022 ;CHECK-LABEL: test_mask_subs_epi16_rrk_128
2023 ;CHECK: vpsubsw %xmm1, %xmm0, %xmm2 {%k1}
2024 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
2028 define <8 x i16> @test_mask_subs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
2029 ;CHECK-LABEL: test_mask_subs_epi16_rrkz_128
2030 ;CHECK: vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z}
2031 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
2035 define <8 x i16> @test_mask_subs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
2036 ;CHECK-LABEL: test_mask_subs_epi16_rm_128
2037 ;CHECK: vpsubsw (%rdi), %xmm0, %xmm0
2038 %b = load <8 x i16>, <8 x i16>* %ptr_b
2039 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
2043 define <8 x i16> @test_mask_subs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
2044 ;CHECK-LABEL: test_mask_subs_epi16_rmk_128
2045 ;CHECK: vpsubsw (%rdi), %xmm0, %xmm1 {%k1}
2046 %b = load <8 x i16>, <8 x i16>* %ptr_b
2047 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
2051 define <8 x i16> @test_mask_subs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
2052 ;CHECK-LABEL: test_mask_subs_epi16_rmkz_128
2053 ;CHECK: vpsubsw (%rdi), %xmm0, %xmm0 {%k1} {z}
2054 %b = load <8 x i16>, <8 x i16>* %ptr_b
2055 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
2059 declare <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2061 define <16 x i16> @test_mask_subs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
2062 ;CHECK-LABEL: test_mask_subs_epi16_rr_256
2063 ;CHECK: vpsubsw %ymm1, %ymm0, %ymm0
2064 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
2068 define <16 x i16> @test_mask_subs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
2069 ;CHECK-LABEL: test_mask_subs_epi16_rrk_256
2070 ;CHECK: vpsubsw %ymm1, %ymm0, %ymm2 {%k1}
2071 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
2075 define <16 x i16> @test_mask_subs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
2076 ;CHECK-LABEL: test_mask_subs_epi16_rrkz_256
2077 ;CHECK: vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z}
2078 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
2082 define <16 x i16> @test_mask_subs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
2083 ;CHECK-LABEL: test_mask_subs_epi16_rm_256
2084 ;CHECK: vpsubsw (%rdi), %ymm0, %ymm0
2085 %b = load <16 x i16>, <16 x i16>* %ptr_b
2086 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
2090 define <16 x i16> @test_mask_subs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
2091 ;CHECK-LABEL: test_mask_subs_epi16_rmk_256
2092 ;CHECK: vpsubsw (%rdi), %ymm0, %ymm1 {%k1}
2093 %b = load <16 x i16>, <16 x i16>* %ptr_b
2094 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
2098 define <16 x i16> @test_mask_subs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
2099 ;CHECK-LABEL: test_mask_subs_epi16_rmkz_256
2100 ;CHECK: vpsubsw (%rdi), %ymm0, %ymm0 {%k1} {z}
2101 %b = load <16 x i16>, <16 x i16>* %ptr_b
2102 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
2106 declare <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2108 define <8 x i16> @test_mask_adds_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
2109 ;CHECK-LABEL: test_mask_adds_epu16_rr_128
2110 ;CHECK: vpaddusw %xmm1, %xmm0, %xmm0
2111 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
2115 define <8 x i16> @test_mask_adds_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
2116 ;CHECK-LABEL: test_mask_adds_epu16_rrk_128
2117 ;CHECK: vpaddusw %xmm1, %xmm0, %xmm2 {%k1}
2118 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
2122 define <8 x i16> @test_mask_adds_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
2123 ;CHECK-LABEL: test_mask_adds_epu16_rrkz_128
2124 ;CHECK: vpaddusw %xmm1, %xmm0, %xmm0 {%k1} {z}
2125 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
2129 define <8 x i16> @test_mask_adds_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
2130 ;CHECK-LABEL: test_mask_adds_epu16_rm_128
2131 ;CHECK: vpaddusw (%rdi), %xmm0, %xmm0
2132 %b = load <8 x i16>, <8 x i16>* %ptr_b
2133 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
2137 define <8 x i16> @test_mask_adds_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
2138 ;CHECK-LABEL: test_mask_adds_epu16_rmk_128
2139 ;CHECK: vpaddusw (%rdi), %xmm0, %xmm1 {%k1}
2140 %b = load <8 x i16>, <8 x i16>* %ptr_b
2141 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
2145 define <8 x i16> @test_mask_adds_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
2146 ;CHECK-LABEL: test_mask_adds_epu16_rmkz_128
2147 ;CHECK: vpaddusw (%rdi), %xmm0, %xmm0 {%k1} {z}
2148 %b = load <8 x i16>, <8 x i16>* %ptr_b
2149 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
2153 declare <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2155 define <16 x i16> @test_mask_adds_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) {
2156 ;CHECK-LABEL: test_mask_adds_epu16_rr_256
2157 ;CHECK: vpaddusw %ymm1, %ymm0, %ymm0
2158 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
2162 define <16 x i16> @test_mask_adds_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
2163 ;CHECK-LABEL: test_mask_adds_epu16_rrk_256
2164 ;CHECK: vpaddusw %ymm1, %ymm0, %ymm2 {%k1}
2165 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
2169 define <16 x i16> @test_mask_adds_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
2170 ;CHECK-LABEL: test_mask_adds_epu16_rrkz_256
2171 ;CHECK: vpaddusw %ymm1, %ymm0, %ymm0 {%k1} {z}
2172 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
2176 define <16 x i16> @test_mask_adds_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
2177 ;CHECK-LABEL: test_mask_adds_epu16_rm_256
2178 ;CHECK: vpaddusw (%rdi), %ymm0, %ymm0
2179 %b = load <16 x i16>, <16 x i16>* %ptr_b
2180 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
2184 define <16 x i16> @test_mask_adds_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
2185 ;CHECK-LABEL: test_mask_adds_epu16_rmk_256
2186 ;CHECK: vpaddusw (%rdi), %ymm0, %ymm1 {%k1}
2187 %b = load <16 x i16>, <16 x i16>* %ptr_b
2188 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
2192 define <16 x i16> @test_mask_adds_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
2193 ;CHECK-LABEL: test_mask_adds_epu16_rmkz_256
2194 ;CHECK: vpaddusw (%rdi), %ymm0, %ymm0 {%k1} {z}
2195 %b = load <16 x i16>, <16 x i16>* %ptr_b
2196 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
2200 declare <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2202 define <8 x i16> @test_mask_subs_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
2203 ;CHECK-LABEL: test_mask_subs_epu16_rr_128
2204 ;CHECK: vpsubusw %xmm1, %xmm0, %xmm0
2205 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
2209 define <8 x i16> @test_mask_subs_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
2210 ;CHECK-LABEL: test_mask_subs_epu16_rrk_128
2211 ;CHECK: vpsubusw %xmm1, %xmm0, %xmm2 {%k1}
2212 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
2216 define <8 x i16> @test_mask_subs_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
2217 ;CHECK-LABEL: test_mask_subs_epu16_rrkz_128
2218 ;CHECK: vpsubusw %xmm1, %xmm0, %xmm0 {%k1} {z}
2219 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
2223 define <8 x i16> @test_mask_subs_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
2224 ;CHECK-LABEL: test_mask_subs_epu16_rm_128
2225 ;CHECK: vpsubusw (%rdi), %xmm0, %xmm0
2226 %b = load <8 x i16>, <8 x i16>* %ptr_b
2227 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
2231 define <8 x i16> @test_mask_subs_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
2232 ;CHECK-LABEL: test_mask_subs_epu16_rmk_128
2233 ;CHECK: vpsubusw (%rdi), %xmm0, %xmm1 {%k1}
2234 %b = load <8 x i16>, <8 x i16>* %ptr_b
2235 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
2239 define <8 x i16> @test_mask_subs_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
2240 ;CHECK-LABEL: test_mask_subs_epu16_rmkz_128
2241 ;CHECK: vpsubusw (%rdi), %xmm0, %xmm0 {%k1} {z}
2242 %b = load <8 x i16>, <8 x i16>* %ptr_b
2243 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
2247 declare <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2249 define <16 x i16> @test_mask_subs_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) {
2250 ;CHECK-LABEL: test_mask_subs_epu16_rr_256
2251 ;CHECK: vpsubusw %ymm1, %ymm0, %ymm0
2252 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
2256 define <16 x i16> @test_mask_subs_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
2257 ;CHECK-LABEL: test_mask_subs_epu16_rrk_256
2258 ;CHECK: vpsubusw %ymm1, %ymm0, %ymm2 {%k1}
2259 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
2263 define <16 x i16> @test_mask_subs_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
2264 ;CHECK-LABEL: test_mask_subs_epu16_rrkz_256
2265 ;CHECK: vpsubusw %ymm1, %ymm0, %ymm0 {%k1} {z}
2266 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
2270 define <16 x i16> @test_mask_subs_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
2271 ;CHECK-LABEL: test_mask_subs_epu16_rm_256
2272 ;CHECK: vpsubusw (%rdi), %ymm0, %ymm0
2273 %b = load <16 x i16>, <16 x i16>* %ptr_b
2274 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
2278 define <16 x i16> @test_mask_subs_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
2279 ;CHECK-LABEL: test_mask_subs_epu16_rmk_256
2280 ;CHECK: vpsubusw (%rdi), %ymm0, %ymm1 {%k1}
2281 %b = load <16 x i16>, <16 x i16>* %ptr_b
2282 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
2286 define <16 x i16> @test_mask_subs_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
2287 ;CHECK-LABEL: test_mask_subs_epu16_rmkz_256
2288 ;CHECK: vpsubusw (%rdi), %ymm0, %ymm0 {%k1} {z}
2289 %b = load <16 x i16>, <16 x i16>* %ptr_b
2290 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
2294 declare <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2296 define <16 x i8> @test_mask_adds_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
2297 ;CHECK-LABEL: test_mask_adds_epi8_rr_128
2298 ;CHECK: vpaddsb %xmm1, %xmm0, %xmm0
2299 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
2303 define <16 x i8> @test_mask_adds_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
2304 ;CHECK-LABEL: test_mask_adds_epi8_rrk_128
2305 ;CHECK: vpaddsb %xmm1, %xmm0, %xmm2 {%k1}
2306 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
2310 define <16 x i8> @test_mask_adds_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
2311 ;CHECK-LABEL: test_mask_adds_epi8_rrkz_128
2312 ;CHECK: vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z}
2313 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
2317 define <16 x i8> @test_mask_adds_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
2318 ;CHECK-LABEL: test_mask_adds_epi8_rm_128
2319 ;CHECK: vpaddsb (%rdi), %xmm0, %xmm0
2320 %b = load <16 x i8>, <16 x i8>* %ptr_b
2321 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
2325 define <16 x i8> @test_mask_adds_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
2326 ;CHECK-LABEL: test_mask_adds_epi8_rmk_128
2327 ;CHECK: vpaddsb (%rdi), %xmm0, %xmm1 {%k1}
2328 %b = load <16 x i8>, <16 x i8>* %ptr_b
2329 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
2333 define <16 x i8> @test_mask_adds_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
2334 ;CHECK-LABEL: test_mask_adds_epi8_rmkz_128
2335 ;CHECK: vpaddsb (%rdi), %xmm0, %xmm0 {%k1} {z}
2336 %b = load <16 x i8>, <16 x i8>* %ptr_b
2337 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
2341 declare <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
2343 define <32 x i8> @test_mask_adds_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
2344 ;CHECK-LABEL: test_mask_adds_epi8_rr_256
2345 ;CHECK: vpaddsb %ymm1, %ymm0, %ymm0
2346 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
2350 define <32 x i8> @test_mask_adds_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
2351 ;CHECK-LABEL: test_mask_adds_epi8_rrk_256
2352 ;CHECK: vpaddsb %ymm1, %ymm0, %ymm2 {%k1}
2353 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
2357 define <32 x i8> @test_mask_adds_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
2358 ;CHECK-LABEL: test_mask_adds_epi8_rrkz_256
2359 ;CHECK: vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z}
2360 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
2364 define <32 x i8> @test_mask_adds_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
2365 ;CHECK-LABEL: test_mask_adds_epi8_rm_256
2366 ;CHECK: vpaddsb (%rdi), %ymm0, %ymm0
2367 %b = load <32 x i8>, <32 x i8>* %ptr_b
2368 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
2372 define <32 x i8> @test_mask_adds_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
2373 ;CHECK-LABEL: test_mask_adds_epi8_rmk_256
2374 ;CHECK: vpaddsb (%rdi), %ymm0, %ymm1 {%k1}
2375 %b = load <32 x i8>, <32 x i8>* %ptr_b
2376 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
2380 define <32 x i8> @test_mask_adds_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
2381 ;CHECK-LABEL: test_mask_adds_epi8_rmkz_256
2382 ;CHECK: vpaddsb (%rdi), %ymm0, %ymm0 {%k1} {z}
2383 %b = load <32 x i8>, <32 x i8>* %ptr_b
2384 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
2388 declare <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
2390 define <16 x i8> @test_mask_subs_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
2391 ;CHECK-LABEL: test_mask_subs_epi8_rr_128
2392 ;CHECK: vpsubsb %xmm1, %xmm0, %xmm0
2393 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
2397 define <16 x i8> @test_mask_subs_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
2398 ;CHECK-LABEL: test_mask_subs_epi8_rrk_128
2399 ;CHECK: vpsubsb %xmm1, %xmm0, %xmm2 {%k1}
2400 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
2404 define <16 x i8> @test_mask_subs_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
2405 ;CHECK-LABEL: test_mask_subs_epi8_rrkz_128
2406 ;CHECK: vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z}
2407 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
2411 define <16 x i8> @test_mask_subs_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
2412 ;CHECK-LABEL: test_mask_subs_epi8_rm_128
2413 ;CHECK: vpsubsb (%rdi), %xmm0, %xmm0
2414 %b = load <16 x i8>, <16 x i8>* %ptr_b
2415 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
2419 define <16 x i8> @test_mask_subs_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
2420 ;CHECK-LABEL: test_mask_subs_epi8_rmk_128
2421 ;CHECK: vpsubsb (%rdi), %xmm0, %xmm1 {%k1}
2422 %b = load <16 x i8>, <16 x i8>* %ptr_b
2423 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
2427 define <16 x i8> @test_mask_subs_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
2428 ;CHECK-LABEL: test_mask_subs_epi8_rmkz_128
2429 ;CHECK: vpsubsb (%rdi), %xmm0, %xmm0 {%k1} {z}
2430 %b = load <16 x i8>, <16 x i8>* %ptr_b
2431 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
2435 declare <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
2437 define <32 x i8> @test_mask_subs_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
2438 ;CHECK-LABEL: test_mask_subs_epi8_rr_256
2439 ;CHECK: vpsubsb %ymm1, %ymm0, %ymm0
2440 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
2444 define <32 x i8> @test_mask_subs_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
2445 ;CHECK-LABEL: test_mask_subs_epi8_rrk_256
2446 ;CHECK: vpsubsb %ymm1, %ymm0, %ymm2 {%k1}
2447 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
2451 define <32 x i8> @test_mask_subs_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
2452 ;CHECK-LABEL: test_mask_subs_epi8_rrkz_256
2453 ;CHECK: vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z}
2454 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
2458 define <32 x i8> @test_mask_subs_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
2459 ;CHECK-LABEL: test_mask_subs_epi8_rm_256
2460 ;CHECK: vpsubsb (%rdi), %ymm0, %ymm0
2461 %b = load <32 x i8>, <32 x i8>* %ptr_b
2462 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
2466 define <32 x i8> @test_mask_subs_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
2467 ;CHECK-LABEL: test_mask_subs_epi8_rmk_256
2468 ;CHECK: vpsubsb (%rdi), %ymm0, %ymm1 {%k1}
2469 %b = load <32 x i8>, <32 x i8>* %ptr_b
2470 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
2474 define <32 x i8> @test_mask_subs_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
2475 ;CHECK-LABEL: test_mask_subs_epi8_rmkz_256
2476 ;CHECK: vpsubsb (%rdi), %ymm0, %ymm0 {%k1} {z}
2477 %b = load <32 x i8>, <32 x i8>* %ptr_b
2478 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
2482 declare <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
2484 define <16 x i8> @test_mask_adds_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
2485 ;CHECK-LABEL: test_mask_adds_epu8_rr_128
2486 ;CHECK: vpaddusb %xmm1, %xmm0, %xmm0
2487 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
2491 define <16 x i8> @test_mask_adds_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
2492 ;CHECK-LABEL: test_mask_adds_epu8_rrk_128
2493 ;CHECK: vpaddusb %xmm1, %xmm0, %xmm2 {%k1}
2494 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
2498 define <16 x i8> @test_mask_adds_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
2499 ;CHECK-LABEL: test_mask_adds_epu8_rrkz_128
2500 ;CHECK: vpaddusb %xmm1, %xmm0, %xmm0 {%k1} {z}
2501 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
2505 define <16 x i8> @test_mask_adds_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
2506 ;CHECK-LABEL: test_mask_adds_epu8_rm_128
2507 ;CHECK: vpaddusb (%rdi), %xmm0, %xmm0
2508 %b = load <16 x i8>, <16 x i8>* %ptr_b
2509 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
2513 define <16 x i8> @test_mask_adds_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
2514 ;CHECK-LABEL: test_mask_adds_epu8_rmk_128
2515 ;CHECK: vpaddusb (%rdi), %xmm0, %xmm1 {%k1}
2516 %b = load <16 x i8>, <16 x i8>* %ptr_b
2517 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
2521 define <16 x i8> @test_mask_adds_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
2522 ;CHECK-LABEL: test_mask_adds_epu8_rmkz_128
2523 ;CHECK: vpaddusb (%rdi), %xmm0, %xmm0 {%k1} {z}
2524 %b = load <16 x i8>, <16 x i8>* %ptr_b
2525 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
2529 declare <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
2531 define <32 x i8> @test_mask_adds_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
2532 ;CHECK-LABEL: test_mask_adds_epu8_rr_256
2533 ;CHECK: vpaddusb %ymm1, %ymm0, %ymm0
2534 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
2538 define <32 x i8> @test_mask_adds_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
2539 ;CHECK-LABEL: test_mask_adds_epu8_rrk_256
2540 ;CHECK: vpaddusb %ymm1, %ymm0, %ymm2 {%k1}
2541 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
2545 define <32 x i8> @test_mask_adds_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
2546 ;CHECK-LABEL: test_mask_adds_epu8_rrkz_256
2547 ;CHECK: vpaddusb %ymm1, %ymm0, %ymm0 {%k1} {z}
2548 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
2552 define <32 x i8> @test_mask_adds_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
2553 ;CHECK-LABEL: test_mask_adds_epu8_rm_256
2554 ;CHECK: vpaddusb (%rdi), %ymm0, %ymm0
2555 %b = load <32 x i8>, <32 x i8>* %ptr_b
2556 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
2560 define <32 x i8> @test_mask_adds_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
2561 ;CHECK-LABEL: test_mask_adds_epu8_rmk_256
2562 ;CHECK: vpaddusb (%rdi), %ymm0, %ymm1 {%k1}
2563 %b = load <32 x i8>, <32 x i8>* %ptr_b
2564 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
2568 define <32 x i8> @test_mask_adds_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
2569 ;CHECK-LABEL: test_mask_adds_epu8_rmkz_256
2570 ;CHECK: vpaddusb (%rdi), %ymm0, %ymm0 {%k1} {z}
2571 %b = load <32 x i8>, <32 x i8>* %ptr_b
2572 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
2576 declare <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
2578 define <16 x i8> @test_mask_subs_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
2579 ;CHECK-LABEL: test_mask_subs_epu8_rr_128
2580 ;CHECK: vpsubusb %xmm1, %xmm0, %xmm0
2581 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
2585 define <16 x i8> @test_mask_subs_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
2586 ;CHECK-LABEL: test_mask_subs_epu8_rrk_128
2587 ;CHECK: vpsubusb %xmm1, %xmm0, %xmm2 {%k1}
2588 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
2592 define <16 x i8> @test_mask_subs_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
2593 ;CHECK-LABEL: test_mask_subs_epu8_rrkz_128
2594 ;CHECK: vpsubusb %xmm1, %xmm0, %xmm0 {%k1} {z}
2595 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
2599 define <16 x i8> @test_mask_subs_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
2600 ;CHECK-LABEL: test_mask_subs_epu8_rm_128
2601 ;CHECK: vpsubusb (%rdi), %xmm0, %xmm0
2602 %b = load <16 x i8>, <16 x i8>* %ptr_b
2603 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
2607 define <16 x i8> @test_mask_subs_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
2608 ;CHECK-LABEL: test_mask_subs_epu8_rmk_128
2609 ;CHECK: vpsubusb (%rdi), %xmm0, %xmm1 {%k1}
2610 %b = load <16 x i8>, <16 x i8>* %ptr_b
2611 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
2615 define <16 x i8> @test_mask_subs_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
2616 ;CHECK-LABEL: test_mask_subs_epu8_rmkz_128
2617 ;CHECK: vpsubusb (%rdi), %xmm0, %xmm0 {%k1} {z}
2618 %b = load <16 x i8>, <16 x i8>* %ptr_b
2619 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
2623 declare <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
2625 define <32 x i8> @test_mask_subs_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
2626 ;CHECK-LABEL: test_mask_subs_epu8_rr_256
2627 ;CHECK: vpsubusb %ymm1, %ymm0, %ymm0
2628 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
2632 define <32 x i8> @test_mask_subs_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
2633 ;CHECK-LABEL: test_mask_subs_epu8_rrk_256
2634 ;CHECK: vpsubusb %ymm1, %ymm0, %ymm2 {%k1}
2635 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
2639 define <32 x i8> @test_mask_subs_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
2640 ;CHECK-LABEL: test_mask_subs_epu8_rrkz_256
2641 ;CHECK: vpsubusb %ymm1, %ymm0, %ymm0 {%k1} {z}
2642 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
2646 define <32 x i8> @test_mask_subs_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
2647 ;CHECK-LABEL: test_mask_subs_epu8_rm_256
2648 ;CHECK: vpsubusb (%rdi), %ymm0, %ymm0
2649 %b = load <32 x i8>, <32 x i8>* %ptr_b
2650 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
2654 define <32 x i8> @test_mask_subs_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
2655 ;CHECK-LABEL: test_mask_subs_epu8_rmk_256
2656 ;CHECK: vpsubusb (%rdi), %ymm0, %ymm1 {%k1}
2657 %b = load <32 x i8>, <32 x i8>* %ptr_b
2658 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
2662 define <32 x i8> @test_mask_subs_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
2663 ;CHECK-LABEL: test_mask_subs_epu8_rmkz_256
2664 ;CHECK: vpsubusb (%rdi), %ymm0, %ymm0 {%k1} {z}
2665 %b = load <32 x i8>, <32 x i8>* %ptr_b
2666 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
2670 declare <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
2672 declare <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
2674 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_b_128
2676 ; CHECK: vpmaxsb %xmm
2678 define <16 x i8>@test_int_x86_avx512_mask_pmaxs_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) {
2679 %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2 ,i16 %mask)
2680 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
2681 %res2 = add <16 x i8> %res, %res1
2685 declare <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
2687 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_b_256
2689 ; CHECK: vpmaxsb %ymm
2691 define <32 x i8>@test_int_x86_avx512_mask_pmaxs_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
2692 %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
2693 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
2694 %res2 = add <32 x i8> %res, %res1
2698 declare <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2700 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_w_128
2702 ; CHECK: vpmaxsw %xmm
2704 define <8 x i16>@test_int_x86_avx512_mask_pmaxs_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
2705 %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
2706 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
2707 %res2 = add <8 x i16> %res, %res1
2711 declare <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2713 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_w_256
2715 ; CHECK: vpmaxsw %ymm
2717 define <16 x i16>@test_int_x86_avx512_mask_pmaxs_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
2718 %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
2719 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
2720 %res2 = add <16 x i16> %res, %res1
2721 ret <16 x i16> %res2
2724 declare <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
2726 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_b_128
2728 ; CHECK: vpmaxub %xmm
2730 define <16 x i8>@test_int_x86_avx512_mask_pmaxu_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2,i16 %mask) {
2731 %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask)
2732 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
2733 %res2 = add <16 x i8> %res, %res1
2737 declare <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
2739 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_b_256
2741 ; CHECK: vpmaxub %ymm
2743 define <32 x i8>@test_int_x86_avx512_mask_pmaxu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
2744 %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
2745 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
2746 %res2 = add <32 x i8> %res, %res1
2750 declare <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2752 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_w_128
2754 ; CHECK: vpmaxuw %xmm
2756 define <8 x i16>@test_int_x86_avx512_mask_pmaxu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
2757 %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
2758 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
2759 %res2 = add <8 x i16> %res, %res1
2763 declare <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2765 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_w_256
2767 ; CHECK: vpmaxuw %ymm
2769 define <16 x i16>@test_int_x86_avx512_mask_pmaxu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
2770 %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
2771 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
2772 %res2 = add <16 x i16> %res, %res1
2773 ret <16 x i16> %res2
2776 declare <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
2778 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_b_128
2780 ; CHECK: vpminsb %xmm
2782 define <16 x i8>@test_int_x86_avx512_mask_pmins_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) {
2783 %res = call <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask)
2784 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
2785 %res2 = add <16 x i8> %res, %res1
2789 declare <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
2791 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_b_256
2793 ; CHECK: vpminsb %ymm
2795 define <32 x i8>@test_int_x86_avx512_mask_pmins_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
2796 %res = call <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
2797 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
2798 %res2 = add <32 x i8> %res, %res1
2802 declare <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2804 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_w_128
2806 ; CHECK: vpminsw %xmm
2808 define <8 x i16>@test_int_x86_avx512_mask_pmins_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
2809 %res = call <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
2810 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
2811 %res2 = add <8 x i16> %res, %res1
2815 declare <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2817 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_w_256
2819 ; CHECK: vpminsw %ymm
2821 define <16 x i16>@test_int_x86_avx512_mask_pmins_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
2822 %res = call <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
2823 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
2824 %res2 = add <16 x i16> %res, %res1
2825 ret <16 x i16> %res2
2828 declare <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
2830 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_b_128
2832 ; CHECK: vpminub %xmm
2834 define <16 x i8>@test_int_x86_avx512_mask_pminu_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) {
2835 %res = call <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask)
2836 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
2837 %res2 = add <16 x i8> %res, %res1
2841 declare <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
2843 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_b_256
2845 ; CHECK: vpminub %ymm
2847 define <32 x i8>@test_int_x86_avx512_mask_pminu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
2848 %res = call <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
2849 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
2850 %res2 = add <32 x i8> %res, %res1
2854 declare <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2856 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_w_128
2858 ; CHECK: vpminuw %xmm
2860 define <8 x i16>@test_int_x86_avx512_mask_pminu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
2861 %res = call <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
2862 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
2863 %res2 = add <8 x i16> %res, %res1
2867 declare <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2869 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_w_256
2871 ; CHECK: vpminuw %ymm
2873 define <16 x i16>@test_int_x86_avx512_mask_pminu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
2874 %res = call <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
2875 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
2876 %res2 = add <16 x i16> %res, %res1
2877 ret <16 x i16> %res2
2880 declare <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2882 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_hi_128
2885 ; CHECK: vpermt2w %xmm{{.*}}{%k1}
2887 define <8 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
2888 %res = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
2889 %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
2890 %res2 = add <8 x i16> %res, %res1
2894 declare <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2896 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_hi_128
2899 ; CHECK: vpermt2w %xmm{{.*}}{%k1} {z}
2900 define <8 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
2901 %res = call <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
2902 %res1 = call <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
2903 %res2 = add <8 x i16> %res, %res1
2907 declare <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2909 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_hi_256
2912 ; CHECK: vpermt2w %ymm{{.*}}{%k1}
2913 define <16 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
2914 %res = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
2915 %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
2916 %res2 = add <16 x i16> %res, %res1
2917 ret <16 x i16> %res2
2920 declare <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2922 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_hi_256
2925 ; CHECK: vpermt2w %ymm{{.*}}{%k1} {z}
2926 define <16 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
2927 %res = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
2928 %res1 = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
2929 %res2 = add <16 x i16> %res, %res1
2930 ret <16 x i16> %res2
2933 declare <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2935 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_hi_128
2938 ; CHECK: vpermi2w %xmm{{.*}}{%k1}
2939 define <8 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
2940 %res = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
2941 %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
2942 %res2 = add <8 x i16> %res, %res1
2946 declare <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2948 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_hi_256
2951 ; CHECK: vpermi2w %ymm{{.*}}{%k1}
2952 define <16 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
2953 %res = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
2954 %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
2955 %res2 = add <16 x i16> %res, %res1
2956 ret <16 x i16> %res2
2959 declare <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
2961 ; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_b_128
2963 ; CHECK: vpavgb %xmm
2965 define <16 x i8>@test_int_x86_avx512_mask_pavg_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
2966 %res = call <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
2967 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
2968 %res2 = add <16 x i8> %res, %res1
2972 declare <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
2974 ; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_b_256
2976 ; CHECK: vpavgb %ymm
2978 define <32 x i8>@test_int_x86_avx512_mask_pavg_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
2979 %res = call <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
2980 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
2981 %res2 = add <32 x i8> %res, %res1
2985 declare <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2987 ; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_w_128
2989 ; CHECK: vpavgw %xmm
2991 define <8 x i16>@test_int_x86_avx512_mask_pavg_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
2992 %res = call <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
2993 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
2994 %res2 = add <8 x i16> %res, %res1
2998 declare <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
3000 ; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_w_256
3002 ; CHECK: vpavgw %ymm
3004 define <16 x i16>@test_int_x86_avx512_mask_pavg_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
3005 %res = call <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
3006 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
3007 %res2 = add <16 x i16> %res, %res1
3008 ret <16 x i16> %res2