1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding| FileCheck %s
5 define i32 @test_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b) {
6 ; CHECK-LABEL: test_pcmpeq_b_256
7 ; CHECK: vpcmpeqb %ymm1, %ymm0, %k0 ##
8 %res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1)
12 define i32 @test_mask_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
13 ; CHECK-LABEL: test_mask_pcmpeq_b_256
14 ; CHECK: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} ##
15 %res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
19 declare i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8>, <32 x i8>, i32)
21 define i16 @test_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b) {
22 ; CHECK-LABEL: test_pcmpeq_w_256
23 ; CHECK: vpcmpeqw %ymm1, %ymm0, %k0 ##
24 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
28 define i16 @test_mask_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
29 ; CHECK-LABEL: test_mask_pcmpeq_w_256
30 ; CHECK: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} ##
31 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
35 declare i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16>, <16 x i16>, i16)
37 define i32 @test_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b) {
38 ; CHECK-LABEL: test_pcmpgt_b_256
39 ; CHECK: vpcmpgtb %ymm1, %ymm0, %k0 ##
40 %res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1)
44 define i32 @test_mask_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
45 ; CHECK-LABEL: test_mask_pcmpgt_b_256
46 ; CHECK: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} ##
47 %res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
51 declare i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8>, <32 x i8>, i32)
53 define i16 @test_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b) {
54 ; CHECK-LABEL: test_pcmpgt_w_256
55 ; CHECK: vpcmpgtw %ymm1, %ymm0, %k0 ##
56 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
60 define i16 @test_mask_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
61 ; CHECK-LABEL: test_mask_pcmpgt_w_256
62 ; CHECK: vpcmpgtw %ymm1, %ymm0, %k0 {%k1} ##
63 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
67 declare i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16>, <16 x i16>, i16)
69 define <8 x i32> @test_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
70 ; CHECK_LABEL: test_cmp_b_256
71 ; CHECK: vpcmpeqb %ymm1, %ymm0, %k0 ##
72 %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
73 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
74 ; CHECK: vpcmpltb %ymm1, %ymm0, %k0 ##
75 %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 -1)
76 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
77 ; CHECK: vpcmpleb %ymm1, %ymm0, %k0 ##
78 %res2 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 -1)
79 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
80 ; CHECK: vpcmpunordb %ymm1, %ymm0, %k0 ##
81 %res3 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 -1)
82 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
83 ; CHECK: vpcmpneqb %ymm1, %ymm0, %k0 ##
84 %res4 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 -1)
85 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
86 ; CHECK: vpcmpnltb %ymm1, %ymm0, %k0 ##
87 %res5 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 -1)
88 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
89 ; CHECK: vpcmpnleb %ymm1, %ymm0, %k0 ##
90 %res6 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 -1)
91 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
92 ; CHECK: vpcmpordb %ymm1, %ymm0, %k0 ##
93 %res7 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 -1)
94 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
98 define <8 x i32> @test_mask_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) {
99 ; CHECK_LABEL: test_mask_cmp_b_256
100 ; CHECK: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} ##
101 %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
102 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
103 ; CHECK: vpcmpltb %ymm1, %ymm0, %k0 {%k1} ##
104 %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 %mask)
105 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
106 ; CHECK: vpcmpleb %ymm1, %ymm0, %k0 {%k1} ##
107 %res2 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 %mask)
108 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
109 ; CHECK: vpcmpunordb %ymm1, %ymm0, %k0 {%k1} ##
110 %res3 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 %mask)
111 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
112 ; CHECK: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ##
113 %res4 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 %mask)
114 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
115 ; CHECK: vpcmpnltb %ymm1, %ymm0, %k0 {%k1} ##
116 %res5 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 %mask)
117 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
118 ; CHECK: vpcmpnleb %ymm1, %ymm0, %k0 {%k1} ##
119 %res6 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 %mask)
120 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
121 ; CHECK: vpcmpordb %ymm1, %ymm0, %k0 {%k1} ##
122 %res7 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 %mask)
123 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
127 declare i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8>, <32 x i8>, i32, i32) nounwind readnone
129 define <8 x i32> @test_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
130 ; CHECK_LABEL: test_ucmp_b_256
131 ; CHECK: vpcmpequb %ymm1, %ymm0, %k0 ##
132 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
133 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
134 ; CHECK: vpcmpltub %ymm1, %ymm0, %k0 ##
135 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 -1)
136 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
137 ; CHECK: vpcmpleub %ymm1, %ymm0, %k0 ##
138 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 -1)
139 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
140 ; CHECK: vpcmpunordub %ymm1, %ymm0, %k0 ##
141 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 -1)
142 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
143 ; CHECK: vpcmpnequb %ymm1, %ymm0, %k0 ##
144 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 -1)
145 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
146 ; CHECK: vpcmpnltub %ymm1, %ymm0, %k0 ##
147 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 -1)
148 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
149 ; CHECK: vpcmpnleub %ymm1, %ymm0, %k0 ##
150 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 -1)
151 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
152 ; CHECK: vpcmpordub %ymm1, %ymm0, %k0 ##
153 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 -1)
154 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
158 define <8 x i32> @test_mask_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) {
159 ; CHECK_LABEL: test_mask_ucmp_b_256
160 ; CHECK: vpcmpequb %ymm1, %ymm0, %k0 {%k1} ##
161 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
162 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
163 ; CHECK: vpcmpltub %ymm1, %ymm0, %k0 {%k1} ##
164 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 %mask)
165 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
166 ; CHECK: vpcmpleub %ymm1, %ymm0, %k0 {%k1} ##
167 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 %mask)
168 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
169 ; CHECK: vpcmpunordub %ymm1, %ymm0, %k0 {%k1} ##
170 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 %mask)
171 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
172 ; CHECK: vpcmpnequb %ymm1, %ymm0, %k0 {%k1} ##
173 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 %mask)
174 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
175 ; CHECK: vpcmpnltub %ymm1, %ymm0, %k0 {%k1} ##
176 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 %mask)
177 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
178 ; CHECK: vpcmpnleub %ymm1, %ymm0, %k0 {%k1} ##
179 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 %mask)
180 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
181 ; CHECK: vpcmpordub %ymm1, %ymm0, %k0 {%k1} ##
182 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 %mask)
183 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
187 declare i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8>, <32 x i8>, i32, i32) nounwind readnone
189 define <8 x i16> @test_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1) {
190 ; CHECK_LABEL: test_cmp_w_256
191 ; CHECK: vpcmpeqw %ymm1, %ymm0, %k0 ##
192 %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1)
193 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
194 ; CHECK: vpcmpltw %ymm1, %ymm0, %k0 ##
195 %res1 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 -1)
196 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
197 ; CHECK: vpcmplew %ymm1, %ymm0, %k0 ##
198 %res2 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 -1)
199 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
200 ; CHECK: vpcmpunordw %ymm1, %ymm0, %k0 ##
201 %res3 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 -1)
202 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
203 ; CHECK: vpcmpneqw %ymm1, %ymm0, %k0 ##
204 %res4 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 -1)
205 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
206 ; CHECK: vpcmpnltw %ymm1, %ymm0, %k0 ##
207 %res5 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 -1)
208 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
209 ; CHECK: vpcmpnlew %ymm1, %ymm0, %k0 ##
210 %res6 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 -1)
211 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
212 ; CHECK: vpcmpordw %ymm1, %ymm0, %k0 ##
213 %res7 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 -1)
214 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
218 define <8 x i16> @test_mask_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) {
219 ; CHECK_LABEL: test_mask_cmp_w_256
220 ; CHECK: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} ##
221 %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask)
222 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
223 ; CHECK: vpcmpltw %ymm1, %ymm0, %k0 {%k1} ##
224 %res1 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 %mask)
225 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
226 ; CHECK: vpcmplew %ymm1, %ymm0, %k0 {%k1} ##
227 %res2 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 %mask)
228 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
229 ; CHECK: vpcmpunordw %ymm1, %ymm0, %k0 {%k1} ##
230 %res3 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 %mask)
231 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
232 ; CHECK: vpcmpneqw %ymm1, %ymm0, %k0 {%k1} ##
233 %res4 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 %mask)
234 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
235 ; CHECK: vpcmpnltw %ymm1, %ymm0, %k0 {%k1} ##
236 %res5 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 %mask)
237 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
238 ; CHECK: vpcmpnlew %ymm1, %ymm0, %k0 {%k1} ##
239 %res6 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 %mask)
240 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
241 ; CHECK: vpcmpordw %ymm1, %ymm0, %k0 {%k1} ##
242 %res7 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 %mask)
243 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
247 declare i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16>, <16 x i16>, i32, i16) nounwind readnone
249 define <8 x i16> @test_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1) {
250 ; CHECK_LABEL: test_ucmp_w_256
251 ; CHECK: vpcmpequw %ymm1, %ymm0, %k0 ##
252 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1)
253 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
254 ; CHECK: vpcmpltuw %ymm1, %ymm0, %k0 ##
255 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 -1)
256 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
257 ; CHECK: vpcmpleuw %ymm1, %ymm0, %k0 ##
258 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 -1)
259 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
260 ; CHECK: vpcmpunorduw %ymm1, %ymm0, %k0 ##
261 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 -1)
262 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
263 ; CHECK: vpcmpnequw %ymm1, %ymm0, %k0 ##
264 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 -1)
265 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
266 ; CHECK: vpcmpnltuw %ymm1, %ymm0, %k0 ##
267 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 -1)
268 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
269 ; CHECK: vpcmpnleuw %ymm1, %ymm0, %k0 ##
270 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 -1)
271 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
272 ; CHECK: vpcmporduw %ymm1, %ymm0, %k0 ##
273 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 -1)
274 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
278 define <8 x i16> @test_mask_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) {
279 ; CHECK_LABEL: test_mask_ucmp_w_256
280 ; CHECK: vpcmpequw %ymm1, %ymm0, %k0 {%k1} ##
281 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask)
282 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
283 ; CHECK: vpcmpltuw %ymm1, %ymm0, %k0 {%k1} ##
284 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 %mask)
285 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
286 ; CHECK: vpcmpleuw %ymm1, %ymm0, %k0 {%k1} ##
287 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 %mask)
288 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
289 ; CHECK: vpcmpunorduw %ymm1, %ymm0, %k0 {%k1} ##
290 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 %mask)
291 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
292 ; CHECK: vpcmpnequw %ymm1, %ymm0, %k0 {%k1} ##
293 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 %mask)
294 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
295 ; CHECK: vpcmpnltuw %ymm1, %ymm0, %k0 {%k1} ##
296 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 %mask)
297 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
298 ; CHECK: vpcmpnleuw %ymm1, %ymm0, %k0 {%k1} ##
299 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 %mask)
300 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
301 ; CHECK: vpcmporduw %ymm1, %ymm0, %k0 {%k1} ##
302 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 %mask)
303 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
307 declare i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16>, <16 x i16>, i32, i16) nounwind readnone
311 define i16 @test_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b) {
312 ; CHECK-LABEL: test_pcmpeq_b_128
313 ; CHECK: vpcmpeqb %xmm1, %xmm0, %k0 ##
314 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 -1)
318 define i16 @test_mask_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
319 ; CHECK-LABEL: test_mask_pcmpeq_b_128
320 ; CHECK: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} ##
321 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
325 declare i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8>, <16 x i8>, i16)
327 define i8 @test_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b) {
328 ; CHECK-LABEL: test_pcmpeq_w_128
329 ; CHECK: vpcmpeqw %xmm1, %xmm0, %k0 ##
330 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1)
334 define i8 @test_mask_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
335 ; CHECK-LABEL: test_mask_pcmpeq_w_128
336 ; CHECK: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} ##
337 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask)
341 declare i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16>, <8 x i16>, i8)
343 define i16 @test_pcmpgt_b_128(<16 x i8> %a, <16 x i8> %b) {
344 ; CHECK-LABEL: test_pcmpgt_b_128
345 ; CHECK: vpcmpgtb %xmm1, %xmm0, %k0 ##
346 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 -1)
350 define i16 @test_mask_pcmpgt_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
351 ; CHECK-LABEL: test_mask_pcmpgt_b_128
352 ; CHECK: vpcmpgtb %xmm1, %xmm0, %k0 {%k1} ##
353 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
357 declare i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8>, <16 x i8>, i16)
359 define i8 @test_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b) {
360 ; CHECK-LABEL: test_pcmpgt_w_128
361 ; CHECK: vpcmpgtw %xmm1, %xmm0, %k0 ##
362 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1)
366 define i8 @test_mask_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
367 ; CHECK-LABEL: test_mask_pcmpgt_w_128
368 ; CHECK: vpcmpgtw %xmm1, %xmm0, %k0 {%k1} ##
369 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask)
373 declare i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16>, <8 x i16>, i8)
375 define <8 x i16> @test_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1) {
376 ; CHECK_LABEL: test_cmp_b_128
377 ; CHECK: vpcmpeqb %xmm1, %xmm0, %k0 ##
378 %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1)
379 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
380 ; CHECK: vpcmpltb %xmm1, %xmm0, %k0 ##
381 %res1 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 -1)
382 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
383 ; CHECK: vpcmpleb %xmm1, %xmm0, %k0 ##
384 %res2 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 -1)
385 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
386 ; CHECK: vpcmpunordb %xmm1, %xmm0, %k0 ##
387 %res3 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 -1)
388 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
389 ; CHECK: vpcmpneqb %xmm1, %xmm0, %k0 ##
390 %res4 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 -1)
391 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
392 ; CHECK: vpcmpnltb %xmm1, %xmm0, %k0 ##
393 %res5 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 -1)
394 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
395 ; CHECK: vpcmpnleb %xmm1, %xmm0, %k0 ##
396 %res6 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 -1)
397 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
398 ; CHECK: vpcmpordb %xmm1, %xmm0, %k0 ##
399 %res7 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 -1)
400 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
404 define <8 x i16> @test_mask_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) {
405 ; CHECK_LABEL: test_mask_cmp_b_128
406 ; CHECK: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} ##
407 %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask)
408 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
409 ; CHECK: vpcmpltb %xmm1, %xmm0, %k0 {%k1} ##
410 %res1 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 %mask)
411 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
412 ; CHECK: vpcmpleb %xmm1, %xmm0, %k0 {%k1} ##
413 %res2 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 %mask)
414 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
415 ; CHECK: vpcmpunordb %xmm1, %xmm0, %k0 {%k1} ##
416 %res3 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 %mask)
417 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
418 ; CHECK: vpcmpneqb %xmm1, %xmm0, %k0 {%k1} ##
419 %res4 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 %mask)
420 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
421 ; CHECK: vpcmpnltb %xmm1, %xmm0, %k0 {%k1} ##
422 %res5 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 %mask)
423 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
424 ; CHECK: vpcmpnleb %xmm1, %xmm0, %k0 {%k1} ##
425 %res6 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 %mask)
426 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
427 ; CHECK: vpcmpordb %xmm1, %xmm0, %k0 {%k1} ##
428 %res7 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 %mask)
429 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
433 declare i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8>, <16 x i8>, i32, i16) nounwind readnone
435 define <8 x i16> @test_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1) {
436 ; CHECK_LABEL: test_ucmp_b_128
437 ; CHECK: vpcmpequb %xmm1, %xmm0, %k0 ##
438 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1)
439 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
440 ; CHECK: vpcmpltub %xmm1, %xmm0, %k0 ##
441 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 -1)
442 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
443 ; CHECK: vpcmpleub %xmm1, %xmm0, %k0 ##
444 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 -1)
445 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
446 ; CHECK: vpcmpunordub %xmm1, %xmm0, %k0 ##
447 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 -1)
448 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
449 ; CHECK: vpcmpnequb %xmm1, %xmm0, %k0 ##
450 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 -1)
451 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
452 ; CHECK: vpcmpnltub %xmm1, %xmm0, %k0 ##
453 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 -1)
454 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
455 ; CHECK: vpcmpnleub %xmm1, %xmm0, %k0 ##
456 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 -1)
457 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
458 ; CHECK: vpcmpordub %xmm1, %xmm0, %k0 ##
459 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 -1)
460 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
464 define <8 x i16> @test_mask_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) {
465 ; CHECK_LABEL: test_mask_ucmp_b_128
466 ; CHECK: vpcmpequb %xmm1, %xmm0, %k0 {%k1} ##
467 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask)
468 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
469 ; CHECK: vpcmpltub %xmm1, %xmm0, %k0 {%k1} ##
470 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 %mask)
471 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
472 ; CHECK: vpcmpleub %xmm1, %xmm0, %k0 {%k1} ##
473 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 %mask)
474 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
475 ; CHECK: vpcmpunordub %xmm1, %xmm0, %k0 {%k1} ##
476 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 %mask)
477 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
478 ; CHECK: vpcmpnequb %xmm1, %xmm0, %k0 {%k1} ##
479 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 %mask)
480 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
481 ; CHECK: vpcmpnltub %xmm1, %xmm0, %k0 {%k1} ##
482 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 %mask)
483 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
484 ; CHECK: vpcmpnleub %xmm1, %xmm0, %k0 {%k1} ##
485 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 %mask)
486 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
487 ; CHECK: vpcmpordub %xmm1, %xmm0, %k0 {%k1} ##
488 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 %mask)
489 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
493 declare i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8>, <16 x i8>, i32, i16) nounwind readnone
495 define <8 x i8> @test_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1) {
496 ; CHECK_LABEL: test_cmp_w_128
497 ; CHECK: vpcmpeqw %xmm1, %xmm0, %k0 ##
498 %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1)
499 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
500 ; CHECK: vpcmpltw %xmm1, %xmm0, %k0 ##
501 %res1 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 -1)
502 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
503 ; CHECK: vpcmplew %xmm1, %xmm0, %k0 ##
504 %res2 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 -1)
505 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
506 ; CHECK: vpcmpunordw %xmm1, %xmm0, %k0 ##
507 %res3 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 -1)
508 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
509 ; CHECK: vpcmpneqw %xmm1, %xmm0, %k0 ##
510 %res4 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 -1)
511 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
512 ; CHECK: vpcmpnltw %xmm1, %xmm0, %k0 ##
513 %res5 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 -1)
514 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
515 ; CHECK: vpcmpnlew %xmm1, %xmm0, %k0 ##
516 %res6 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 -1)
517 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
518 ; CHECK: vpcmpordw %xmm1, %xmm0, %k0 ##
519 %res7 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 -1)
520 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
524 define <8 x i8> @test_mask_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
525 ; CHECK_LABEL: test_mask_cmp_w_128
526 ; CHECK: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} ##
527 %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask)
528 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
529 ; CHECK: vpcmpltw %xmm1, %xmm0, %k0 {%k1} ##
530 %res1 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 %mask)
531 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
532 ; CHECK: vpcmplew %xmm1, %xmm0, %k0 {%k1} ##
533 %res2 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 %mask)
534 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
535 ; CHECK: vpcmpunordw %xmm1, %xmm0, %k0 {%k1} ##
536 %res3 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 %mask)
537 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
538 ; CHECK: vpcmpneqw %xmm1, %xmm0, %k0 {%k1} ##
539 %res4 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 %mask)
540 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
541 ; CHECK: vpcmpnltw %xmm1, %xmm0, %k0 {%k1} ##
542 %res5 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 %mask)
543 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
544 ; CHECK: vpcmpnlew %xmm1, %xmm0, %k0 {%k1} ##
545 %res6 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 %mask)
546 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
547 ; CHECK: vpcmpordw %xmm1, %xmm0, %k0 {%k1} ##
548 %res7 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 %mask)
549 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
553 declare i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone
555 define <8 x i8> @test_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1) {
556 ; CHECK_LABEL: test_ucmp_w_128
557 ; CHECK: vpcmpequw %xmm1, %xmm0, %k0 ##
558 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1)
559 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
560 ; CHECK: vpcmpltuw %xmm1, %xmm0, %k0 ##
561 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 -1)
562 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
563 ; CHECK: vpcmpleuw %xmm1, %xmm0, %k0 ##
564 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 -1)
565 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
566 ; CHECK: vpcmpunorduw %xmm1, %xmm0, %k0 ##
567 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 -1)
568 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
569 ; CHECK: vpcmpnequw %xmm1, %xmm0, %k0 ##
570 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 -1)
571 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
572 ; CHECK: vpcmpnltuw %xmm1, %xmm0, %k0 ##
573 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 -1)
574 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
575 ; CHECK: vpcmpnleuw %xmm1, %xmm0, %k0 ##
576 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 -1)
577 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
578 ; CHECK: vpcmporduw %xmm1, %xmm0, %k0 ##
579 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 -1)
580 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
584 define <8 x i8> @test_mask_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
585 ; CHECK_LABEL: test_mask_ucmp_w_128
586 ; CHECK: vpcmpequw %xmm1, %xmm0, %k0 {%k1} ##
587 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask)
588 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
589 ; CHECK: vpcmpltuw %xmm1, %xmm0, %k0 {%k1} ##
590 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 %mask)
591 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
592 ; CHECK: vpcmpleuw %xmm1, %xmm0, %k0 {%k1} ##
593 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 %mask)
594 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
595 ; CHECK: vpcmpunorduw %xmm1, %xmm0, %k0 {%k1} ##
596 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 %mask)
597 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
598 ; CHECK: vpcmpnequw %xmm1, %xmm0, %k0 {%k1} ##
599 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 %mask)
600 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
601 ; CHECK: vpcmpnltuw %xmm1, %xmm0, %k0 {%k1} ##
602 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 %mask)
603 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
604 ; CHECK: vpcmpnleuw %xmm1, %xmm0, %k0 {%k1} ##
605 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 %mask)
606 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
607 ; CHECK: vpcmporduw %xmm1, %xmm0, %k0 {%k1} ##
608 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 %mask)
609 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
613 declare i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone
615 declare <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
617 define <8 x float> @test_mask_vfmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
618 ; CHECK-LABEL: test_mask_vfmadd256_ps
619 ; CHECK: vfmadd213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa8,0xc2]
620 %res = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
624 declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
626 define <4 x float> @test_mask_vfmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
627 ; CHECK-LABEL: test_mask_vfmadd128_ps
628 ; CHECK: vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0xc2]
629 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
633 declare <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
635 define <4 x double> @test_mask_fmadd256_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask) {
636 ; CHECK-LABEL: test_mask_fmadd256_pd:
637 ; CHECK: vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0xc2]
638 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask)
639 ret <4 x double> %res
642 declare <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
644 define <2 x double> @test_mask_fmadd128_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
645 ; CHECK-LABEL: test_mask_fmadd128_pd:
646 ; CHECK: vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0xc2]
647 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask)
648 ret <2 x double> %res
651 define <2 x double>@test_int_x86_avx512_mask_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
652 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_128:
654 ; CHECK-NEXT: movzbl %dil, %eax
655 ; CHECK-NEXT: kmovw %eax, %k1
656 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
657 ; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm3 {%k1}
658 ; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0
659 ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
661 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
662 %res1 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
663 %res2 = fadd <2 x double> %res, %res1
664 ret <2 x double> %res2
667 declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
669 define <2 x double>@test_int_x86_avx512_mask3_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
670 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_128:
672 ; CHECK-NEXT: movzbl %dil, %eax
673 ; CHECK-NEXT: kmovw %eax, %k1
674 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
675 ; CHECK-NEXT: vfmadd231pd %xmm1, %xmm0, %xmm3 {%k1}
676 ; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0
677 ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
679 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
680 %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
681 %res2 = fadd <2 x double> %res, %res1
682 ret <2 x double> %res2
685 declare <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
687 define <2 x double>@test_int_x86_avx512_maskz_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
688 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_128:
690 ; CHECK-NEXT: movzbl %dil, %eax
691 ; CHECK-NEXT: kmovw %eax, %k1
692 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
693 ; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm3 {%k1} {z}
694 ; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0
695 ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
697 %res = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
698 %res1 = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
699 %res2 = fadd <2 x double> %res, %res1
700 ret <2 x double> %res2
703 define <4 x double>@test_int_x86_avx512_mask_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
704 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_256:
706 ; CHECK-NEXT: movzbl %dil, %eax
707 ; CHECK-NEXT: kmovw %eax, %k1
708 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
709 ; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm3 {%k1}
710 ; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
711 ; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
713 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
714 %res1 = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
715 %res2 = fadd <4 x double> %res, %res1
716 ret <4 x double> %res2
719 declare <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
721 define <4 x double>@test_int_x86_avx512_mask3_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
722 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_256:
724 ; CHECK-NEXT: movzbl %dil, %eax
725 ; CHECK-NEXT: kmovw %eax, %k1
726 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
727 ; CHECK-NEXT: vfmadd231pd %ymm1, %ymm0, %ymm3 {%k1}
728 ; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
729 ; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
731 %res = call <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
732 %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
733 %res2 = fadd <4 x double> %res, %res1
734 ret <4 x double> %res2
737 declare <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
739 define <4 x double>@test_int_x86_avx512_maskz_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
740 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_256:
742 ; CHECK-NEXT: movzbl %dil, %eax
743 ; CHECK-NEXT: kmovw %eax, %k1
744 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
745 ; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm3 {%k1} {z}
746 ; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
747 ; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
749 %res = call <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
750 %res1 = call <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
751 %res2 = fadd <4 x double> %res, %res1
752 ret <4 x double> %res2
755 define <4 x float>@test_int_x86_avx512_mask_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
756 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_128:
758 ; CHECK-NEXT: movzbl %dil, %eax
759 ; CHECK-NEXT: kmovw %eax, %k1
760 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
761 ; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm3 {%k1}
762 ; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0
763 ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
765 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
766 %res1 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
767 %res2 = fadd <4 x float> %res, %res1
768 ret <4 x float> %res2
771 declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
773 define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
774 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_128:
776 ; CHECK-NEXT: movzbl %dil, %eax
777 ; CHECK-NEXT: kmovw %eax, %k1
778 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
779 ; CHECK-NEXT: vfmadd231ps %xmm1, %xmm0, %xmm3 {%k1}
780 ; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0
781 ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
783 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
784 %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
785 %res2 = fadd <4 x float> %res, %res1
786 ret <4 x float> %res2
789 declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
791 define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
792 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_128:
794 ; CHECK-NEXT: movzbl %dil, %eax
795 ; CHECK-NEXT: kmovw %eax, %k1
796 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
797 ; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm3 {%k1} {z}
798 ; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0
799 ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
801 %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
802 %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
803 %res2 = fadd <4 x float> %res, %res1
804 ret <4 x float> %res2
807 define <8 x float>@test_int_x86_avx512_mask_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
808 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_256:
810 ; CHECK-NEXT: movzbl %dil, %eax
811 ; CHECK-NEXT: kmovw %eax, %k1
812 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
813 ; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm3 {%k1}
814 ; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0
815 ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
817 %res = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
818 %res1 = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
819 %res2 = fadd <8 x float> %res, %res1
820 ret <8 x float> %res2
823 declare <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
825 define <8 x float>@test_int_x86_avx512_mask3_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
826 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_256:
828 ; CHECK-NEXT: movzbl %dil, %eax
829 ; CHECK-NEXT: kmovw %eax, %k1
830 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
831 ; CHECK-NEXT: vfmadd231ps %ymm1, %ymm0, %ymm3 {%k1}
832 ; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0
833 ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
835 %res = call <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
836 %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
837 %res2 = fadd <8 x float> %res, %res1
838 ret <8 x float> %res2
841 declare <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
843 define <8 x float>@test_int_x86_avx512_maskz_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
844 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_256:
846 ; CHECK-NEXT: movzbl %dil, %eax
847 ; CHECK-NEXT: kmovw %eax, %k1
848 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
849 ; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm3 {%k1} {z}
850 ; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0
851 ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
853 %res = call <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
854 %res1 = call <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
855 %res2 = fadd <8 x float> %res, %res1
856 ret <8 x float> %res2
860 declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
862 define <2 x double>@test_int_x86_avx512_mask3_vfmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
863 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_128:
865 ; CHECK-NEXT: movzbl %dil, %eax
866 ; CHECK-NEXT: kmovw %eax, %k1
867 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
868 ; CHECK-NEXT: vfmsub231pd %xmm1, %xmm0, %xmm3 {%k1}
869 ; CHECK-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0
870 ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
872 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
873 %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
874 %res2 = fadd <2 x double> %res, %res1
875 ret <2 x double> %res2
879 declare <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
881 define <4 x double>@test_int_x86_avx512_mask3_vfmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
882 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_256:
884 ; CHECK-NEXT: movzbl %dil, %eax
885 ; CHECK-NEXT: kmovw %eax, %k1
886 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
887 ; CHECK-NEXT: vfmsub231pd %ymm1, %ymm0, %ymm3 {%k1}
888 ; CHECK-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0
889 ; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
891 %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
892 %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
893 %res2 = fadd <4 x double> %res, %res1
894 ret <4 x double> %res2
897 declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
899 define <4 x float>@test_int_x86_avx512_mask3_vfmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
900 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_128:
902 ; CHECK-NEXT: movzbl %dil, %eax
903 ; CHECK-NEXT: kmovw %eax, %k1
904 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
905 ; CHECK-NEXT: vfmsub231ps %xmm1, %xmm0, %xmm3 {%k1}
906 ; CHECK-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0
907 ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
909 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
910 %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
911 %res2 = fadd <4 x float> %res, %res1
912 ret <4 x float> %res2
915 declare <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
917 define <8 x float>@test_int_x86_avx512_mask3_vfmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
918 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_256:
920 ; CHECK-NEXT: movzbl %dil, %eax
921 ; CHECK-NEXT: kmovw %eax, %k1
922 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
923 ; CHECK-NEXT: vfmsub231ps %ymm1, %ymm0, %ymm3 {%k1}
924 ; CHECK-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
925 ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
927 %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
928 %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
929 %res2 = fadd <8 x float> %res, %res1
930 ret <8 x float> %res2
933 declare <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
935 define <8 x float> @test_mask_vfnmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
936 ; CHECK-LABEL: test_mask_vfnmadd256_ps
937 ; CHECK: vfnmadd213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xac,0xc2]
938 %res = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
942 declare <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
944 define <4 x float> @test_mask_vfnmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
945 ; CHECK-LABEL: test_mask_vfnmadd128_ps
946 ; CHECK: vfnmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xac,0xc2]
947 %res = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
951 declare <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
953 define <4 x double> @test_mask_vfnmadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
954 ; CHECK-LABEL: test_mask_vfnmadd256_pd
955 ; CHECK: vfnmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xac,0xc2]
956 %res = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
957 ret <4 x double> %res
960 declare <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
962 define <2 x double> @test_mask_vfnmadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
963 ; CHECK-LABEL: test_mask_vfnmadd128_pd
964 ; CHECK: vfnmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xac,0xc2]
965 %res = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
966 ret <2 x double> %res
969 declare <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
971 define <8 x float> @test_mask_vfnmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
972 ; CHECK-LABEL: test_mask_vfnmsub256_ps
973 ; CHECK: vfnmsub213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xae,0xc2]
974 %res = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
978 declare <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
980 define <4 x float> @test_mask_vfnmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
981 ; CHECK-LABEL: test_mask_vfnmsub128_ps
982 ; CHECK: vfnmsub213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xae,0xc2]
983 %res = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
987 declare <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
989 define <4 x double> @test_mask_vfnmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
990 ; CHECK-LABEL: test_mask_vfnmsub256_pd
991 ; CHECK: vfnmsub213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xae,0xc2]
992 %res = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
993 ret <4 x double> %res
996 declare <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
998 define <2 x double> @test_mask_vfnmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
999 ; CHECK-LABEL: test_mask_vfnmsub128_pd
1000 ; CHECK: vfnmsub213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xae,0xc2]
1001 %res = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
1002 ret <2 x double> %res
1006 define <2 x double>@test_int_x86_avx512_mask_vfnmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
1007 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_128:
1009 ; CHECK-NEXT: movzbl %dil, %eax
1010 ; CHECK-NEXT: kmovw %eax, %k1
1011 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
1012 ; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm3 {%k1}
1013 ; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0
1014 ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
1016 %res = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
1017 %res1 = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
1018 %res2 = fadd <2 x double> %res, %res1
1019 ret <2 x double> %res2
1022 declare <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
1024 define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
1025 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_128:
1027 ; CHECK-NEXT: movzbl %dil, %eax
1028 ; CHECK-NEXT: kmovw %eax, %k1
1029 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
1030 ; CHECK-NEXT: vfnmsub231pd %xmm1, %xmm0, %xmm3 {%k1}
1031 ; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0
1032 ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
1034 %res = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
1035 %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
1036 %res2 = fadd <2 x double> %res, %res1
1037 ret <2 x double> %res2
1040 define <4 x double>@test_int_x86_avx512_mask_vfnmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
1041 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_256:
1043 ; CHECK-NEXT: movzbl %dil, %eax
1044 ; CHECK-NEXT: kmovw %eax, %k1
1045 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
1046 ; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm3 {%k1}
1047 ; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0
1048 ; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
1050 %res = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
1051 %res1 = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
1052 %res2 = fadd <4 x double> %res, %res1
1053 ret <4 x double> %res2
1056 declare <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
1058 define <4 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
1059 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_256:
1061 ; CHECK-NEXT: movzbl %dil, %eax
1062 ; CHECK-NEXT: kmovw %eax, %k1
1063 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
1064 ; CHECK-NEXT: vfnmsub231pd %ymm1, %ymm0, %ymm3 {%k1}
1065 ; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0
1066 ; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
1068 %res = call <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
1069 %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
1070 %res2 = fadd <4 x double> %res, %res1
1071 ret <4 x double> %res2
1074 define <4 x float>@test_int_x86_avx512_mask_vfnmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
1075 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_128:
1077 ; CHECK-NEXT: movzbl %dil, %eax
1078 ; CHECK-NEXT: kmovw %eax, %k1
1079 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
1080 ; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm3 {%k1}
1081 ; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
1082 ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
1084 %res = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
1085 %res1 = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
1086 %res2 = fadd <4 x float> %res, %res1
1087 ret <4 x float> %res2
1090 declare <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
1092 define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
1093 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_128:
1095 ; CHECK-NEXT: movzbl %dil, %eax
1096 ; CHECK-NEXT: kmovw %eax, %k1
1097 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
1098 ; CHECK-NEXT: vfnmsub231ps %xmm1, %xmm0, %xmm3 {%k1}
1099 ; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
1100 ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
1102 %res = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
1103 %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
1104 %res2 = fadd <4 x float> %res, %res1
1105 ret <4 x float> %res2
1108 define <8 x float>@test_int_x86_avx512_mask_vfnmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
1109 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_256:
1111 ; CHECK-NEXT: movzbl %dil, %eax
1112 ; CHECK-NEXT: kmovw %eax, %k1
1113 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
1114 ; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm3 {%k1}
1115 ; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0
1116 ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
1118 %res = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
1119 %res1 = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
1120 %res2 = fadd <8 x float> %res, %res1
1121 ret <8 x float> %res2
1124 declare <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
1126 define <8 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
1127 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_256:
1129 ; CHECK-NEXT: movzbl %dil, %eax
1130 ; CHECK-NEXT: kmovw %eax, %k1
1131 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
1132 ; CHECK-NEXT: vfnmsub231ps %ymm1, %ymm0, %ymm3 {%k1}
1133 ; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0
1134 ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
1136 %res = call <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
1137 %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
1138 %res2 = fadd <8 x float> %res, %res1
1139 ret <8 x float> %res2
1142 define <2 x double>@test_int_x86_avx512_mask_vfnmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
1143 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_128:
1145 ; CHECK-NEXT: movzbl %dil, %eax
1146 ; CHECK-NEXT: kmovw %eax, %k1
1147 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
1148 ; CHECK-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm3 {%k1}
1149 ; CHECK-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0
1150 ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
1152 %res = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
1153 %res1 = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
1154 %res2 = fadd <2 x double> %res, %res1
1155 ret <2 x double> %res2
1158 define <4 x double>@test_int_x86_avx512_mask_vfnmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
1159 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_256:
1161 ; CHECK-NEXT: movzbl %dil, %eax
1162 ; CHECK-NEXT: kmovw %eax, %k1
1163 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
1164 ; CHECK-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm3 {%k1}
1165 ; CHECK-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0
1166 ; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
1168 %res = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
1169 %res1 = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
1170 %res2 = fadd <4 x double> %res, %res1
1171 ret <4 x double> %res2
1174 define <4 x float>@test_int_x86_avx512_mask_vfnmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
1175 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_128:
1177 ; CHECK-NEXT: movzbl %dil, %eax
1178 ; CHECK-NEXT: kmovw %eax, %k1
1179 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
1180 ; CHECK-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm3 {%k1}
1181 ; CHECK-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
1182 ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
1184 %res = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
1185 %res1 = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
1186 %res2 = fadd <4 x float> %res, %res1
1187 ret <4 x float> %res2
1190 define <8 x float>@test_int_x86_avx512_mask_vfnmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
1191 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_256:
1193 ; CHECK-NEXT: movzbl %dil, %eax
1194 ; CHECK-NEXT: kmovw %eax, %k1
1195 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
1196 ; CHECK-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm3 {%k1}
1197 ; CHECK-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
1198 ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
1200 %res = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
1201 %res1 = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
1202 %res2 = fadd <8 x float> %res, %res1
1203 ret <8 x float> %res2
1206 declare <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
1208 define <8 x float> @test_mask_fmaddsub256_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask) {
1209 ; CHECK-LABEL: test_mask_fmaddsub256_ps:
1210 ; CHECK: vfmaddsub213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa6,0xc2]
1211 %res = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask)
1212 ret <8 x float> %res
1215 declare <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
1217 define <4 x float> @test_mask_fmaddsub128_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1218 ; CHECK-LABEL: test_mask_fmaddsub128_ps:
1219 ; CHECK: vfmaddsub213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa6,0xc2]
1220 %res = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask)
1221 ret <4 x float> %res
1224 declare <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
1226 define <4 x double> @test_mask_vfmaddsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
1227 ; CHECK-LABEL: test_mask_vfmaddsub256_pd
1228 ; CHECK: vfmaddsub213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa6,0xc2]
1229 %res = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
1230 ret <4 x double> %res
1233 declare <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
1235 define <2 x double> @test_mask_vfmaddsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
1236 ; CHECK-LABEL: test_mask_vfmaddsub128_pd
1237 ; CHECK: vfmaddsub213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa6,0xc2]
1238 %res = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
1239 ret <2 x double> %res
1242 define <2 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
1243 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_128:
1245 ; CHECK-NEXT: movzbl %dil, %eax
1246 ; CHECK-NEXT: kmovw %eax, %k1
1247 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
1248 ; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm3 {%k1}
1249 ; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0
1250 ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
1252 %res = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
1253 %res1 = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
1254 %res2 = fadd <2 x double> %res, %res1
1255 ret <2 x double> %res2
1258 declare <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
1260 define <2 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
1261 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_128:
1263 ; CHECK-NEXT: movzbl %dil, %eax
1264 ; CHECK-NEXT: kmovw %eax, %k1
1265 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
1266 ; CHECK-NEXT: vfmaddsub231pd %xmm1, %xmm0, %xmm3 {%k1}
1267 ; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0
1268 ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
1270 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
1271 %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
1272 %res2 = fadd <2 x double> %res, %res1
1273 ret <2 x double> %res2
1276 declare <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
1278 define <2 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
1279 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_128:
1281 ; CHECK-NEXT: movzbl %dil, %eax
1282 ; CHECK-NEXT: kmovw %eax, %k1
1283 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
1284 ; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm3 {%k1} {z}
1285 ; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0
1286 ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
1288 %res = call <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
1289 %res1 = call <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
1290 %res2 = fadd <2 x double> %res, %res1
1291 ret <2 x double> %res2
1294 define <4 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
1295 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_256:
1297 ; CHECK-NEXT: movzbl %dil, %eax
1298 ; CHECK-NEXT: kmovw %eax, %k1
1299 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
1300 ; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm3 {%k1}
1301 ; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0
1302 ; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
1304 %res = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
1305 %res1 = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
1306 %res2 = fadd <4 x double> %res, %res1
1307 ret <4 x double> %res2
1310 declare <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
1312 define <4 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
1313 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_256:
1315 ; CHECK-NEXT: movzbl %dil, %eax
1316 ; CHECK-NEXT: kmovw %eax, %k1
1317 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
1318 ; CHECK-NEXT: vfmaddsub231pd %ymm1, %ymm0, %ymm3 {%k1}
1319 ; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0
1320 ; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
1322 %res = call <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
1323 %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
1324 %res2 = fadd <4 x double> %res, %res1
1325 ret <4 x double> %res2
1328 declare <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
1330 define <4 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
1331 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_256:
1333 ; CHECK-NEXT: movzbl %dil, %eax
1334 ; CHECK-NEXT: kmovw %eax, %k1
1335 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
1336 ; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm3 {%k1} {z}
1337 ; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0
1338 ; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
1340 %res = call <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
1341 %res1 = call <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
1342 %res2 = fadd <4 x double> %res, %res1
1343 ret <4 x double> %res2
1346 define <4 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
1347 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_128:
1349 ; CHECK-NEXT: movzbl %dil, %eax
1350 ; CHECK-NEXT: kmovw %eax, %k1
1351 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
1352 ; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm3 {%k1}
1353 ; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0
1354 ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
1356 %res = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
1357 %res1 = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
1358 %res2 = fadd <4 x float> %res, %res1
1359 ret <4 x float> %res2
1362 declare <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
1364 define <4 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
1365 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_128:
1367 ; CHECK-NEXT: movzbl %dil, %eax
1368 ; CHECK-NEXT: kmovw %eax, %k1
1369 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
1370 ; CHECK-NEXT: vfmaddsub231ps %xmm1, %xmm0, %xmm3 {%k1}
1371 ; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0
1372 ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
1374 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
1375 %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
1376 %res2 = fadd <4 x float> %res, %res1
1377 ret <4 x float> %res2
1380 declare <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
1382 define <4 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
1383 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_128:
1385 ; CHECK-NEXT: movzbl %dil, %eax
1386 ; CHECK-NEXT: kmovw %eax, %k1
1387 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
1388 ; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm3 {%k1} {z}
1389 ; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0
1390 ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
1392 %res = call <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
1393 %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
1394 %res2 = fadd <4 x float> %res, %res1
1395 ret <4 x float> %res2
1398 define <8 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
1399 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_256:
1401 ; CHECK-NEXT: movzbl %dil, %eax
1402 ; CHECK-NEXT: kmovw %eax, %k1
1403 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
1404 ; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm3 {%k1}
1405 ; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0
1406 ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
1408 %res = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
1409 %res1 = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
1410 %res2 = fadd <8 x float> %res, %res1
1411 ret <8 x float> %res2
1414 declare <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
1416 define <8 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
1417 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_256:
1419 ; CHECK-NEXT: movzbl %dil, %eax
1420 ; CHECK-NEXT: kmovw %eax, %k1
1421 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
1422 ; CHECK-NEXT: vfmaddsub231ps %ymm1, %ymm0, %ymm3 {%k1}
1423 ; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0
1424 ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
1426 %res = call <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
1427 %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
1428 %res2 = fadd <8 x float> %res, %res1
1429 ret <8 x float> %res2
1432 declare <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
1434 define <8 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
1435 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_256:
1437 ; CHECK-NEXT: movzbl %dil, %eax
1438 ; CHECK-NEXT: kmovw %eax, %k1
1439 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
1440 ; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm3 {%k1} {z}
1441 ; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0
1442 ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
1444 %res = call <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
1445 %res1 = call <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
1446 %res2 = fadd <8 x float> %res, %res1
1447 ret <8 x float> %res2
1450 declare <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
1452 define <2 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
1453 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_128:
1455 ; CHECK-NEXT: movzbl %dil, %eax
1456 ; CHECK-NEXT: kmovw %eax, %k1
1457 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
1458 ; CHECK-NEXT: vfmsubadd231pd %xmm1, %xmm0, %xmm3 {%k1}
1459 ; CHECK-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0
1460 ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
1462 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
1463 %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
1464 %res2=fadd <2 x double> %res, %res1
1465 ret <2 x double> %res2
1468 declare <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
1470 define <4 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
1471 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_256:
1473 ; CHECK-NEXT: movzbl %dil, %eax
1474 ; CHECK-NEXT: kmovw %eax, %k1
1475 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
1476 ; CHECK-NEXT: vfmsubadd231pd %ymm1, %ymm0, %ymm3 {%k1}
1477 ; CHECK-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0
1478 ; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
1480 %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
1481 %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
1482 %res2=fadd <4 x double> %res, %res1
1483 ret <4 x double> %res2
1486 declare <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
1488 define <4 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
1489 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_128:
1491 ; CHECK-NEXT: movzbl %dil, %eax
1492 ; CHECK-NEXT: kmovw %eax, %k1
1493 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
1494 ; CHECK-NEXT: vfmsubadd231ps %xmm1, %xmm0, %xmm3 {%k1}
1495 ; CHECK-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0
1496 ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
1498 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
1499 %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
1500 %res2=fadd <4 x float> %res, %res1
1501 ret <4 x float> %res2
1504 declare <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
1506 define <8 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
1507 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_256:
1509 ; CHECK-NEXT: movzbl %dil, %eax
1510 ; CHECK-NEXT: kmovw %eax, %k1
1511 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
1512 ; CHECK-NEXT: vfmsubadd231ps %ymm1, %ymm0, %ymm3 {%k1}
1513 ; CHECK-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0
1514 ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
1516 %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
1517 %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
1518 %res2=fadd <8 x float> %res, %res1
1519 ret <8 x float> %res2
1523 define <4 x float> @test_mask_vfmadd128_ps_r(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
1524 ; CHECK-LABEL: test_mask_vfmadd128_ps_r
1525 ; CHECK: vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0xc2]
1526 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
1527 ret <4 x float> %res
1530 define <4 x float> @test_mask_vfmadd128_ps_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
1531 ; CHECK-LABEL: test_mask_vfmadd128_ps_rz
1532 ; CHECK: vfmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa8,0xc2]
1533 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
1534 ret <4 x float> %res
1537 define <4 x float> @test_mask_vfmadd128_ps_rmk(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) {
1538 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmk
1539 ; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
1540 %a2 = load <4 x float>, <4 x float>* %ptr_a2
1541 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
1542 ret <4 x float> %res
1545 define <4 x float> @test_mask_vfmadd128_ps_rmka(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) {
1546 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmka
1547 ; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
1548 %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 8
1549 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
1550 ret <4 x float> %res
1553 define <4 x float> @test_mask_vfmadd128_ps_rmkz(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
1554 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmkz
1555 ; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0x07]
1556 %a2 = load <4 x float>, <4 x float>* %ptr_a2
1557 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
1558 ret <4 x float> %res
1561 define <4 x float> @test_mask_vfmadd128_ps_rmkza(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
1562 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmkza
1563 ; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0x07]
1564 %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 4
1565 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
1566 ret <4 x float> %res
1569 define <4 x float> @test_mask_vfmadd128_ps_rmb(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) {
1570 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmb
1571 ; CHECK: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
1572 %q = load float, float* %ptr_a2
1573 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
1574 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
1575 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
1576 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
1577 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind
1578 ret <4 x float> %res
1581 define <4 x float> @test_mask_vfmadd128_ps_rmba(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) {
1582 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmba
1583 ; CHECK: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
1584 %q = load float, float* %ptr_a2, align 4
1585 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
1586 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
1587 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
1588 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
1589 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind
1590 ret <4 x float> %res
1593 define <4 x float> @test_mask_vfmadd128_ps_rmbz(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) {
1594 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmbz
1595 ; CHECK: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
1596 %q = load float, float* %ptr_a2
1597 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
1598 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
1599 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
1600 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
1601 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind
1602 ret <4 x float> %res
1605 define <4 x float> @test_mask_vfmadd128_ps_rmbza(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) {
1606 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmbza
1607 ; CHECK: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
1608 %q = load float, float* %ptr_a2, align 4
1609 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
1610 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
1611 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
1612 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
1613 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind
1614 ret <4 x float> %res
1617 define <2 x double> @test_mask_vfmadd128_pd_r(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
1618 ; CHECK-LABEL: test_mask_vfmadd128_pd_r
1619 ; CHECK: vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0xc2]
1620 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
1621 ret <2 x double> %res
1624 define <2 x double> @test_mask_vfmadd128_pd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
1625 ; CHECK-LABEL: test_mask_vfmadd128_pd_rz
1626 ; CHECK: vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa8,0xc2]
1627 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
1628 ret <2 x double> %res
1631 define <2 x double> @test_mask_vfmadd128_pd_rmk(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2, i8 %mask) {
1632 ; CHECK-LABEL: test_mask_vfmadd128_pd_rmk
1633 ; CHECK: vfmadd213pd (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x07]
1634 %a2 = load <2 x double>, <2 x double>* %ptr_a2
1635 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
1636 ret <2 x double> %res
1639 define <2 x double> @test_mask_vfmadd128_pd_rmkz(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2) {
1640 ; CHECK-LABEL: test_mask_vfmadd128_pd_rmkz
1641 ; CHECK: vfmadd213pd (%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0x07]
1642 %a2 = load <2 x double>, <2 x double>* %ptr_a2
1643 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
1644 ret <2 x double> %res
1647 define <4 x double> @test_mask_vfmadd256_pd_r(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
1648 ; CHECK-LABEL: test_mask_vfmadd256_pd_r
1649 ; CHECK: vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0xc2]
1650 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
1651 ret <4 x double> %res
1654 define <4 x double> @test_mask_vfmadd256_pd_rz(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
1655 ; CHECK-LABEL: test_mask_vfmadd256_pd_rz
1656 ; CHECK: vfmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa8,0xc2]
1657 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
1658 ret <4 x double> %res
1661 define <4 x double> @test_mask_vfmadd256_pd_rmk(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2, i8 %mask) {
1662 ; CHECK-LABEL: test_mask_vfmadd256_pd_rmk
1663 ; CHECK: vfmadd213pd (%rdi), %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x07]
1664 %a2 = load <4 x double>, <4 x double>* %ptr_a2
1665 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
1666 ret <4 x double> %res
1669 define <4 x double> @test_mask_vfmadd256_pd_rmkz(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2) {
1670 ; CHECK-LABEL: test_mask_vfmadd256_pd_rmkz
1671 ; CHECK: vfmadd213pd (%rdi), %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0xf5,0xa8,0x07]
1672 %a2 = load <4 x double>, <4 x double>* %ptr_a2
1673 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
1674 ret <4 x double> %res
1676 define <8 x i16> @test_mask_add_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
1677 ;CHECK-LABEL: test_mask_add_epi16_rr_128
1678 ;CHECK: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc1]
1679 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
1683 define <8 x i16> @test_mask_add_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
1684 ;CHECK-LABEL: test_mask_add_epi16_rrk_128
1685 ;CHECK: vpaddw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfd,0xd1]
1686 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
1690 define <8 x i16> @test_mask_add_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
1691 ;CHECK-LABEL: test_mask_add_epi16_rrkz_128
1692 ;CHECK: vpaddw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfd,0xc1]
1693 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
1697 define <8 x i16> @test_mask_add_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
1698 ;CHECK-LABEL: test_mask_add_epi16_rm_128
1699 ;CHECK: vpaddw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0x07]
1700 %b = load <8 x i16>, <8 x i16>* %ptr_b
1701 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
1705 define <8 x i16> @test_mask_add_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
1706 ;CHECK-LABEL: test_mask_add_epi16_rmk_128
1707 ;CHECK: vpaddw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfd,0x0f]
1708 %b = load <8 x i16>, <8 x i16>* %ptr_b
1709 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
1713 define <8 x i16> @test_mask_add_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
1714 ;CHECK-LABEL: test_mask_add_epi16_rmkz_128
1715 ;CHECK: vpaddw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfd,0x07]
1716 %b = load <8 x i16>, <8 x i16>* %ptr_b
1717 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
1721 declare <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1723 define <16 x i16> @test_mask_add_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
1724 ;CHECK-LABEL: test_mask_add_epi16_rr_256
1725 ;CHECK: vpaddw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc1]
1726 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
1730 define <16 x i16> @test_mask_add_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
1731 ;CHECK-LABEL: test_mask_add_epi16_rrk_256
1732 ;CHECK: vpaddw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfd,0xd1]
1733 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
1737 define <16 x i16> @test_mask_add_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
1738 ;CHECK-LABEL: test_mask_add_epi16_rrkz_256
1739 ;CHECK: vpaddw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0xc1]
1740 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
1744 define <16 x i16> @test_mask_add_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
1745 ;CHECK-LABEL: test_mask_add_epi16_rm_256
1746 ;CHECK: vpaddw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0x07]
1747 %b = load <16 x i16>, <16 x i16>* %ptr_b
1748 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
1752 define <16 x i16> @test_mask_add_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
1753 ;CHECK-LABEL: test_mask_add_epi16_rmk_256
1754 ;CHECK: vpaddw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfd,0x0f]
1755 %b = load <16 x i16>, <16 x i16>* %ptr_b
1756 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
1760 define <16 x i16> @test_mask_add_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
1761 ;CHECK-LABEL: test_mask_add_epi16_rmkz_256
1762 ;CHECK: vpaddw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0x07]
1763 %b = load <16 x i16>, <16 x i16>* %ptr_b
1764 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
1768 declare <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1770 define <8 x i16> @test_mask_sub_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
1771 ;CHECK-LABEL: test_mask_sub_epi16_rr_128
1772 ;CHECK: vpsubw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf9,0xc1]
1773 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
1777 define <8 x i16> @test_mask_sub_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
1778 ;CHECK-LABEL: test_mask_sub_epi16_rrk_128
1779 ;CHECK: vpsubw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf9,0xd1]
1780 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
1784 define <8 x i16> @test_mask_sub_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
1785 ;CHECK-LABEL: test_mask_sub_epi16_rrkz_128
1786 ;CHECK: vpsubw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf9,0xc1]
1787 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
1791 define <8 x i16> @test_mask_sub_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
1792 ;CHECK-LABEL: test_mask_sub_epi16_rm_128
1793 ;CHECK: vpsubw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf9,0x07]
1794 %b = load <8 x i16>, <8 x i16>* %ptr_b
1795 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
1799 define <8 x i16> @test_mask_sub_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
1800 ;CHECK-LABEL: test_mask_sub_epi16_rmk_128
1801 ;CHECK: vpsubw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf9,0x0f]
1802 %b = load <8 x i16>, <8 x i16>* %ptr_b
1803 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
1807 define <8 x i16> @test_mask_sub_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
1808 ;CHECK-LABEL: test_mask_sub_epi16_rmkz_128
1809 ;CHECK: vpsubw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf9,0x07]
1810 %b = load <8 x i16>, <8 x i16>* %ptr_b
1811 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
1815 declare <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1817 define <16 x i16> @test_mask_sub_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
1818 ;CHECK-LABEL: test_mask_sub_epi16_rr_256
1819 ;CHECK: vpsubw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf9,0xc1]
1820 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
1824 define <16 x i16> @test_mask_sub_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
1825 ;CHECK-LABEL: test_mask_sub_epi16_rrk_256
1826 ;CHECK: vpsubw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf9,0xd1]
1827 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
1831 define <16 x i16> @test_mask_sub_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
1832 ;CHECK-LABEL: test_mask_sub_epi16_rrkz_256
1833 ;CHECK: vpsubw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0xc1]
1834 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
1838 define <16 x i16> @test_mask_sub_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
1839 ;CHECK-LABEL: test_mask_sub_epi16_rm_256
1840 ;CHECK: vpsubw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf9,0x07]
1841 %b = load <16 x i16>, <16 x i16>* %ptr_b
1842 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
1846 define <16 x i16> @test_mask_sub_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
1847 ;CHECK-LABEL: test_mask_sub_epi16_rmk_256
1848 ;CHECK: vpsubw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf9,0x0f]
1849 %b = load <16 x i16>, <16 x i16>* %ptr_b
1850 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
1854 define <16 x i16> @test_mask_sub_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
1855 ;CHECK-LABEL: test_mask_sub_epi16_rmkz_256
1856 ;CHECK: vpsubw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0x07]
1857 %b = load <16 x i16>, <16 x i16>* %ptr_b
1858 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
1862 declare <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1864 define <32 x i16> @test_mask_add_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1865 ;CHECK-LABEL: test_mask_add_epi16_rr_512
1866 ;CHECK: vpaddw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1]
1867 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1871 define <32 x i16> @test_mask_add_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
1872 ;CHECK-LABEL: test_mask_add_epi16_rrk_512
1873 ;CHECK: vpaddw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfd,0xd1]
1874 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1878 define <32 x i16> @test_mask_add_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
1879 ;CHECK-LABEL: test_mask_add_epi16_rrkz_512
1880 ;CHECK: vpaddw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0xc1]
1881 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1885 define <32 x i16> @test_mask_add_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1886 ;CHECK-LABEL: test_mask_add_epi16_rm_512
1887 ;CHECK: vpaddw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfd,0x07]
1888 %b = load <32 x i16>, <32 x i16>* %ptr_b
1889 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1893 define <32 x i16> @test_mask_add_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1894 ;CHECK-LABEL: test_mask_add_epi16_rmk_512
1895 ;CHECK: vpaddw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfd,0x0f]
1896 %b = load <32 x i16>, <32 x i16>* %ptr_b
1897 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1901 define <32 x i16> @test_mask_add_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
1902 ;CHECK-LABEL: test_mask_add_epi16_rmkz_512
1903 ;CHECK: vpaddw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0x07]
1904 %b = load <32 x i16>, <32 x i16>* %ptr_b
1905 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1909 declare <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1911 define <32 x i16> @test_mask_sub_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1912 ;CHECK-LABEL: test_mask_sub_epi16_rr_512
1913 ;CHECK: vpsubw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xf9,0xc1]
1914 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1918 define <32 x i16> @test_mask_sub_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
1919 ;CHECK-LABEL: test_mask_sub_epi16_rrk_512
1920 ;CHECK: vpsubw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf9,0xd1]
1921 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1925 define <32 x i16> @test_mask_sub_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
1926 ;CHECK-LABEL: test_mask_sub_epi16_rrkz_512
1927 ;CHECK: vpsubw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0xc1]
1928 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1932 define <32 x i16> @test_mask_sub_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1933 ;CHECK-LABEL: test_mask_sub_epi16_rm_512
1934 ;CHECK: vpsubw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xf9,0x07]
1935 %b = load <32 x i16>, <32 x i16>* %ptr_b
1936 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1940 define <32 x i16> @test_mask_sub_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1941 ;CHECK-LABEL: test_mask_sub_epi16_rmk_512
1942 ;CHECK: vpsubw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf9,0x0f]
1943 %b = load <32 x i16>, <32 x i16>* %ptr_b
1944 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1948 define <32 x i16> @test_mask_sub_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
1949 ;CHECK-LABEL: test_mask_sub_epi16_rmkz_512
1950 ;CHECK: vpsubw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0x07]
1951 %b = load <32 x i16>, <32 x i16>* %ptr_b
1952 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1956 declare <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1958 define <32 x i16> @test_mask_mullo_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1959 ;CHECK-LABEL: test_mask_mullo_epi16_rr_512
1960 ;CHECK: vpmullw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xd5,0xc1]
1961 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1965 define <32 x i16> @test_mask_mullo_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
1966 ;CHECK-LABEL: test_mask_mullo_epi16_rrk_512
1967 ;CHECK: vpmullw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd5,0xd1]
1968 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1972 define <32 x i16> @test_mask_mullo_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
1973 ;CHECK-LABEL: test_mask_mullo_epi16_rrkz_512
1974 ;CHECK: vpmullw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0xc1]
1975 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1979 define <32 x i16> @test_mask_mullo_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1980 ;CHECK-LABEL: test_mask_mullo_epi16_rm_512
1981 ;CHECK: vpmullw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xd5,0x07]
1982 %b = load <32 x i16>, <32 x i16>* %ptr_b
1983 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1987 define <32 x i16> @test_mask_mullo_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1988 ;CHECK-LABEL: test_mask_mullo_epi16_rmk_512
1989 ;CHECK: vpmullw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd5,0x0f]
1990 %b = load <32 x i16>, <32 x i16>* %ptr_b
1991 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1995 define <32 x i16> @test_mask_mullo_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
1996 ;CHECK-LABEL: test_mask_mullo_epi16_rmkz_512
1997 ;CHECK: vpmullw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0x07]
1998 %b = load <32 x i16>, <32 x i16>* %ptr_b
1999 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
2003 declare <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2005 define <8 x i16> @test_mask_mullo_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
2006 ;CHECK-LABEL: test_mask_mullo_epi16_rr_128
2007 ;CHECK: vpmullw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd5,0xc1]
2008 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
2012 define <8 x i16> @test_mask_mullo_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
2013 ;CHECK-LABEL: test_mask_mullo_epi16_rrk_128
2014 ;CHECK: vpmullw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd5,0xd1]
2015 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
2019 define <8 x i16> @test_mask_mullo_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
2020 ;CHECK-LABEL: test_mask_mullo_epi16_rrkz_128
2021 ;CHECK: vpmullw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd5,0xc1]
2022 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
2026 define <8 x i16> @test_mask_mullo_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
2027 ;CHECK-LABEL: test_mask_mullo_epi16_rm_128
2028 ;CHECK: vpmullw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd5,0x07]
2029 %b = load <8 x i16>, <8 x i16>* %ptr_b
2030 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
2034 define <8 x i16> @test_mask_mullo_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
2035 ;CHECK-LABEL: test_mask_mullo_epi16_rmk_128
2036 ;CHECK: vpmullw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd5,0x0f]
2037 %b = load <8 x i16>, <8 x i16>* %ptr_b
2038 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
2042 define <8 x i16> @test_mask_mullo_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
2043 ;CHECK-LABEL: test_mask_mullo_epi16_rmkz_128
2044 ;CHECK: vpmullw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd5,0x07]
2045 %b = load <8 x i16>, <8 x i16>* %ptr_b
2046 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
2050 declare <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2052 define <16 x i16> @test_mask_mullo_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
2053 ;CHECK-LABEL: test_mask_mullo_epi16_rr_256
2054 ;CHECK: vpmullw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd5,0xc1]
2055 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
2059 define <16 x i16> @test_mask_mullo_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
2060 ;CHECK-LABEL: test_mask_mullo_epi16_rrk_256
2061 ;CHECK: vpmullw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd5,0xd1]
2062 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
2066 define <16 x i16> @test_mask_mullo_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
2067 ;CHECK-LABEL: test_mask_mullo_epi16_rrkz_256
2068 ;CHECK: vpmullw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0xc1]
2069 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
2073 define <16 x i16> @test_mask_mullo_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
2074 ;CHECK-LABEL: test_mask_mullo_epi16_rm_256
2075 ;CHECK: vpmullw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd5,0x07]
2076 %b = load <16 x i16>, <16 x i16>* %ptr_b
2077 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
2081 define <16 x i16> @test_mask_mullo_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
2082 ;CHECK-LABEL: test_mask_mullo_epi16_rmk_256
2083 ;CHECK: vpmullw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd5,0x0f]
2084 %b = load <16 x i16>, <16 x i16>* %ptr_b
2085 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
2089 define <16 x i16> @test_mask_mullo_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
2090 ;CHECK-LABEL: test_mask_mullo_epi16_rmkz_256
2091 ;CHECK: vpmullw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0x07]
2092 %b = load <16 x i16>, <16 x i16>* %ptr_b
2093 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
2097 declare <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2100 define <8 x i16> @test_mask_packs_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
2101 ;CHECK-LABEL: test_mask_packs_epi32_rr_128
2102 ;CHECK: vpackssdw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x6b,0xc1]
2103 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
2107 define <8 x i16> @test_mask_packs_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) {
2108 ;CHECK-LABEL: test_mask_packs_epi32_rrk_128
2109 ;CHECK: vpackssdw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1]
2110 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
2114 define <8 x i16> @test_mask_packs_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
2115 ;CHECK-LABEL: test_mask_packs_epi32_rrkz_128
2116 ;CHECK: vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1]
2117 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
2121 define <8 x i16> @test_mask_packs_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
2122 ;CHECK-LABEL: test_mask_packs_epi32_rm_128
2123 ;CHECK: vpackssdw (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x6b,0x07]
2124 %b = load <4 x i32>, <4 x i32>* %ptr_b
2125 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
2129 define <8 x i16> @test_mask_packs_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
2130 ;CHECK-LABEL: test_mask_packs_epi32_rmk_128
2131 ;CHECK: vpackssdw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x0f]
2132 %b = load <4 x i32>, <4 x i32>* %ptr_b
2133 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
2137 define <8 x i16> @test_mask_packs_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
2138 ;CHECK-LABEL: test_mask_packs_epi32_rmkz_128
2139 ;CHECK: vpackssdw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x07]
2140 %b = load <4 x i32>, <4 x i32>* %ptr_b
2141 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
2145 define <8 x i16> @test_mask_packs_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
2146 ;CHECK-LABEL: test_mask_packs_epi32_rmb_128
2147 ;CHECK: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x07]
2148 %q = load i32, i32* %ptr_b
2149 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
2150 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
2151 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
2155 define <8 x i16> @test_mask_packs_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) {
2156 ;CHECK-LABEL: test_mask_packs_epi32_rmbk_128
2157 ;CHECK: vpackssdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x0f]
2158 %q = load i32, i32* %ptr_b
2159 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
2160 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
2161 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
2165 define <8 x i16> @test_mask_packs_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
2166 ;CHECK-LABEL: test_mask_packs_epi32_rmbkz_128
2167 ;CHECK: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x07]
2168 %q = load i32, i32* %ptr_b
2169 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
2170 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
2171 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
2175 declare <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32>, <4 x i32>, <8 x i16>, i8)
2177 define <16 x i16> @test_mask_packs_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
2178 ;CHECK-LABEL: test_mask_packs_epi32_rr_256
2179 ;CHECK: vpackssdw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x6b,0xc1]
2180 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
2184 define <16 x i16> @test_mask_packs_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) {
2185 ;CHECK-LABEL: test_mask_packs_epi32_rrk_256
2186 ;CHECK: vpackssdw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1]
2187 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
2191 define <16 x i16> @test_mask_packs_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) {
2192 ;CHECK-LABEL: test_mask_packs_epi32_rrkz_256
2193 ;CHECK: vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1]
2194 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
2198 define <16 x i16> @test_mask_packs_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
2199 ;CHECK-LABEL: test_mask_packs_epi32_rm_256
2200 ;CHECK: vpackssdw (%rdi), %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x6b,0x07]
2201 %b = load <8 x i32>, <8 x i32>* %ptr_b
2202 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
2206 define <16 x i16> @test_mask_packs_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
2207 ;CHECK-LABEL: test_mask_packs_epi32_rmk_256
2208 ;CHECK: vpackssdw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x0f]
2209 %b = load <8 x i32>, <8 x i32>* %ptr_b
2210 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
2214 define <16 x i16> @test_mask_packs_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) {
2215 ;CHECK-LABEL: test_mask_packs_epi32_rmkz_256
2216 ;CHECK: vpackssdw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x07]
2217 %b = load <8 x i32>, <8 x i32>* %ptr_b
2218 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
2222 define <16 x i16> @test_mask_packs_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
2223 ;CHECK-LABEL: test_mask_packs_epi32_rmb_256
2224 ;CHECK: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x07]
2225 %q = load i32, i32* %ptr_b
2226 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
2227 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
2228 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
2232 define <16 x i16> @test_mask_packs_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) {
2233 ;CHECK-LABEL: test_mask_packs_epi32_rmbk_256
2234 ;CHECK: vpackssdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x0f]
2235 %q = load i32, i32* %ptr_b
2236 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
2237 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
2238 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
2242 define <16 x i16> @test_mask_packs_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) {
2243 ;CHECK-LABEL: test_mask_packs_epi32_rmbkz_256
2244 ;CHECK: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x07]
2245 %q = load i32, i32* %ptr_b
2246 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
2247 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
2248 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
2252 declare <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32>, <8 x i32>, <16 x i16>, i16)
2254 define <16 x i8> @test_mask_packs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
2255 ;CHECK-LABEL: test_mask_packs_epi16_rr_128
2256 ;CHECK: vpacksswb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc1]
2257 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
2261 define <16 x i8> @test_mask_packs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) {
2262 ;CHECK-LABEL: test_mask_packs_epi16_rrk_128
2263 ;CHECK: vpacksswb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x63,0xd1]
2264 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
2268 define <16 x i8> @test_mask_packs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) {
2269 ;CHECK-LABEL: test_mask_packs_epi16_rrkz_128
2270 ;CHECK: vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x63,0xc1]
2271 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
2275 define <16 x i8> @test_mask_packs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
2276 ;CHECK-LABEL: test_mask_packs_epi16_rm_128
2277 ;CHECK: vpacksswb (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0x07]
2278 %b = load <8 x i16>, <8 x i16>* %ptr_b
2279 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
2283 define <16 x i8> @test_mask_packs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
2284 ;CHECK-LABEL: test_mask_packs_epi16_rmk_128
2285 ;CHECK: vpacksswb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x63,0x0f]
2286 %b = load <8 x i16>, <8 x i16>* %ptr_b
2287 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
2291 define <16 x i8> @test_mask_packs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) {
2292 ;CHECK-LABEL: test_mask_packs_epi16_rmkz_128
2293 ;CHECK: vpacksswb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x63,0x07]
2294 %b = load <8 x i16>, <8 x i16>* %ptr_b
2295 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
2299 declare <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16>, <8 x i16>, <16 x i8>, i16)
2301 define <32 x i8> @test_mask_packs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
2302 ;CHECK-LABEL: test_mask_packs_epi16_rr_256
2303 ;CHECK: vpacksswb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x63,0xc1]
2304 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
2308 define <32 x i8> @test_mask_packs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) {
2309 ;CHECK-LABEL: test_mask_packs_epi16_rrk_256
2310 ;CHECK: vpacksswb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x63,0xd1]
2311 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
2315 define <32 x i8> @test_mask_packs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) {
2316 ;CHECK-LABEL: test_mask_packs_epi16_rrkz_256
2317 ;CHECK: vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x63,0xc1]
2318 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
2322 define <32 x i8> @test_mask_packs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
2323 ;CHECK-LABEL: test_mask_packs_epi16_rm_256
2324 ;CHECK: vpacksswb (%rdi), %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x63,0x07]
2325 %b = load <16 x i16>, <16 x i16>* %ptr_b
2326 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
2330 define <32 x i8> @test_mask_packs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
2331 ;CHECK-LABEL: test_mask_packs_epi16_rmk_256
2332 ;CHECK: vpacksswb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x63,0x0f]
2333 %b = load <16 x i16>, <16 x i16>* %ptr_b
2334 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
2338 define <32 x i8> @test_mask_packs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) {
2339 ;CHECK-LABEL: test_mask_packs_epi16_rmkz_256
2340 ;CHECK: vpacksswb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x63,0x07]
2341 %b = load <16 x i16>, <16 x i16>* %ptr_b
2342 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
2346 declare <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32)
2349 define <8 x i16> @test_mask_packus_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
2350 ;CHECK-LABEL: test_mask_packus_epi32_rr_128
2351 ;CHECK: vpackusdw %xmm1, %xmm0, %xmm0
2352 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
2356 define <8 x i16> @test_mask_packus_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) {
2357 ;CHECK-LABEL: test_mask_packus_epi32_rrk_128
2358 ;CHECK: vpackusdw %xmm1, %xmm0, %xmm2 {%k1}
2359 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
2363 define <8 x i16> @test_mask_packus_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
2364 ;CHECK-LABEL: test_mask_packus_epi32_rrkz_128
2365 ;CHECK: vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z}
2366 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
2370 define <8 x i16> @test_mask_packus_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
2371 ;CHECK-LABEL: test_mask_packus_epi32_rm_128
2372 ;CHECK: vpackusdw (%rdi), %xmm0, %xmm0
2373 %b = load <4 x i32>, <4 x i32>* %ptr_b
2374 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
2378 define <8 x i16> @test_mask_packus_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
2379 ;CHECK-LABEL: test_mask_packus_epi32_rmk_128
2380 ;CHECK: vpackusdw (%rdi), %xmm0, %xmm1 {%k1}
2381 %b = load <4 x i32>, <4 x i32>* %ptr_b
2382 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
2386 define <8 x i16> @test_mask_packus_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
2387 ;CHECK-LABEL: test_mask_packus_epi32_rmkz_128
2388 ;CHECK: vpackusdw (%rdi), %xmm0, %xmm0 {%k1} {z}
2389 %b = load <4 x i32>, <4 x i32>* %ptr_b
2390 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
2394 define <8 x i16> @test_mask_packus_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
2395 ;CHECK-LABEL: test_mask_packus_epi32_rmb_128
2396 ;CHECK: vpackusdw (%rdi){1to4}, %xmm0, %xmm0
2397 %q = load i32, i32* %ptr_b
2398 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
2399 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
2400 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
2404 define <8 x i16> @test_mask_packus_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) {
2405 ;CHECK-LABEL: test_mask_packus_epi32_rmbk_128
2406 ;CHECK: vpackusdw (%rdi){1to4}, %xmm0, %xmm1 {%k1}
2407 %q = load i32, i32* %ptr_b
2408 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
2409 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
2410 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
2414 define <8 x i16> @test_mask_packus_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
2415 ;CHECK-LABEL: test_mask_packus_epi32_rmbkz_128
2416 ;CHECK: vpackusdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z}
2417 %q = load i32, i32* %ptr_b
2418 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
2419 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
2420 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
2424 declare <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32>, <4 x i32>, <8 x i16>, i8)
2426 define <16 x i16> @test_mask_packus_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
2427 ;CHECK-LABEL: test_mask_packus_epi32_rr_256
2428 ;CHECK: vpackusdw %ymm1, %ymm0, %ymm0
2429 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
2433 define <16 x i16> @test_mask_packus_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) {
2434 ;CHECK-LABEL: test_mask_packus_epi32_rrk_256
2435 ;CHECK: vpackusdw %ymm1, %ymm0, %ymm2 {%k1}
2436 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
2440 define <16 x i16> @test_mask_packus_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) {
2441 ;CHECK-LABEL: test_mask_packus_epi32_rrkz_256
2442 ;CHECK: vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z}
2443 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
2447 define <16 x i16> @test_mask_packus_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
2448 ;CHECK-LABEL: test_mask_packus_epi32_rm_256
2449 ;CHECK: vpackusdw (%rdi), %ymm0, %ymm0
2450 %b = load <8 x i32>, <8 x i32>* %ptr_b
2451 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
2455 define <16 x i16> @test_mask_packus_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
2456 ;CHECK-LABEL: test_mask_packus_epi32_rmk_256
2457 ;CHECK: vpackusdw (%rdi), %ymm0, %ymm1 {%k1}
2458 %b = load <8 x i32>, <8 x i32>* %ptr_b
2459 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
2463 define <16 x i16> @test_mask_packus_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) {
2464 ;CHECK-LABEL: test_mask_packus_epi32_rmkz_256
2465 ;CHECK: vpackusdw (%rdi), %ymm0, %ymm0 {%k1} {z}
2466 %b = load <8 x i32>, <8 x i32>* %ptr_b
2467 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
2471 define <16 x i16> @test_mask_packus_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
2472 ;CHECK-LABEL: test_mask_packus_epi32_rmb_256
2473 ;CHECK: vpackusdw (%rdi){1to8}, %ymm0, %ymm0
2474 %q = load i32, i32* %ptr_b
2475 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
2476 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
2477 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
2481 define <16 x i16> @test_mask_packus_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) {
2482 ;CHECK-LABEL: test_mask_packus_epi32_rmbk_256
2483 ;CHECK: vpackusdw (%rdi){1to8}, %ymm0, %ymm1 {%k1}
2484 %q = load i32, i32* %ptr_b
2485 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
2486 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
2487 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
2491 define <16 x i16> @test_mask_packus_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) {
2492 ;CHECK-LABEL: test_mask_packus_epi32_rmbkz_256
2493 ;CHECK: vpackusdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z}
2494 %q = load i32, i32* %ptr_b
2495 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
2496 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
2497 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
2501 declare <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32>, <8 x i32>, <16 x i16>, i16)
2503 define <16 x i8> @test_mask_packus_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
2504 ;CHECK-LABEL: test_mask_packus_epi16_rr_128
2505 ;CHECK: vpackuswb %xmm1, %xmm0, %xmm0
2506 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
2510 define <16 x i8> @test_mask_packus_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) {
2511 ;CHECK-LABEL: test_mask_packus_epi16_rrk_128
2512 ;CHECK: vpackuswb %xmm1, %xmm0, %xmm2 {%k1}
2513 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
2517 define <16 x i8> @test_mask_packus_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) {
2518 ;CHECK-LABEL: test_mask_packus_epi16_rrkz_128
2519 ;CHECK: vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z}
2520 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
2524 define <16 x i8> @test_mask_packus_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
2525 ;CHECK-LABEL: test_mask_packus_epi16_rm_128
2526 ;CHECK: vpackuswb (%rdi), %xmm0, %xmm0
2527 %b = load <8 x i16>, <8 x i16>* %ptr_b
2528 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
2532 define <16 x i8> @test_mask_packus_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
2533 ;CHECK-LABEL: test_mask_packus_epi16_rmk_128
2534 ;CHECK: vpackuswb (%rdi), %xmm0, %xmm1 {%k1}
2535 %b = load <8 x i16>, <8 x i16>* %ptr_b
2536 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
2540 define <16 x i8> @test_mask_packus_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) {
2541 ;CHECK-LABEL: test_mask_packus_epi16_rmkz_128
2542 ;CHECK: vpackuswb (%rdi), %xmm0, %xmm0 {%k1} {z}
2543 %b = load <8 x i16>, <8 x i16>* %ptr_b
2544 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
2548 declare <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16>, <8 x i16>, <16 x i8>, i16)
2550 define <32 x i8> @test_mask_packus_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
2551 ;CHECK-LABEL: test_mask_packus_epi16_rr_256
2552 ;CHECK: vpackuswb %ymm1, %ymm0, %ymm0
2553 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
2557 define <32 x i8> @test_mask_packus_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) {
2558 ;CHECK-LABEL: test_mask_packus_epi16_rrk_256
2559 ;CHECK: vpackuswb %ymm1, %ymm0, %ymm2 {%k1}
2560 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
2564 define <32 x i8> @test_mask_packus_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) {
2565 ;CHECK-LABEL: test_mask_packus_epi16_rrkz_256
2566 ;CHECK: vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z}
2567 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
2571 define <32 x i8> @test_mask_packus_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
2572 ;CHECK-LABEL: test_mask_packus_epi16_rm_256
2573 ;CHECK: vpackuswb (%rdi), %ymm0, %ymm0
2574 %b = load <16 x i16>, <16 x i16>* %ptr_b
2575 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
2579 define <32 x i8> @test_mask_packus_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
2580 ;CHECK-LABEL: test_mask_packus_epi16_rmk_256
2581 ;CHECK: vpackuswb (%rdi), %ymm0, %ymm1 {%k1}
2582 %b = load <16 x i16>, <16 x i16>* %ptr_b
2583 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
2587 define <32 x i8> @test_mask_packus_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) {
2588 ;CHECK-LABEL: test_mask_packus_epi16_rmkz_256
2589 ;CHECK: vpackuswb (%rdi), %ymm0, %ymm0 {%k1} {z}
2590 %b = load <16 x i16>, <16 x i16>* %ptr_b
2591 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
2595 declare <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32)
2597 define <8 x i16> @test_mask_adds_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
2598 ;CHECK-LABEL: test_mask_adds_epi16_rr_128
2599 ;CHECK: vpaddsw %xmm1, %xmm0, %xmm0
2600 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
2604 define <8 x i16> @test_mask_adds_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
2605 ;CHECK-LABEL: test_mask_adds_epi16_rrk_128
2606 ;CHECK: vpaddsw %xmm1, %xmm0, %xmm2 {%k1}
2607 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
2611 define <8 x i16> @test_mask_adds_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
2612 ;CHECK-LABEL: test_mask_adds_epi16_rrkz_128
2613 ;CHECK: vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z}
2614 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
2618 define <8 x i16> @test_mask_adds_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
2619 ;CHECK-LABEL: test_mask_adds_epi16_rm_128
2620 ;CHECK: vpaddsw (%rdi), %xmm0, %xmm0
2621 %b = load <8 x i16>, <8 x i16>* %ptr_b
2622 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
2626 define <8 x i16> @test_mask_adds_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
2627 ;CHECK-LABEL: test_mask_adds_epi16_rmk_128
2628 ;CHECK: vpaddsw (%rdi), %xmm0, %xmm1 {%k1}
2629 %b = load <8 x i16>, <8 x i16>* %ptr_b
2630 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
2634 define <8 x i16> @test_mask_adds_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
2635 ;CHECK-LABEL: test_mask_adds_epi16_rmkz_128
2636 ;CHECK: vpaddsw (%rdi), %xmm0, %xmm0 {%k1} {z}
2637 %b = load <8 x i16>, <8 x i16>* %ptr_b
2638 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
2642 declare <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2644 define <16 x i16> @test_mask_adds_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
2645 ;CHECK-LABEL: test_mask_adds_epi16_rr_256
2646 ;CHECK: vpaddsw %ymm1, %ymm0, %ymm0
2647 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
2651 define <16 x i16> @test_mask_adds_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
2652 ;CHECK-LABEL: test_mask_adds_epi16_rrk_256
2653 ;CHECK: vpaddsw %ymm1, %ymm0, %ymm2 {%k1}
2654 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
2658 define <16 x i16> @test_mask_adds_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
2659 ;CHECK-LABEL: test_mask_adds_epi16_rrkz_256
2660 ;CHECK: vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z}
2661 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
2665 define <16 x i16> @test_mask_adds_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
2666 ;CHECK-LABEL: test_mask_adds_epi16_rm_256
2667 ;CHECK: vpaddsw (%rdi), %ymm0, %ymm0
2668 %b = load <16 x i16>, <16 x i16>* %ptr_b
2669 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
2673 define <16 x i16> @test_mask_adds_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
2674 ;CHECK-LABEL: test_mask_adds_epi16_rmk_256
2675 ;CHECK: vpaddsw (%rdi), %ymm0, %ymm1 {%k1}
2676 %b = load <16 x i16>, <16 x i16>* %ptr_b
2677 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
2681 define <16 x i16> @test_mask_adds_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
2682 ;CHECK-LABEL: test_mask_adds_epi16_rmkz_256
2683 ;CHECK: vpaddsw (%rdi), %ymm0, %ymm0 {%k1} {z}
2684 %b = load <16 x i16>, <16 x i16>* %ptr_b
2685 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
2689 declare <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2691 define <8 x i16> @test_mask_subs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
2692 ;CHECK-LABEL: test_mask_subs_epi16_rr_128
2693 ;CHECK: vpsubsw %xmm1, %xmm0, %xmm0
2694 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
2698 define <8 x i16> @test_mask_subs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
2699 ;CHECK-LABEL: test_mask_subs_epi16_rrk_128
2700 ;CHECK: vpsubsw %xmm1, %xmm0, %xmm2 {%k1}
2701 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
2705 define <8 x i16> @test_mask_subs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
2706 ;CHECK-LABEL: test_mask_subs_epi16_rrkz_128
2707 ;CHECK: vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z}
2708 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
2712 define <8 x i16> @test_mask_subs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
2713 ;CHECK-LABEL: test_mask_subs_epi16_rm_128
2714 ;CHECK: vpsubsw (%rdi), %xmm0, %xmm0
2715 %b = load <8 x i16>, <8 x i16>* %ptr_b
2716 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
2720 define <8 x i16> @test_mask_subs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
2721 ;CHECK-LABEL: test_mask_subs_epi16_rmk_128
2722 ;CHECK: vpsubsw (%rdi), %xmm0, %xmm1 {%k1}
2723 %b = load <8 x i16>, <8 x i16>* %ptr_b
2724 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
2728 define <8 x i16> @test_mask_subs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
2729 ;CHECK-LABEL: test_mask_subs_epi16_rmkz_128
2730 ;CHECK: vpsubsw (%rdi), %xmm0, %xmm0 {%k1} {z}
2731 %b = load <8 x i16>, <8 x i16>* %ptr_b
2732 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
2736 declare <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2738 define <16 x i16> @test_mask_subs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
2739 ;CHECK-LABEL: test_mask_subs_epi16_rr_256
2740 ;CHECK: vpsubsw %ymm1, %ymm0, %ymm0
2741 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
2745 define <16 x i16> @test_mask_subs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
2746 ;CHECK-LABEL: test_mask_subs_epi16_rrk_256
2747 ;CHECK: vpsubsw %ymm1, %ymm0, %ymm2 {%k1}
2748 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
2752 define <16 x i16> @test_mask_subs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
2753 ;CHECK-LABEL: test_mask_subs_epi16_rrkz_256
2754 ;CHECK: vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z}
2755 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
2759 define <16 x i16> @test_mask_subs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
2760 ;CHECK-LABEL: test_mask_subs_epi16_rm_256
2761 ;CHECK: vpsubsw (%rdi), %ymm0, %ymm0
2762 %b = load <16 x i16>, <16 x i16>* %ptr_b
2763 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
2767 define <16 x i16> @test_mask_subs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
2768 ;CHECK-LABEL: test_mask_subs_epi16_rmk_256
2769 ;CHECK: vpsubsw (%rdi), %ymm0, %ymm1 {%k1}
2770 %b = load <16 x i16>, <16 x i16>* %ptr_b
2771 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
2775 define <16 x i16> @test_mask_subs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
2776 ;CHECK-LABEL: test_mask_subs_epi16_rmkz_256
2777 ;CHECK: vpsubsw (%rdi), %ymm0, %ymm0 {%k1} {z}
2778 %b = load <16 x i16>, <16 x i16>* %ptr_b
2779 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
2783 declare <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2785 define <8 x i16> @test_mask_adds_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
2786 ;CHECK-LABEL: test_mask_adds_epu16_rr_128
2787 ;CHECK: vpaddusw %xmm1, %xmm0, %xmm0
2788 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
2792 define <8 x i16> @test_mask_adds_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
2793 ;CHECK-LABEL: test_mask_adds_epu16_rrk_128
2794 ;CHECK: vpaddusw %xmm1, %xmm0, %xmm2 {%k1}
2795 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
2799 define <8 x i16> @test_mask_adds_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
2800 ;CHECK-LABEL: test_mask_adds_epu16_rrkz_128
2801 ;CHECK: vpaddusw %xmm1, %xmm0, %xmm0 {%k1} {z}
2802 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
2806 define <8 x i16> @test_mask_adds_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
2807 ;CHECK-LABEL: test_mask_adds_epu16_rm_128
2808 ;CHECK: vpaddusw (%rdi), %xmm0, %xmm0
2809 %b = load <8 x i16>, <8 x i16>* %ptr_b
2810 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
2814 define <8 x i16> @test_mask_adds_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
2815 ;CHECK-LABEL: test_mask_adds_epu16_rmk_128
2816 ;CHECK: vpaddusw (%rdi), %xmm0, %xmm1 {%k1}
2817 %b = load <8 x i16>, <8 x i16>* %ptr_b
2818 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
2822 define <8 x i16> @test_mask_adds_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
2823 ;CHECK-LABEL: test_mask_adds_epu16_rmkz_128
2824 ;CHECK: vpaddusw (%rdi), %xmm0, %xmm0 {%k1} {z}
2825 %b = load <8 x i16>, <8 x i16>* %ptr_b
2826 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
2830 declare <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2832 define <16 x i16> @test_mask_adds_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) {
2833 ;CHECK-LABEL: test_mask_adds_epu16_rr_256
2834 ;CHECK: vpaddusw %ymm1, %ymm0, %ymm0
2835 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
2839 define <16 x i16> @test_mask_adds_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
2840 ;CHECK-LABEL: test_mask_adds_epu16_rrk_256
2841 ;CHECK: vpaddusw %ymm1, %ymm0, %ymm2 {%k1}
2842 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
2846 define <16 x i16> @test_mask_adds_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
2847 ;CHECK-LABEL: test_mask_adds_epu16_rrkz_256
2848 ;CHECK: vpaddusw %ymm1, %ymm0, %ymm0 {%k1} {z}
2849 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
2853 define <16 x i16> @test_mask_adds_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
2854 ;CHECK-LABEL: test_mask_adds_epu16_rm_256
2855 ;CHECK: vpaddusw (%rdi), %ymm0, %ymm0
2856 %b = load <16 x i16>, <16 x i16>* %ptr_b
2857 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
2861 define <16 x i16> @test_mask_adds_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
2862 ;CHECK-LABEL: test_mask_adds_epu16_rmk_256
2863 ;CHECK: vpaddusw (%rdi), %ymm0, %ymm1 {%k1}
2864 %b = load <16 x i16>, <16 x i16>* %ptr_b
2865 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
2869 define <16 x i16> @test_mask_adds_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
2870 ;CHECK-LABEL: test_mask_adds_epu16_rmkz_256
2871 ;CHECK: vpaddusw (%rdi), %ymm0, %ymm0 {%k1} {z}
2872 %b = load <16 x i16>, <16 x i16>* %ptr_b
2873 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
2877 declare <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2879 define <8 x i16> @test_mask_subs_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
2880 ;CHECK-LABEL: test_mask_subs_epu16_rr_128
2881 ;CHECK: vpsubusw %xmm1, %xmm0, %xmm0
2882 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
2886 define <8 x i16> @test_mask_subs_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
2887 ;CHECK-LABEL: test_mask_subs_epu16_rrk_128
2888 ;CHECK: vpsubusw %xmm1, %xmm0, %xmm2 {%k1}
2889 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
2893 define <8 x i16> @test_mask_subs_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
2894 ;CHECK-LABEL: test_mask_subs_epu16_rrkz_128
2895 ;CHECK: vpsubusw %xmm1, %xmm0, %xmm0 {%k1} {z}
2896 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
2900 define <8 x i16> @test_mask_subs_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
2901 ;CHECK-LABEL: test_mask_subs_epu16_rm_128
2902 ;CHECK: vpsubusw (%rdi), %xmm0, %xmm0
2903 %b = load <8 x i16>, <8 x i16>* %ptr_b
2904 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
2908 define <8 x i16> @test_mask_subs_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
2909 ;CHECK-LABEL: test_mask_subs_epu16_rmk_128
2910 ;CHECK: vpsubusw (%rdi), %xmm0, %xmm1 {%k1}
2911 %b = load <8 x i16>, <8 x i16>* %ptr_b
2912 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
2916 define <8 x i16> @test_mask_subs_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
2917 ;CHECK-LABEL: test_mask_subs_epu16_rmkz_128
2918 ;CHECK: vpsubusw (%rdi), %xmm0, %xmm0 {%k1} {z}
2919 %b = load <8 x i16>, <8 x i16>* %ptr_b
2920 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
2924 declare <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2926 define <16 x i16> @test_mask_subs_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) {
2927 ;CHECK-LABEL: test_mask_subs_epu16_rr_256
2928 ;CHECK: vpsubusw %ymm1, %ymm0, %ymm0
2929 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
2933 define <16 x i16> @test_mask_subs_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
2934 ;CHECK-LABEL: test_mask_subs_epu16_rrk_256
2935 ;CHECK: vpsubusw %ymm1, %ymm0, %ymm2 {%k1}
2936 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
2940 define <16 x i16> @test_mask_subs_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
2941 ;CHECK-LABEL: test_mask_subs_epu16_rrkz_256
2942 ;CHECK: vpsubusw %ymm1, %ymm0, %ymm0 {%k1} {z}
2943 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
2947 define <16 x i16> @test_mask_subs_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
2948 ;CHECK-LABEL: test_mask_subs_epu16_rm_256
2949 ;CHECK: vpsubusw (%rdi), %ymm0, %ymm0
2950 %b = load <16 x i16>, <16 x i16>* %ptr_b
2951 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
2955 define <16 x i16> @test_mask_subs_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
2956 ;CHECK-LABEL: test_mask_subs_epu16_rmk_256
2957 ;CHECK: vpsubusw (%rdi), %ymm0, %ymm1 {%k1}
2958 %b = load <16 x i16>, <16 x i16>* %ptr_b
2959 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
2963 define <16 x i16> @test_mask_subs_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
2964 ;CHECK-LABEL: test_mask_subs_epu16_rmkz_256
2965 ;CHECK: vpsubusw (%rdi), %ymm0, %ymm0 {%k1} {z}
2966 %b = load <16 x i16>, <16 x i16>* %ptr_b
2967 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
2971 declare <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2973 define <16 x i8> @test_mask_adds_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
2974 ;CHECK-LABEL: test_mask_adds_epi8_rr_128
2975 ;CHECK: vpaddsb %xmm1, %xmm0, %xmm0
2976 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
2980 define <16 x i8> @test_mask_adds_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
2981 ;CHECK-LABEL: test_mask_adds_epi8_rrk_128
2982 ;CHECK: vpaddsb %xmm1, %xmm0, %xmm2 {%k1}
2983 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
2987 define <16 x i8> @test_mask_adds_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
2988 ;CHECK-LABEL: test_mask_adds_epi8_rrkz_128
2989 ;CHECK: vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z}
2990 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
2994 define <16 x i8> @test_mask_adds_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
2995 ;CHECK-LABEL: test_mask_adds_epi8_rm_128
2996 ;CHECK: vpaddsb (%rdi), %xmm0, %xmm0
2997 %b = load <16 x i8>, <16 x i8>* %ptr_b
2998 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
3002 define <16 x i8> @test_mask_adds_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
3003 ;CHECK-LABEL: test_mask_adds_epi8_rmk_128
3004 ;CHECK: vpaddsb (%rdi), %xmm0, %xmm1 {%k1}
3005 %b = load <16 x i8>, <16 x i8>* %ptr_b
3006 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
3010 define <16 x i8> @test_mask_adds_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
3011 ;CHECK-LABEL: test_mask_adds_epi8_rmkz_128
3012 ;CHECK: vpaddsb (%rdi), %xmm0, %xmm0 {%k1} {z}
3013 %b = load <16 x i8>, <16 x i8>* %ptr_b
3014 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
3018 declare <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
3020 define <32 x i8> @test_mask_adds_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
3021 ;CHECK-LABEL: test_mask_adds_epi8_rr_256
3022 ;CHECK: vpaddsb %ymm1, %ymm0, %ymm0
3023 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
3027 define <32 x i8> @test_mask_adds_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
3028 ;CHECK-LABEL: test_mask_adds_epi8_rrk_256
3029 ;CHECK: vpaddsb %ymm1, %ymm0, %ymm2 {%k1}
3030 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
3034 define <32 x i8> @test_mask_adds_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
3035 ;CHECK-LABEL: test_mask_adds_epi8_rrkz_256
3036 ;CHECK: vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z}
3037 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
3041 define <32 x i8> @test_mask_adds_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
3042 ;CHECK-LABEL: test_mask_adds_epi8_rm_256
3043 ;CHECK: vpaddsb (%rdi), %ymm0, %ymm0
3044 %b = load <32 x i8>, <32 x i8>* %ptr_b
3045 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
3049 define <32 x i8> @test_mask_adds_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
3050 ;CHECK-LABEL: test_mask_adds_epi8_rmk_256
3051 ;CHECK: vpaddsb (%rdi), %ymm0, %ymm1 {%k1}
3052 %b = load <32 x i8>, <32 x i8>* %ptr_b
3053 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
3057 define <32 x i8> @test_mask_adds_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
3058 ;CHECK-LABEL: test_mask_adds_epi8_rmkz_256
3059 ;CHECK: vpaddsb (%rdi), %ymm0, %ymm0 {%k1} {z}
3060 %b = load <32 x i8>, <32 x i8>* %ptr_b
3061 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
3065 declare <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
3067 define <16 x i8> @test_mask_subs_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
3068 ;CHECK-LABEL: test_mask_subs_epi8_rr_128
3069 ;CHECK: vpsubsb %xmm1, %xmm0, %xmm0
3070 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
3074 define <16 x i8> @test_mask_subs_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
3075 ;CHECK-LABEL: test_mask_subs_epi8_rrk_128
3076 ;CHECK: vpsubsb %xmm1, %xmm0, %xmm2 {%k1}
3077 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
3081 define <16 x i8> @test_mask_subs_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
3082 ;CHECK-LABEL: test_mask_subs_epi8_rrkz_128
3083 ;CHECK: vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z}
3084 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
3088 define <16 x i8> @test_mask_subs_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
3089 ;CHECK-LABEL: test_mask_subs_epi8_rm_128
3090 ;CHECK: vpsubsb (%rdi), %xmm0, %xmm0
3091 %b = load <16 x i8>, <16 x i8>* %ptr_b
3092 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
3096 define <16 x i8> @test_mask_subs_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
3097 ;CHECK-LABEL: test_mask_subs_epi8_rmk_128
3098 ;CHECK: vpsubsb (%rdi), %xmm0, %xmm1 {%k1}
3099 %b = load <16 x i8>, <16 x i8>* %ptr_b
3100 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
3104 define <16 x i8> @test_mask_subs_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
3105 ;CHECK-LABEL: test_mask_subs_epi8_rmkz_128
3106 ;CHECK: vpsubsb (%rdi), %xmm0, %xmm0 {%k1} {z}
3107 %b = load <16 x i8>, <16 x i8>* %ptr_b
3108 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
3112 declare <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
3114 define <32 x i8> @test_mask_subs_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
3115 ;CHECK-LABEL: test_mask_subs_epi8_rr_256
3116 ;CHECK: vpsubsb %ymm1, %ymm0, %ymm0
3117 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
3121 define <32 x i8> @test_mask_subs_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
3122 ;CHECK-LABEL: test_mask_subs_epi8_rrk_256
3123 ;CHECK: vpsubsb %ymm1, %ymm0, %ymm2 {%k1}
3124 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
3128 define <32 x i8> @test_mask_subs_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
3129 ;CHECK-LABEL: test_mask_subs_epi8_rrkz_256
3130 ;CHECK: vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z}
3131 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
3135 define <32 x i8> @test_mask_subs_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
3136 ;CHECK-LABEL: test_mask_subs_epi8_rm_256
3137 ;CHECK: vpsubsb (%rdi), %ymm0, %ymm0
3138 %b = load <32 x i8>, <32 x i8>* %ptr_b
3139 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
3143 define <32 x i8> @test_mask_subs_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
3144 ;CHECK-LABEL: test_mask_subs_epi8_rmk_256
3145 ;CHECK: vpsubsb (%rdi), %ymm0, %ymm1 {%k1}
3146 %b = load <32 x i8>, <32 x i8>* %ptr_b
3147 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
3151 define <32 x i8> @test_mask_subs_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
3152 ;CHECK-LABEL: test_mask_subs_epi8_rmkz_256
3153 ;CHECK: vpsubsb (%rdi), %ymm0, %ymm0 {%k1} {z}
3154 %b = load <32 x i8>, <32 x i8>* %ptr_b
3155 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
3159 declare <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
3161 define <16 x i8> @test_mask_adds_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
3162 ;CHECK-LABEL: test_mask_adds_epu8_rr_128
3163 ;CHECK: vpaddusb %xmm1, %xmm0, %xmm0
3164 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
3168 define <16 x i8> @test_mask_adds_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
3169 ;CHECK-LABEL: test_mask_adds_epu8_rrk_128
3170 ;CHECK: vpaddusb %xmm1, %xmm0, %xmm2 {%k1}
3171 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
3175 define <16 x i8> @test_mask_adds_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
3176 ;CHECK-LABEL: test_mask_adds_epu8_rrkz_128
3177 ;CHECK: vpaddusb %xmm1, %xmm0, %xmm0 {%k1} {z}
3178 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
3182 define <16 x i8> @test_mask_adds_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
3183 ;CHECK-LABEL: test_mask_adds_epu8_rm_128
3184 ;CHECK: vpaddusb (%rdi), %xmm0, %xmm0
3185 %b = load <16 x i8>, <16 x i8>* %ptr_b
3186 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
3190 define <16 x i8> @test_mask_adds_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
3191 ;CHECK-LABEL: test_mask_adds_epu8_rmk_128
3192 ;CHECK: vpaddusb (%rdi), %xmm0, %xmm1 {%k1}
3193 %b = load <16 x i8>, <16 x i8>* %ptr_b
3194 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
3198 define <16 x i8> @test_mask_adds_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
3199 ;CHECK-LABEL: test_mask_adds_epu8_rmkz_128
3200 ;CHECK: vpaddusb (%rdi), %xmm0, %xmm0 {%k1} {z}
3201 %b = load <16 x i8>, <16 x i8>* %ptr_b
3202 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
3206 declare <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
3208 define <32 x i8> @test_mask_adds_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
3209 ;CHECK-LABEL: test_mask_adds_epu8_rr_256
3210 ;CHECK: vpaddusb %ymm1, %ymm0, %ymm0
3211 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
3215 define <32 x i8> @test_mask_adds_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
3216 ;CHECK-LABEL: test_mask_adds_epu8_rrk_256
3217 ;CHECK: vpaddusb %ymm1, %ymm0, %ymm2 {%k1}
3218 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
3222 define <32 x i8> @test_mask_adds_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
3223 ;CHECK-LABEL: test_mask_adds_epu8_rrkz_256
3224 ;CHECK: vpaddusb %ymm1, %ymm0, %ymm0 {%k1} {z}
3225 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
3229 define <32 x i8> @test_mask_adds_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
3230 ;CHECK-LABEL: test_mask_adds_epu8_rm_256
3231 ;CHECK: vpaddusb (%rdi), %ymm0, %ymm0
3232 %b = load <32 x i8>, <32 x i8>* %ptr_b
3233 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
3237 define <32 x i8> @test_mask_adds_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
3238 ;CHECK-LABEL: test_mask_adds_epu8_rmk_256
3239 ;CHECK: vpaddusb (%rdi), %ymm0, %ymm1 {%k1}
3240 %b = load <32 x i8>, <32 x i8>* %ptr_b
3241 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
3245 define <32 x i8> @test_mask_adds_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
3246 ;CHECK-LABEL: test_mask_adds_epu8_rmkz_256
3247 ;CHECK: vpaddusb (%rdi), %ymm0, %ymm0 {%k1} {z}
3248 %b = load <32 x i8>, <32 x i8>* %ptr_b
3249 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
3253 declare <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
3255 define <16 x i8> @test_mask_subs_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
3256 ;CHECK-LABEL: test_mask_subs_epu8_rr_128
3257 ;CHECK: vpsubusb %xmm1, %xmm0, %xmm0
3258 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
3262 define <16 x i8> @test_mask_subs_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
3263 ;CHECK-LABEL: test_mask_subs_epu8_rrk_128
3264 ;CHECK: vpsubusb %xmm1, %xmm0, %xmm2 {%k1}
3265 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
3269 define <16 x i8> @test_mask_subs_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
3270 ;CHECK-LABEL: test_mask_subs_epu8_rrkz_128
3271 ;CHECK: vpsubusb %xmm1, %xmm0, %xmm0 {%k1} {z}
3272 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
3276 define <16 x i8> @test_mask_subs_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
3277 ;CHECK-LABEL: test_mask_subs_epu8_rm_128
3278 ;CHECK: vpsubusb (%rdi), %xmm0, %xmm0
3279 %b = load <16 x i8>, <16 x i8>* %ptr_b
3280 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
3284 define <16 x i8> @test_mask_subs_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
3285 ;CHECK-LABEL: test_mask_subs_epu8_rmk_128
3286 ;CHECK: vpsubusb (%rdi), %xmm0, %xmm1 {%k1}
3287 %b = load <16 x i8>, <16 x i8>* %ptr_b
3288 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
3292 define <16 x i8> @test_mask_subs_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
3293 ;CHECK-LABEL: test_mask_subs_epu8_rmkz_128
3294 ;CHECK: vpsubusb (%rdi), %xmm0, %xmm0 {%k1} {z}
3295 %b = load <16 x i8>, <16 x i8>* %ptr_b
3296 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
3300 declare <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
3302 define <32 x i8> @test_mask_subs_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
3303 ;CHECK-LABEL: test_mask_subs_epu8_rr_256
3304 ;CHECK: vpsubusb %ymm1, %ymm0, %ymm0
3305 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
3309 define <32 x i8> @test_mask_subs_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
3310 ;CHECK-LABEL: test_mask_subs_epu8_rrk_256
3311 ;CHECK: vpsubusb %ymm1, %ymm0, %ymm2 {%k1}
3312 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
3316 define <32 x i8> @test_mask_subs_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
3317 ;CHECK-LABEL: test_mask_subs_epu8_rrkz_256
3318 ;CHECK: vpsubusb %ymm1, %ymm0, %ymm0 {%k1} {z}
3319 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
3323 define <32 x i8> @test_mask_subs_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
3324 ;CHECK-LABEL: test_mask_subs_epu8_rm_256
3325 ;CHECK: vpsubusb (%rdi), %ymm0, %ymm0
3326 %b = load <32 x i8>, <32 x i8>* %ptr_b
3327 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
3331 define <32 x i8> @test_mask_subs_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
3332 ;CHECK-LABEL: test_mask_subs_epu8_rmk_256
3333 ;CHECK: vpsubusb (%rdi), %ymm0, %ymm1 {%k1}
3334 %b = load <32 x i8>, <32 x i8>* %ptr_b
3335 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
3339 define <32 x i8> @test_mask_subs_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
3340 ;CHECK-LABEL: test_mask_subs_epu8_rmkz_256
3341 ;CHECK: vpsubusb (%rdi), %ymm0, %ymm0 {%k1} {z}
3342 %b = load <32 x i8>, <32 x i8>* %ptr_b
3343 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
3347 declare <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
3349 declare <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
3351 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_b_128
3353 ; CHECK: vpmaxsb %xmm
3355 define <16 x i8>@test_int_x86_avx512_mask_pmaxs_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) {
3356 %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2 ,i16 %mask)
3357 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
3358 %res2 = add <16 x i8> %res, %res1
3362 declare <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
3364 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_b_256
3366 ; CHECK: vpmaxsb %ymm
3368 define <32 x i8>@test_int_x86_avx512_mask_pmaxs_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
3369 %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
3370 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
3371 %res2 = add <32 x i8> %res, %res1
3375 declare <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
3377 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_w_128
3379 ; CHECK: vpmaxsw %xmm
3381 define <8 x i16>@test_int_x86_avx512_mask_pmaxs_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
3382 %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
3383 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
3384 %res2 = add <8 x i16> %res, %res1
3388 declare <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
3390 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_w_256
3392 ; CHECK: vpmaxsw %ymm
3394 define <16 x i16>@test_int_x86_avx512_mask_pmaxs_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
3395 %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
3396 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
3397 %res2 = add <16 x i16> %res, %res1
3398 ret <16 x i16> %res2
3401 declare <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
3403 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_b_128
3405 ; CHECK: vpmaxub %xmm
3407 define <16 x i8>@test_int_x86_avx512_mask_pmaxu_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2,i16 %mask) {
3408 %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask)
3409 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
3410 %res2 = add <16 x i8> %res, %res1
3414 declare <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
3416 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_b_256
3418 ; CHECK: vpmaxub %ymm
3420 define <32 x i8>@test_int_x86_avx512_mask_pmaxu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
3421 %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
3422 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
3423 %res2 = add <32 x i8> %res, %res1
3427 declare <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
3429 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_w_128
3431 ; CHECK: vpmaxuw %xmm
3433 define <8 x i16>@test_int_x86_avx512_mask_pmaxu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
3434 %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
3435 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
3436 %res2 = add <8 x i16> %res, %res1
3440 declare <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
3442 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_w_256
3444 ; CHECK: vpmaxuw %ymm
3446 define <16 x i16>@test_int_x86_avx512_mask_pmaxu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
3447 %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
3448 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
3449 %res2 = add <16 x i16> %res, %res1
3450 ret <16 x i16> %res2
3453 declare <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
3455 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_b_128
3457 ; CHECK: vpminsb %xmm
3459 define <16 x i8>@test_int_x86_avx512_mask_pmins_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) {
3460 %res = call <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask)
3461 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
3462 %res2 = add <16 x i8> %res, %res1
3466 declare <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
3468 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_b_256
3470 ; CHECK: vpminsb %ymm
3472 define <32 x i8>@test_int_x86_avx512_mask_pmins_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
3473 %res = call <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
3474 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
3475 %res2 = add <32 x i8> %res, %res1
3479 declare <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
3481 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_w_128
3483 ; CHECK: vpminsw %xmm
3485 define <8 x i16>@test_int_x86_avx512_mask_pmins_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
3486 %res = call <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
3487 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
3488 %res2 = add <8 x i16> %res, %res1
3492 declare <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
3494 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_w_256
3496 ; CHECK: vpminsw %ymm
3498 define <16 x i16>@test_int_x86_avx512_mask_pmins_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
3499 %res = call <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
3500 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
3501 %res2 = add <16 x i16> %res, %res1
3502 ret <16 x i16> %res2
3505 declare <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
3507 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_b_128
3509 ; CHECK: vpminub %xmm
3511 define <16 x i8>@test_int_x86_avx512_mask_pminu_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) {
3512 %res = call <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask)
3513 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
3514 %res2 = add <16 x i8> %res, %res1
3518 declare <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
3520 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_b_256
3522 ; CHECK: vpminub %ymm
3524 define <32 x i8>@test_int_x86_avx512_mask_pminu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
3525 %res = call <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
3526 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
3527 %res2 = add <32 x i8> %res, %res1
3531 declare <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
3533 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_w_128
3535 ; CHECK: vpminuw %xmm
3537 define <8 x i16>@test_int_x86_avx512_mask_pminu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
3538 %res = call <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
3539 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
3540 %res2 = add <8 x i16> %res, %res1
3544 declare <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
3546 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_w_256
3548 ; CHECK: vpminuw %ymm
3550 define <16 x i16>@test_int_x86_avx512_mask_pminu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
3551 %res = call <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
3552 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
3553 %res2 = add <16 x i16> %res, %res1
3554 ret <16 x i16> %res2
3557 declare <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
3559 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_hi_128
3562 ; CHECK: vpermt2w %xmm{{.*}}{%k1}
3564 define <8 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
3565 %res = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
3566 %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
3567 %res2 = add <8 x i16> %res, %res1
3571 declare <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
3573 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_hi_128
3576 ; CHECK: vpermt2w %xmm{{.*}}{%k1} {z}
3577 define <8 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
3578 %res = call <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
3579 %res1 = call <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
3580 %res2 = add <8 x i16> %res, %res1
3584 declare <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
3586 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_hi_256
3589 ; CHECK: vpermt2w %ymm{{.*}}{%k1}
3590 define <16 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
3591 %res = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
3592 %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
3593 %res2 = add <16 x i16> %res, %res1
3594 ret <16 x i16> %res2
3597 declare <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
3599 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_hi_256
3602 ; CHECK: vpermt2w %ymm{{.*}}{%k1} {z}
3603 define <16 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
3604 %res = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
3605 %res1 = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
3606 %res2 = add <16 x i16> %res, %res1
3607 ret <16 x i16> %res2
3610 declare <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
3612 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_hi_128
3615 ; CHECK: vpermi2w %xmm{{.*}}{%k1}
3616 define <8 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
3617 %res = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
3618 %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
3619 %res2 = add <8 x i16> %res, %res1
3623 declare <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
3625 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_hi_256
3628 ; CHECK: vpermi2w %ymm{{.*}}{%k1}
3629 define <16 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
3630 %res = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
3631 %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
3632 %res2 = add <16 x i16> %res, %res1
3633 ret <16 x i16> %res2
3636 declare <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
3638 ; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_b_128
3640 ; CHECK: vpavgb %xmm
3642 define <16 x i8>@test_int_x86_avx512_mask_pavg_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
3643 %res = call <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
3644 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
3645 %res2 = add <16 x i8> %res, %res1
3649 declare <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
3651 ; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_b_256
3653 ; CHECK: vpavgb %ymm
3655 define <32 x i8>@test_int_x86_avx512_mask_pavg_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
3656 %res = call <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
3657 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
3658 %res2 = add <32 x i8> %res, %res1
3662 declare <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
3664 ; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_w_128
3666 ; CHECK: vpavgw %xmm
3668 define <8 x i16>@test_int_x86_avx512_mask_pavg_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
3669 %res = call <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
3670 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
3671 %res2 = add <8 x i16> %res, %res1
3675 declare <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
3677 ; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_w_256
3679 ; CHECK: vpavgw %ymm
3681 define <16 x i16>@test_int_x86_avx512_mask_pavg_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
3682 %res = call <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
3683 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
3684 %res2 = add <16 x i16> %res, %res1
3685 ret <16 x i16> %res2
3688 declare <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
3690 ; CHECK-LABEL: @test_int_x86_avx512_mask_pshuf_b_128
3693 ; CHECK: vpshufb %xmm{{.*}}{%k1}
3694 define <16 x i8>@test_int_x86_avx512_mask_pshuf_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
3695 %res = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
3696 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
3697 %res2 = add <16 x i8> %res, %res1
3701 declare <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
3703 ; CHECK-LABEL: @test_int_x86_avx512_mask_pshuf_b_256
3706 ; CHECK: vpshufb %ymm{{.*}}{%k1}
3707 define <32 x i8>@test_int_x86_avx512_mask_pshuf_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
3708 %res = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
3709 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
3710 %res2 = add <32 x i8> %res, %res1
3714 declare <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8>, <16 x i8>, i16)
3716 ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_b_128
3719 ; CHECK: vpabsb{{.*}}{%k1}
3720 define <16 x i8>@test_int_x86_avx512_mask_pabs_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) {
3721 %res = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2)
3722 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1)
3723 %res2 = add <16 x i8> %res, %res1
3727 declare <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8>, <32 x i8>, i32)
3729 ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_b_256
3732 ; CHECK: vpabsb{{.*}}{%k1}
3733 define <32 x i8>@test_int_x86_avx512_mask_pabs_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) {
3734 %res = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2)
3735 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 -1)
3736 %res2 = add <32 x i8> %res, %res1
3740 declare <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16>, <8 x i16>, i8)
3742 ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_w_128
3745 ; CHECK: vpabsw{{.*}}{%k1}
3746 define <8 x i16>@test_int_x86_avx512_mask_pabs_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) {
3747 %res = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
3748 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1)
3749 %res2 = add <8 x i16> %res, %res1
3753 declare <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16>, <16 x i16>, i16)
3755 ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_w_256
3758 ; CHECK: vpabsw{{.*}}{%k1}
3759 define <16 x i16>@test_int_x86_avx512_mask_pabs_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) {
3760 %res = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2)
3761 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 -1)
3762 %res2 = add <16 x i16> %res, %res1
3763 ret <16 x i16> %res2
3766 ; CHECK-LABEL: test_x86_mask_blend_b_256
3768 define <32 x i8> @test_x86_mask_blend_b_256(i32 %a0, <32 x i8> %a1, <32 x i8> %a2) {
3769 %res = call <32 x i8> @llvm.x86.avx512.mask.blend.b.256(<32 x i8> %a1, <32 x i8> %a2, i32 %a0) ; <<32 x i8>> [#uses=1]
3772 declare <32 x i8> @llvm.x86.avx512.mask.blend.b.256(<32 x i8>, <32 x i8>, i32) nounwind readonly
3774 ; CHECK-LABEL: test_x86_mask_blend_w_256
3775 define <16 x i16> @test_x86_mask_blend_w_256(i16 %mask, <16 x i16> %a1, <16 x i16> %a2) {
3777 %res = call <16 x i16> @llvm.x86.avx512.mask.blend.w.256(<16 x i16> %a1, <16 x i16> %a2, i16 %mask) ; <<16 x i16>> [#uses=1]
3780 declare <16 x i16> @llvm.x86.avx512.mask.blend.w.256(<16 x i16>, <16 x i16>, i16) nounwind readonly
3782 ; CHECK-LABEL: test_x86_mask_blend_b_128
3784 define <16 x i8> @test_x86_mask_blend_b_128(i16 %a0, <16 x i8> %a1, <16 x i8> %a2) {
3785 %res = call <16 x i8> @llvm.x86.avx512.mask.blend.b.128(<16 x i8> %a1, <16 x i8> %a2, i16 %a0) ; <<16 x i8>> [#uses=1]
3788 declare <16 x i8> @llvm.x86.avx512.mask.blend.b.128(<16 x i8>, <16 x i8>, i16) nounwind readonly
3790 ; CHECK-LABEL: test_x86_mask_blend_w_128
3791 define <8 x i16> @test_x86_mask_blend_w_128(i8 %mask, <8 x i16> %a1, <8 x i16> %a2) {
3793 %res = call <8 x i16> @llvm.x86.avx512.mask.blend.w.128(<8 x i16> %a1, <8 x i16> %a2, i8 %mask) ; <<8 x i16>> [#uses=1]
3796 declare <8 x i16> @llvm.x86.avx512.mask.blend.w.128(<8 x i16>, <8 x i16>, i8) nounwind readonly
3798 declare <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
3800 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmulhu_w_128
3804 ; CHECK: vpmulhuw {{.*}}encoding: [0x62
3805 define <8 x i16>@test_int_x86_avx512_mask_pmulhu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
3806 %res = call <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
3807 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
3808 %res2 = add <8 x i16> %res, %res1
3812 declare <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
3814 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmulhu_w_256
3818 ; CHECK: vpmulhuw {{.*}}encoding: [0x62
3819 define <16 x i16>@test_int_x86_avx512_mask_pmulhu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
3820 %res = call <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
3821 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
3822 %res2 = add <16 x i16> %res, %res1
3823 ret <16 x i16> %res2
3826 declare <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
3828 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmulh_w_128
3832 ; CHECK: vpmulhw {{.*}}encoding: [0x62
3833 define <8 x i16>@test_int_x86_avx512_mask_pmulh_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
3834 %res = call <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
3835 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
3836 %res2 = add <8 x i16> %res, %res1
3840 declare <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
3841 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmulh_w_256
3845 ; CHECK: vpmulhw {{.*}}encoding: [0x62
3846 define <16 x i16>@test_int_x86_avx512_mask_pmulh_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
3847 %res = call <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
3848 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
3849 %res2 = add <16 x i16> %res, %res1
3850 ret <16 x i16> %res2
3853 declare <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
3854 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmulhr_sw_128
3858 ; CHECK: vpmulhrsw {{.*}}encoding: [0x62
3859 define <8 x i16>@test_int_x86_avx512_mask_pmulhr_sw_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
3860 %res = call <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
3861 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
3862 %res2 = add <8 x i16> %res, %res1
3866 declare <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
3867 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmulhr_sw_256
3871 ; CHECK: vpmulhrsw {{.*}}encoding: [0x62
3872 define <16 x i16>@test_int_x86_avx512_mask_pmulhr_sw_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
3873 %res = call <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
3874 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
3875 %res2 = add <16 x i16> %res, %res1
3876 ret <16 x i16> %res2
3879 declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16>, <16 x i8>, i8)
3881 define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) {
3882 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_128:
3883 ; CHECK: vpmovwb %xmm0, %xmm1 {%k1}
3884 ; CHECK-NEXT: vpmovwb %xmm0, %xmm2 {%k1} {z}
3885 ; CHECK-NEXT: vpmovwb %xmm0, %xmm0
3886 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1)
3887 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2)
3888 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2)
3889 %res3 = add <16 x i8> %res0, %res1
3890 %res4 = add <16 x i8> %res3, %res2
3894 declare void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16>, i8)
3896 define void @test_int_x86_avx512_mask_pmov_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
3897 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_128:
3898 ; CHECK: vpmovwb %xmm0, (%rdi)
3899 ; CHECK: vpmovwb %xmm0, (%rdi) {%k1}
3900 call void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
3901 call void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2)
3905 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16>, <16 x i8>, i8)
3907 define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) {
3908 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_128:
3909 ; CHECK: vpmovswb %xmm0, %xmm1 {%k1}
3910 ; CHECK-NEXT: vpmovswb %xmm0, %xmm2 {%k1} {z}
3911 ; CHECK-NEXT: vpmovswb %xmm0, %xmm0
3912 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1)
3913 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2)
3914 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2)
3915 %res3 = add <16 x i8> %res0, %res1
3916 %res4 = add <16 x i8> %res3, %res2
3920 declare void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16>, i8)
3922 define void @test_int_x86_avx512_mask_pmovs_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
3923 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_128:
3924 ; CHECK: vpmovswb %xmm0, (%rdi)
3925 ; CHECK: vpmovswb %xmm0, (%rdi) {%k1}
3926 call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
3927 call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2)
3931 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16>, <16 x i8>, i8)
3933 define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) {
3934 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_128:
3935 ; CHECK: vpmovuswb %xmm0, %xmm1 {%k1}
3936 ; CHECK-NEXT: vpmovuswb %xmm0, %xmm2 {%k1} {z}
3937 ; CHECK-NEXT: vpmovuswb %xmm0, %xmm0
3938 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1)
3939 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2)
3940 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2)
3941 %res3 = add <16 x i8> %res0, %res1
3942 %res4 = add <16 x i8> %res3, %res2
3946 declare void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16>, i8)
3948 define void @test_int_x86_avx512_mask_pmovus_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
3949 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_128:
3950 ; CHECK: vpmovuswb %xmm0, (%rdi)
3951 ; CHECK: vpmovuswb %xmm0, (%rdi) {%k1}
3952 call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
3953 call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2)
3957 declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16>, <16 x i8>, i16)
3959 define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
3960 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_256:
3961 ; CHECK: vpmovwb %ymm0, %xmm1 {%k1}
3962 ; CHECK-NEXT: vpmovwb %ymm0, %xmm2 {%k1} {z}
3963 ; CHECK-NEXT: vpmovwb %ymm0, %xmm0
3964 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1)
3965 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2)
3966 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2)
3967 %res3 = add <16 x i8> %res0, %res1
3968 %res4 = add <16 x i8> %res3, %res2
3972 declare void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16>, i16)
3974 define void @test_int_x86_avx512_mask_pmov_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
3975 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_256:
3976 ; CHECK: vpmovwb %ymm0, (%rdi)
3977 ; CHECK: vpmovwb %ymm0, (%rdi) {%k1}
3978 call void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
3979 call void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2)
3983 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16>, <16 x i8>, i16)
3985 define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
3986 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_256:
3987 ; CHECK: vpmovswb %ymm0, %xmm1 {%k1}
3988 ; CHECK-NEXT: vpmovswb %ymm0, %xmm2 {%k1} {z}
3989 ; CHECK-NEXT: vpmovswb %ymm0, %xmm0
3990 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1)
3991 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2)
3992 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2)
3993 %res3 = add <16 x i8> %res0, %res1
3994 %res4 = add <16 x i8> %res3, %res2
3998 declare void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16>, i16)
4000 define void @test_int_x86_avx512_mask_pmovs_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
4001 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_256:
4002 ; CHECK: vpmovswb %ymm0, (%rdi)
4003 ; CHECK: vpmovswb %ymm0, (%rdi) {%k1}
4004 call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
4005 call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2)
4009 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16>, <16 x i8>, i16)
4011 define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
4012 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_256:
4013 ; CHECK: vpmovuswb %ymm0, %xmm1 {%k1}
4014 ; CHECK-NEXT: vpmovuswb %ymm0, %xmm2 {%k1} {z}
4015 ; CHECK-NEXT: vpmovuswb %ymm0, %xmm0
4016 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1)
4017 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2)
4018 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2)
4019 %res3 = add <16 x i8> %res0, %res1
4020 %res4 = add <16 x i8> %res3, %res2
4024 declare void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16>, i16)
4026 define void @test_int_x86_avx512_mask_pmovus_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
4027 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_256:
4028 ; CHECK: vpmovuswb %ymm0, (%rdi)
4029 ; CHECK: vpmovuswb %ymm0, (%rdi) {%k1}
4030 call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
4031 call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2)
4035 declare <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16>, <8 x i16>, <4 x i32>, i8)
4037 define <4 x i32>@test_int_x86_avx512_mask_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) {
4038 ; CHECK-LABEL: test_int_x86_avx512_mask_pmaddw_d_128:
4040 ; CHECK-NEXT: movzbl %dil, %eax
4041 ; CHECK-NEXT: kmovw %eax, %k1
4042 ; CHECK-NEXT: vpmaddwd %xmm1, %xmm0, %xmm2 {%k1}
4043 ; CHECK-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0
4044 ; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
4046 %res = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3)
4047 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 -1)
4048 %res2 = add <4 x i32> %res, %res1
4052 declare <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16>, <16 x i16>, <8 x i32>, i8)
4054 define <8 x i32>@test_int_x86_avx512_mask_pmaddw_d_256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3) {
4055 ; CHECK-LABEL: test_int_x86_avx512_mask_pmaddw_d_256:
4057 ; CHECK-NEXT: movzbl %dil, %eax
4058 ; CHECK-NEXT: kmovw %eax, %k1
4059 ; CHECK-NEXT: vpmaddwd %ymm1, %ymm0, %ymm2 {%k1}
4060 ; CHECK-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0
4061 ; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
4063 %res = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3)
4064 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 -1)
4065 %res2 = add <8 x i32> %res, %res1
4069 declare <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8>, <16 x i8>, <8 x i16>, i8)
4071 define <8 x i16>@test_int_x86_avx512_mask_pmaddubs_w_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3) {
4072 ; CHECK-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128:
4074 ; CHECK-NEXT: movzbl %dil, %eax
4075 ; CHECK-NEXT: kmovw %eax, %k1
4076 ; CHECK-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1}
4077 ; CHECK-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0
4078 ; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0
4080 %res = call <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3)
4081 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 -1)
4082 %res2 = add <8 x i16> %res, %res1
4086 declare <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8>, <32 x i8>, <16 x i16>, i16)
4088 define <16 x i16>@test_int_x86_avx512_mask_pmaddubs_w_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3) {
4089 ; CHECK-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256:
4091 ; CHECK-NEXT: kmovw %edi, %k1
4092 ; CHECK-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1}
4093 ; CHECK-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0
4094 ; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0
4096 %res = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3)
4097 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 -1)
4098 %res2 = add <16 x i16> %res, %res1
4099 ret <16 x i16> %res2
4102 declare <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
4104 define <16 x i8>@test_int_x86_avx512_mask_punpckhb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
4105 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhb_w_128:
4106 ; CHECK: vpunpckhbw %xmm1, %xmm0, %xmm2 {%k1}
4107 ; CHECK-NEXT: ## xmm2 = xmm2[8],k1[8],xmm2[9],k1[9],xmm2[10],k1[10],xmm2[11],k1[11],xmm2[12],k1[12],xmm2[13],k1[13],xmm2[14],k1[14],xmm2[15],k1[15]
4108 ; CHECK-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x68,0xc1]
4109 ; CHECK-NEXT: ## xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
4110 %res = call <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
4111 %res1 = call <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
4112 %res2 = add <16 x i8> %res, %res1
4116 declare <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
4118 define <16 x i8>@test_int_x86_avx512_mask_punpcklb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
4119 ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklb_w_128:
4120 ; CHECK: vpunpcklbw %xmm1, %xmm0, %xmm2 {%k1}
4121 ; CHECK-NEXT: ## xmm2 = xmm2[0],k1[0],xmm2[1],k1[1],xmm2[2],k1[2],xmm2[3],k1[3],xmm2[4],k1[4],xmm2[5],k1[5],xmm2[6],k1[6],xmm2[7],k1[7]
4122 ; CHECK-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x60,0xc1]
4123 ; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
4124 %res = call <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
4125 %res1 = call <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
4126 %res2 = add <16 x i8> %res, %res1
4130 declare <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
4132 define <32 x i8>@test_int_x86_avx512_mask_punpckhb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
4133 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhb_w_256:
4134 ; CHECK: vpunpckhbw %ymm1, %ymm0, %ymm2 {%k1}
4135 ; CHECK-NEXT: ## ymm2 = ymm2[8],k1[8],ymm2[9],k1[9],ymm2[10],k1[10],ymm2[11],k1[11],ymm2[12],k1[12],ymm2[13],k1[13],ymm2[14],k1[14],ymm2[15],k1[15],ymm2[24],k1[24],ymm2[25],k1[25],ymm2[26],k1[26],ymm2[27],k1[27],ymm2[28],k1[28],ymm2[29],k1[29],ymm2[30],k1[30],ymm2[31],k1[31]
4136 ; CHECK-NEXT: vpunpckhbw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x68,0xc1]
4137 ; CHECK-NEXT: ## ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
4138 %res = call <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
4139 %res1 = call <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
4140 %res2 = add <32 x i8> %res, %res1
4144 declare <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
4146 define <32 x i8>@test_int_x86_avx512_mask_punpcklb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
4147 ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklb_w_256:
4148 ; CHECK: vpunpcklbw %ymm1, %ymm0, %ymm2 {%k1}
4149 ; CHECK-NEXT: ## ymm2 = ymm2[0],k1[0],ymm2[1],k1[1],ymm2[2],k1[2],ymm2[3],k1[3],ymm2[4],k1[4],ymm2[5],k1[5],ymm2[6],k1[6],ymm2[7],k1[7],ymm2[16],k1[16],ymm2[17],k1[17],ymm2[18],k1[18],ymm2[19],k1[19],ymm2[20],k1[20],ymm2[21],k1[21],ymm2[22],k1[22],ymm2[23],k1[23]
4150 ; CHECK-NEXT: vpunpcklbw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x60,0xc1]
4151 ; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
4152 %res = call <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
4153 %res1 = call <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
4154 %res2 = add <32 x i8> %res, %res1
4158 declare <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
4160 define <8 x i16>@test_int_x86_avx512_mask_punpcklw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
4161 ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklw_d_128:
4162 ; CHECK: vpunpcklwd %xmm1, %xmm0, %xmm2 {%k1}
4163 ; CHECK-NEXT: ## xmm2 = xmm2[0],k1[0],xmm2[1],k1[1],xmm2[2],k1[2],xmm2[3],k1[3]
4164 ; CHECK-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x61,0xc1]
4165 ; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
4166 %res = call <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
4167 %res1 = call <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
4168 %res2 = add <8 x i16> %res, %res1
4172 declare <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
4174 define <8 x i16>@test_int_x86_avx512_mask_punpckhw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
4175 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhw_d_128:
4176 ; CHECK: vpunpckhwd %xmm1, %xmm0, %xmm2 {%k1}
4177 ; CHECK-NEXT: ## xmm2 = xmm2[4],k1[4],xmm2[5],k1[5],xmm2[6],k1[6],xmm2[7],k1[7]
4178 ; CHECK-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x69,0xc1]
4179 ; CHECK-NEXT: ## xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
4180 %res = call <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
4181 %res1 = call <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
4182 %res2 = add <8 x i16> %res, %res1
4186 declare <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
4188 define <16 x i16>@test_int_x86_avx512_mask_punpcklw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
4189 ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklw_d_256:
4190 ; CHECK: vpunpcklwd %ymm1, %ymm0, %ymm2 {%k1}
4191 ; CHECK-NEXT: ## ymm2 = ymm2[0],k1[0],ymm2[1],k1[1],ymm2[2],k1[2],ymm2[3],k1[3],ymm2[8],k1[8],ymm2[9],k1[9],ymm2[10],k1[10],ymm2[11],k1[11]
4192 ; CHECK-NEXT: vpunpcklwd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x61,0xc1]
4193 ; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
4194 %res = call <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
4195 %res1 = call <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
4196 %res2 = add <16 x i16> %res, %res1
4197 ret <16 x i16> %res2
4200 declare <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
4202 define <16 x i16>@test_int_x86_avx512_mask_punpckhw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
4203 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhw_d_256:
4204 ; CHECK: vpunpckhwd %ymm1, %ymm0, %ymm2 {%k1}
4205 ; CHECK-NEXT: ## ymm2 = ymm2[4],k1[4],ymm2[5],k1[5],ymm2[6],k1[6],ymm2[7],k1[7],ymm2[12],k1[12],ymm2[13],k1[13],ymm2[14],k1[14],ymm2[15],k1[15]
4206 ; CHECK-NEXT: vpunpckhwd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x69,0xc1]
4207 ; CHECK-NEXT: ## ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
4208 %res = call <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
4209 %res1 = call <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
4210 %res2 = add <16 x i16> %res, %res1
4211 ret <16 x i16> %res2
4214 declare <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8>, <16 x i8>, i32, <16 x i8>, i16)
4216 define <16 x i8>@test_int_x86_avx512_mask_palignr_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x3, i16 %x4) {
4217 ; CHECK-LABEL: test_int_x86_avx512_mask_palignr_128:
4219 ; CHECK-NEXT: kmovw %edi, %k1
4220 ; CHECK-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm2 {%k1}
4221 ; CHECK-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm3 {%k1} {z}
4222 ; CHECK-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm0
4223 ; CHECK-NEXT: vpaddb %xmm3, %xmm2, %xmm1
4224 ; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0
4226 %res = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> %x3, i16 %x4)
4227 %res1 = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> zeroinitializer, i16 %x4)
4228 %res2 = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> %x3, i16 -1)
4229 %res3 = add <16 x i8> %res, %res1
4230 %res4 = add <16 x i8> %res3, %res2
4234 declare <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8>, <32 x i8>, i32, <32 x i8>, i32)
4236 define <32 x i8>@test_int_x86_avx512_mask_palignr_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x3, i32 %x4) {
4237 ; CHECK-LABEL: test_int_x86_avx512_mask_palignr_256:
4239 ; CHECK-NEXT: kmovd %edi, %k1
4240 ; CHECK-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm2 {%k1}
4241 ; CHECK-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm3 {%k1} {z}
4242 ; CHECK-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm0
4243 ; CHECK-NEXT: vpaddb %ymm3, %ymm2, %ymm1
4244 ; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0
4246 %res = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> %x3, i32 %x4)
4247 %res1 = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> zeroinitializer, i32 %x4)
4248 %res2 = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> %x3, i32 -1)
4249 %res3 = add <32 x i8> %res, %res1
4250 %res4 = add <32 x i8> %res3, %res2
4254 declare <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8>, <16 x i8>, i32, <8 x i16>, i8)
4256 define <8 x i16>@test_int_x86_avx512_mask_dbpsadbw_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x3, i8 %x4) {
4257 ; CHECK-LABEL: test_int_x86_avx512_mask_dbpsadbw_128:
4259 ; CHECK-NEXT: movzbl %dil, %eax
4260 ; CHECK-NEXT: kmovw %eax, %k1
4261 ; CHECK-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1}
4262 ; CHECK-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm3 {%k1} {z}
4263 ; CHECK-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm0
4264 ; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm1
4265 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0
4267 %res = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> %x3, i8 %x4)
4268 %res1 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> zeroinitializer, i8 %x4)
4269 %res2 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> %x3, i8 -1)
4270 %res3 = add <8 x i16> %res, %res1
4271 %res4 = add <8 x i16> %res2, %res3
4275 declare <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8>, <32 x i8>, i32, <16 x i16>, i16)
4277 define <16 x i16>@test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x3, i16 %x4) {
4278 ; CHECK-LABEL: test_int_x86_avx512_mask_dbpsadbw_256:
4280 ; CHECK-NEXT: kmovw %edi, %k1
4281 ; CHECK-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1}
4282 ; CHECK-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm3 {%k1} {z}
4283 ; CHECK-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm0
4284 ; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm1
4285 ; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0
4287 %res = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> %x3, i16 %x4)
4288 %res1 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> zeroinitializer, i16 %x4)
4289 %res2 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> %x3, i16 -1)
4290 %res3 = add <16 x i16> %res, %res1
4291 %res4 = add <16 x i16> %res3, %res2
4292 ret <16 x i16> %res4
4295 declare <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8>, <32 x i8>, i32)
4297 define <32 x i8>@test_int_x86_avx512_pbroadcastb_256(<16 x i8> %x0, <32 x i8> %x1, i32 %mask) {
4298 ; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_256:
4300 ; CHECK-NEXT: kmovd %edi, %k1
4301 ; CHECK-NEXT: vpbroadcastb %xmm0, %ymm1 {%k1}
4302 ; CHECK-NEXT: vpbroadcastb %xmm0, %ymm2 {%k1} {z}
4303 ; CHECK-NEXT: vpbroadcastb %xmm0, %ymm0
4304 ; CHECK-NEXT: vpaddb %ymm1, %ymm0, %ymm0
4305 ; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0
4307 %res = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 -1)
4308 %res1 = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 %mask)
4309 %res2 = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> zeroinitializer, i32 %mask)
4310 %res3 = add <32 x i8> %res, %res1
4311 %res4 = add <32 x i8> %res2, %res3
4315 declare <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8>, <16 x i8>, i16)
4317 define <16 x i8>@test_int_x86_avx512_pbroadcastb_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) {
4318 ; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_128:
4320 ; CHECK-NEXT: kmovw %edi, %k1
4321 ; CHECK-NEXT: vpbroadcastb %xmm0, %xmm1 {%k1}
4322 ; CHECK-NEXT: vpbroadcastb %xmm0, %xmm2 {%k1} {z}
4323 ; CHECK-NEXT: vpbroadcastb %xmm0, %xmm0
4324 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
4325 ; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0
4327 %res = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1)
4328 %res1 = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask)
4329 %res2 = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> zeroinitializer, i16 %mask)
4330 %res3 = add <16 x i8> %res, %res1
4331 %res4 = add <16 x i8> %res2, %res3
4335 declare <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16>, <16 x i16>, i16)
4337 define <16 x i16>@test_int_x86_avx512_pbroadcastw_256(<8 x i16> %x0, <16 x i16> %x1, i16 %mask) {
4338 ; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_256:
4340 ; CHECK-NEXT: kmovw %edi, %k1
4341 ; CHECK-NEXT: vpbroadcastw %xmm0, %ymm1 {%k1}
4342 ; CHECK-NEXT: vpbroadcastw %xmm0, %ymm2 {%k1} {z}
4343 ; CHECK-NEXT: vpbroadcastw %xmm0, %ymm0
4344 ; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0
4345 ; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0
4347 %res = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 -1)
4348 %res1 = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 %mask)
4349 %res2 = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> zeroinitializer, i16 %mask)
4350 %res3 = add <16 x i16> %res, %res1
4351 %res4 = add <16 x i16> %res2, %res3
4352 ret <16 x i16> %res4
4355 declare <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16>, <8 x i16>, i8)
4357 define <8 x i16>@test_int_x86_avx512_pbroadcastw_128(<8 x i16> %x0, <8 x i16> %x1, i8 %mask) {
4358 ; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_128:
4360 ; CHECK-NEXT: movzbl %dil, %eax
4361 ; CHECK-NEXT: kmovw %eax, %k1
4362 ; CHECK-NEXT: vpbroadcastw %xmm0, %xmm1 {%k1}
4363 ; CHECK-NEXT: vpbroadcastw %xmm0, %xmm2 {%k1} {z}
4364 ; CHECK-NEXT: vpbroadcastw %xmm0, %xmm0
4365 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0
4366 ; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0
4368 %res = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1)
4369 %res1 = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 %mask)
4370 %res2 = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> zeroinitializer, i8 %mask)
4371 %res3 = add <8 x i16> %res, %res1
4372 %res4 = add <8 x i16> %res2, %res3
4376 declare <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8>, <64 x i8>, i64)
4378 define <64 x i8>@test_int_x86_avx512_pbroadcastb_512(<16 x i8> %x0, <64 x i8> %x1, i64 %mask) {
4379 ; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_512:
4381 ; CHECK-NEXT: kmovq %rdi, %k1 ## encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
4382 ; CHECK-NEXT: vpbroadcastb %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x78,0xc8]
4383 ; CHECK-NEXT: vpbroadcastb %xmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x78,0xd0]
4384 ; CHECK-NEXT: vpbroadcastb %xmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x78,0xc0]
4385 ; CHECK-NEXT: vpaddb %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc1]
4386 ; CHECK-NEXT: vpaddb %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc0]
4387 ; CHECK-NEXT: retq ## encoding: [0xc3]
4388 %res = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> %x1, i64 -1)
4389 %res1 = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> %x1, i64 %mask)
4390 %res2 = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> zeroinitializer, i64 %mask)
4391 %res3 = add <64 x i8> %res, %res1
4392 %res4 = add <64 x i8> %res2, %res3
4396 declare <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16>, <32 x i16>, i32)
4398 define <32 x i16>@test_int_x86_avx512_pbroadcastw_512(<8 x i16> %x0, <32 x i16> %x1, i32 %mask) {
4399 ; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_512:
4401 ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
4402 ; CHECK-NEXT: vpbroadcastw %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x79,0xc8]
4403 ; CHECK-NEXT: vpbroadcastw %xmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x79,0xd0]
4404 ; CHECK-NEXT: vpbroadcastw %xmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x79,0xc0]
4405 ; CHECK-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1]
4406 ; CHECK-NEXT: vpaddw %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
4407 ; CHECK-NEXT: retq ## encoding: [0xc3]
4408 %res = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> %x1, i32 -1)
4409 %res1 = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> %x1, i32 %mask)
4410 %res2 = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> zeroinitializer, i32 %mask)
4411 %res3 = add <32 x i16> %res, %res1
4412 %res4 = add <32 x i16> %res2, %res3
4413 ret <32 x i16> %res4