1 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
3 ; AVX2 Logical Shift Left
; shl by 0 is a no-op: the shift must be folded away entirely.
5 define <16 x i16> @test_sllw_1(<16 x i16> %InVec) {
7 %shl = shl <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
11 ; CHECK-LABEL: test_sllw_1:
12 ; CHECK-NOT: vpsllw $0, %ymm0, %ymm0
; shl by 1 should lower to the cheaper vpaddw (x + x == x << 1).
15 define <16 x i16> @test_sllw_2(<16 x i16> %InVec) {
17 %shl = shl <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
21 ; CHECK-LABEL: test_sllw_2:
22 ; CHECK: vpaddw %ymm0, %ymm0, %ymm0
; Maximum in-range word shift (15) stays a single immediate vpsllw.
25 define <16 x i16> @test_sllw_3(<16 x i16> %InVec) {
27 %shl = shl <16 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
31 ; CHECK-LABEL: test_sllw_3:
32 ; CHECK: vpsllw $15, %ymm0, %ymm0
; shl by 0 on dwords must be folded away (no vpslld emitted).
35 define <8 x i32> @test_slld_1(<8 x i32> %InVec) {
37 %shl = shl <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
41 ; CHECK-LABEL: test_slld_1:
42 ; CHECK-NOT: vpslld $0, %ymm0, %ymm0
; shl by 1 on dwords should lower to vpaddd (x + x).
45 define <8 x i32> @test_slld_2(<8 x i32> %InVec) {
47 %shl = shl <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
51 ; CHECK-LABEL: test_slld_2:
52 ; CHECK: vpaddd %ymm0, %ymm0, %ymm0
; Variable amount: only lane 0 of %amt is defined (rest undef), so the
; lowering can use the xmm-count register form of vpslld.
55 define <8 x i32> @test_vpslld_var(i32 %shift) {
56 %amt = insertelement <8 x i32> undef, i32 %shift, i32 0
57 %tmp = shl <8 x i32> <i32 192, i32 193, i32 194, i32 195, i32 196, i32 197, i32 198, i32 199>, %amt
61 ; CHECK-LABEL: test_vpslld_var:
62 ; CHECK: vpslld %xmm0, %ymm1, %ymm0
; Maximum in-range dword shift (31) stays an immediate vpslld.
65 define <8 x i32> @test_slld_3(<8 x i32> %InVec) {
67 %shl = shl <8 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
71 ; CHECK-LABEL: test_slld_3:
72 ; CHECK: vpslld $31, %ymm0, %ymm0
; shl by 0 on qwords must be folded away (no vpsllq emitted).
75 define <4 x i64> @test_sllq_1(<4 x i64> %InVec) {
77 %shl = shl <4 x i64> %InVec, <i64 0, i64 0, i64 0, i64 0>
81 ; CHECK-LABEL: test_sllq_1:
82 ; CHECK-NOT: vpsllq $0, %ymm0, %ymm0
; shl by 1 on qwords should lower to vpaddq (x + x).
85 define <4 x i64> @test_sllq_2(<4 x i64> %InVec) {
87 %shl = shl <4 x i64> %InVec, <i64 1, i64 1, i64 1, i64 1>
91 ; CHECK-LABEL: test_sllq_2:
92 ; CHECK: vpaddq %ymm0, %ymm0, %ymm0
; Maximum in-range qword shift (63) stays an immediate vpsllq.
95 define <4 x i64> @test_sllq_3(<4 x i64> %InVec) {
97 %shl = shl <4 x i64> %InVec, <i64 63, i64 63, i64 63, i64 63>
101 ; CHECK-LABEL: test_sllq_3:
102 ; CHECK: vpsllq $63, %ymm0, %ymm0
105 ; AVX2 Arithmetic Shift
; ashr by 0 is a no-op and must be folded away (no vpsraw emitted).
; NOTE(review): the results below are named %shl although the op is ashr;
; the (elided) ret lines reference that name, so it is kept as-is here.
107 define <16 x i16> @test_sraw_1(<16 x i16> %InVec) {
109 %shl = ashr <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
113 ; CHECK-LABEL: test_sraw_1:
114 ; CHECK-NOT: vpsraw $0, %ymm0, %ymm0
; Arithmetic shifts have no add-based shortcut: ashr by 1 stays vpsraw $1.
117 define <16 x i16> @test_sraw_2(<16 x i16> %InVec) {
119 %shl = ashr <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
123 ; CHECK-LABEL: test_sraw_2:
124 ; CHECK: vpsraw $1, %ymm0, %ymm0
; Maximum in-range word ashr (15) stays an immediate vpsraw.
127 define <16 x i16> @test_sraw_3(<16 x i16> %InVec) {
129 %shl = ashr <16 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
133 ; CHECK-LABEL: test_sraw_3:
134 ; CHECK: vpsraw $15, %ymm0, %ymm0
; ashr by 0 on dwords must be folded away (no vpsrad emitted).
139 define <8 x i32> @test_srad_1(<8 x i32> %InVec) {
141 %shl = ashr <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
143 ; CHECK-LABEL: test_srad_1:
144 ; CHECK-NOT: vpsrad $0, %ymm0, %ymm0
; ashr by 1 on dwords stays an immediate vpsrad.
147 define <8 x i32> @test_srad_2(<8 x i32> %InVec) {
149 %shl = ashr <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
153 ; CHECK-LABEL: test_srad_2:
154 ; CHECK: vpsrad $1, %ymm0, %ymm0
; Maximum in-range dword ashr (31) stays an immediate vpsrad.
157 define <8 x i32> @test_srad_3(<8 x i32> %InVec) {
159 %shl = ashr <8 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
163 ; CHECK-LABEL: test_srad_3:
164 ; CHECK: vpsrad $31, %ymm0, %ymm0
167 ; AVX2 Logical Shift Right
; lshr by 0 is a no-op and must be folded away (no vpsrlw emitted).
; NOTE(review): results below are named %shl although the op is lshr;
; the (elided) ret lines reference that name, so it is kept as-is here.
169 define <16 x i16> @test_srlw_1(<16 x i16> %InVec) {
171 %shl = lshr <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
175 ; CHECK-LABEL: test_srlw_1:
176 ; CHECK-NOT: vpsrlw $0, %ymm0, %ymm0
; lshr by 1 on words stays an immediate vpsrlw.
179 define <16 x i16> @test_srlw_2(<16 x i16> %InVec) {
181 %shl = lshr <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
185 ; CHECK-LABEL: test_srlw_2:
186 ; CHECK: vpsrlw $1, %ymm0, %ymm0
; Maximum in-range word lshr (15) stays an immediate vpsrlw.
189 define <16 x i16> @test_srlw_3(<16 x i16> %InVec) {
191 %shl = lshr <16 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
195 ; CHECK-LABEL: test_srlw_3:
196 ; CHECK: vpsrlw $15, %ymm0, %ymm0
; lshr by 0 on dwords must be folded away (no vpsrld emitted).
199 define <8 x i32> @test_srld_1(<8 x i32> %InVec) {
201 %shl = lshr <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
205 ; CHECK-LABEL: test_srld_1:
206 ; CHECK-NOT: vpsrld $0, %ymm0, %ymm0
; lshr by 1 on dwords stays an immediate vpsrld.
209 define <8 x i32> @test_srld_2(<8 x i32> %InVec) {
211 %shl = lshr <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
215 ; CHECK-LABEL: test_srld_2:
216 ; CHECK: vpsrld $1, %ymm0, %ymm0
; Maximum in-range dword lshr (31) stays an immediate vpsrld.
219 define <8 x i32> @test_srld_3(<8 x i32> %InVec) {
221 %shl = lshr <8 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
225 ; CHECK-LABEL: test_srld_3:
226 ; CHECK: vpsrld $31, %ymm0, %ymm0
; lshr by 0 on qwords must be folded away (no vpsrlq emitted).
229 define <4 x i64> @test_srlq_1(<4 x i64> %InVec) {
231 %shl = lshr <4 x i64> %InVec, <i64 0, i64 0, i64 0, i64 0>
235 ; CHECK-LABEL: test_srlq_1:
236 ; CHECK-NOT: vpsrlq $0, %ymm0, %ymm0
; lshr by 1 on qwords stays an immediate vpsrlq.
239 define <4 x i64> @test_srlq_2(<4 x i64> %InVec) {
241 %shl = lshr <4 x i64> %InVec, <i64 1, i64 1, i64 1, i64 1>
245 ; CHECK-LABEL: test_srlq_2:
246 ; CHECK: vpsrlq $1, %ymm0, %ymm0
; Maximum in-range qword lshr (63) stays an immediate vpsrlq.
249 define <4 x i64> @test_srlq_3(<4 x i64> %InVec) {
251 %shl = lshr <4 x i64> %InVec, <i64 63, i64 63, i64 63, i64 63>
255 ; CHECK-LABEL: test_srlq_3:
256 ; CHECK: vpsrlq $63, %ymm0, %ymm0
; and+trunc of the 64-bit shift amounts should fold into a single
; variable dword shift (vpsrlvd) with no separate mask/truncate.
259 ; CHECK-LABEL: @srl_trunc_and_v4i64
261 ; CHECK-NEXT: vpsrlvd
263 define <4 x i32> @srl_trunc_and_v4i64(<4 x i32> %x, <4 x i64> %y) nounwind {
264 %and = and <4 x i64> %y, <i64 8, i64 8, i64 8, i64 8>
265 %trunc = trunc <4 x i64> %and to <4 x i32>
; NOTE(review): result named %sra but the op is lshr (logical shift);
; the (elided) ret line references %sra, so the name is kept.
266 %sra = lshr <4 x i32> %x, %trunc
271 ; Vectorized byte shifts
; Variable per-element word shift: zero-extend both operands to dwords,
; do one vpsllvd, then pack the low words back down (vpshufb + vpermq).
274 define <8 x i16> @shl_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
275 ; CHECK-LABEL: shl_8i16
276 ; CHECK: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
277 ; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
278 ; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
279 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
280 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
282 %shl = shl <8 x i16> %r, %a
; Variable per-element word shift on a full ymm: unpack words into the
; high/low dword halves, vpsllvd each half, shift the results back into
; the low words, and repack with vpackusdw.
286 define <16 x i16> @shl_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
287 ; CHECK-LABEL: shl_16i16
288 ; CHECK: vpxor %ymm2, %ymm2, %ymm2
289 ; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
290 ; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
291 ; CHECK-NEXT: vpsllvd %ymm3, %ymm4, %ymm3
292 ; CHECK-NEXT: vpsrld $16, %ymm3, %ymm3
293 ; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
294 ; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
295 ; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
296 ; CHECK-NEXT: vpsrld $16, %ymm0, %ymm0
297 ; CHECK-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
299 %shl = shl <16 x i16> %r, %a
; Variable per-element byte shift: no byte-shift instruction exists, so
; the amount is moved into each byte's sign bit (vpsllw $5) and the
; result is built by three blend steps (shift by 4, 2, then 1).
303 define <32 x i8> @shl_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
304 ; CHECK-LABEL: shl_32i8
305 ; CHECK: vpsllw $5, %ymm1, %ymm1
306 ; CHECK-NEXT: vpsllw $4, %ymm0, %ymm2
307 ; CHECK-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
308 ; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
309 ; CHECK-NEXT: vpsllw $2, %ymm0, %ymm2
310 ; CHECK-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
311 ; CHECK-NEXT: vpaddb %ymm1, %ymm1, %ymm1
312 ; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
313 ; CHECK-NEXT: vpaddb %ymm0, %ymm0, %ymm2
314 ; CHECK-NEXT: vpaddb %ymm1, %ymm1, %ymm1
315 ; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
317 %shl = shl <32 x i8> %r, %a
; Variable per-element word ashr: zero-extend the amounts, sign-extend
; the values (vpmovsxwd), vpsravd once, then pack the low words back.
321 define <8 x i16> @ashr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
322 ; CHECK-LABEL: ashr_8i16
323 ; CHECK: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
324 ; CHECK-NEXT: vpmovsxwd %xmm0, %ymm0
325 ; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm0
326 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
327 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
329 %ashr = ashr <8 x i16> %r, %a
; Variable per-element word ashr on a full ymm: same unpack/vpsravd/
; repack scheme as shl_16i16, with vpsravd as the inner shift.
333 define <16 x i16> @ashr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
334 ; CHECK-LABEL: ashr_16i16
335 ; CHECK: vpxor %ymm2, %ymm2, %ymm2
336 ; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
337 ; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
338 ; CHECK-NEXT: vpsravd %ymm3, %ymm4, %ymm3
339 ; CHECK-NEXT: vpsrld $16, %ymm3, %ymm3
340 ; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
341 ; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
342 ; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm0
343 ; CHECK-NEXT: vpsrld $16, %ymm0, %ymm0
344 ; CHECK-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
346 %ashr = ashr <16 x i16> %r, %a
; Variable per-element byte ashr: bytes are widened to words via
; unpck{h,l}bw, shifted with blended vpsraw steps ($4, $2, $1) keyed off
; the amount's sign bit (vpsllw $5), then repacked with vpackuswb.
350 define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
351 ; CHECK-LABEL: ashr_32i8
352 ; CHECK: vpsllw $5, %ymm1, %ymm1
353 ; CHECK-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
354 ; CHECK-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
355 ; CHECK-NEXT: vpsraw $4, %ymm3, %ymm4
356 ; CHECK-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
357 ; CHECK-NEXT: vpsraw $2, %ymm3, %ymm4
358 ; CHECK-NEXT: vpaddw %ymm2, %ymm2, %ymm2
359 ; CHECK-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
360 ; CHECK-NEXT: vpsraw $1, %ymm3, %ymm4
361 ; CHECK-NEXT: vpaddw %ymm2, %ymm2, %ymm2
362 ; CHECK-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
363 ; CHECK-NEXT: vpsrlw $8, %ymm2, %ymm2
364 ; CHECK-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
365 ; CHECK-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
366 ; CHECK-NEXT: vpsraw $4, %ymm0, %ymm3
367 ; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
368 ; CHECK-NEXT: vpsraw $2, %ymm0, %ymm3
369 ; CHECK-NEXT: vpaddw %ymm1, %ymm1, %ymm1
370 ; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
371 ; CHECK-NEXT: vpsraw $1, %ymm0, %ymm3
372 ; CHECK-NEXT: vpaddw %ymm1, %ymm1, %ymm1
373 ; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
374 ; CHECK-NEXT: vpsrlw $8, %ymm0, %ymm0
375 ; CHECK-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
377 %ashr = ashr <32 x i8> %r, %a
; Variable per-element word lshr: zero-extend both operands to dwords,
; vpsrlvd once, then pack the low words back down.
381 define <8 x i16> @lshr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
382 ; CHECK-LABEL: lshr_8i16
383 ; CHECK: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
384 ; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
385 ; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
386 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
387 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
389 %lshr = lshr <8 x i16> %r, %a
; Variable per-element word lshr on a full ymm: same unpack/repack
; scheme as shl_16i16, with vpsrlvd as the inner shift.
393 define <16 x i16> @lshr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
394 ; CHECK-LABEL: lshr_16i16
395 ; CHECK: vpxor %ymm2, %ymm2, %ymm2
396 ; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
397 ; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
398 ; CHECK-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
399 ; CHECK-NEXT: vpsrld $16, %ymm3, %ymm3
400 ; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
401 ; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
402 ; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
403 ; CHECK-NEXT: vpsrld $16, %ymm0, %ymm0
404 ; CHECK-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
406 %lshr = lshr <16 x i16> %r, %a
; Variable per-element byte lshr: same sign-bit-driven blend scheme as
; shl_32i8 (vpsllw $5 on the amounts, then masked vpsrlw $4/$2/$1 steps
; selected with vpblendvb).
410 define <32 x i8> @lshr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
411 ; CHECK-LABEL: lshr_32i8
412 ; CHECK: vpsllw $5, %ymm1, %ymm1
413 ; CHECK-NEXT: vpsrlw $4, %ymm0, %ymm2
414 ; CHECK-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
415 ; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
416 ; CHECK-NEXT: vpsrlw $2, %ymm0, %ymm2
417 ; CHECK-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
418 ; CHECK-NEXT: vpaddb %ymm1, %ymm1, %ymm1
419 ; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
420 ; CHECK-NEXT: vpsrlw $1, %ymm0, %ymm2
421 ; CHECK-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
422 ; CHECK-NEXT: vpaddb %ymm1, %ymm1, %ymm1
423 ; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
425 %lshr = lshr <32 x i8> %r, %a