; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
; 256-bit shift-left by uniform constant: AVX1 has no 256-bit integer shifts,
; so the ymm is split into two xmm halves, each shifted, then recombined.
define <8 x i32> @vshift00(<8 x i32> %a) {
; CHECK-LABEL: vshift00:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpslld $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}
; v16i16 uniform shift-left: split/shift/recombine, same pattern as vshift00
; but with the word-granularity vpsllw.
define <16 x i16> @vshift01(<16 x i16> %a) {
; CHECK-LABEL: vshift01:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllw $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsllw $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}
; v4i64 uniform shift-left: split/shift/recombine with quadword vpsllq.
define <4 x i64> @vshift02(<4 x i64> %a) {
; CHECK-LABEL: vshift02:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllq $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsllq $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}
;;; Logical Shift right
; v8i32 uniform logical shift-right: split/shift/recombine with vpsrld.
define <8 x i32> @vshift03(<8 x i32> %a) {
; CHECK-LABEL: vshift03:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrld $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsrld $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}
; v16i16 uniform logical shift-right: split/shift/recombine with vpsrlw.
define <16 x i16> @vshift04(<16 x i16> %a) {
; CHECK-LABEL: vshift04:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}
; v4i64 uniform logical shift-right: split/shift/recombine with vpsrlq.
define <4 x i64> @vshift05(<4 x i64> %a) {
; CHECK-LABEL: vshift05:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlq $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsrlq $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}
;;; Arithmetic Shift right
; v8i32 uniform arithmetic shift-right: split/shift/recombine with vpsrad.
define <8 x i32> @vshift06(<8 x i32> %a) {
; CHECK-LABEL: vshift06:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrad $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsrad $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}
; v16i16 uniform arithmetic shift-right: split/shift/recombine with vpsraw.
define <16 x i16> @vshift07(<16 x i16> %a) {
; CHECK-LABEL: vshift07:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsraw $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsraw $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}
; v32i8 arithmetic shift-right: x86 has no byte-granularity shifts, so each
; half is shifted as words, masked to the surviving byte bits, then the sign
; is restored via the xor/sub trick ((x ^ 0x20) - 0x20 sign-extends bit 5).
define <32 x i8> @vshift09(<32 x i8> %a) {
; CHECK-LABEL: vshift09:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpsrlw $2, %xmm1, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
; CHECK-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; CHECK-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm0
; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; CHECK-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %s = ashr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %s
}
; v32i8 ashr by 7 (sign splat): lowered as a compare-greater-than-zero per
; half, which produces all-ones for negative bytes and zero otherwise.
define <32 x i8> @vshift10(<32 x i8> %a) {
; CHECK-LABEL: vshift10:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm1
; CHECK-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %s = ashr <32 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <32 x i8> %s
}
; v32i8 logical shift-right: shift each half as words, then mask off the
; bits shifted in from the neighboring byte (0x3f keeps the low 6 bits).
define <32 x i8> @vshift11(<32 x i8> %a) {
; CHECK-LABEL: vshift11:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpsrlw $2, %xmm1, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm0
; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %s = lshr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %s
}
; v32i8 shift-left: shift each half as words, then mask off the bits that
; crossed into the next byte (0xfc clears the low 2 bits).
define <32 x i8> @vshift12(<32 x i8> %a) {
; CHECK-LABEL: vshift12:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpsllw $2, %xmm1, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpsllw $2, %xmm0, %xmm0
; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %s = shl <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %s
}
;;; Support variable shifts
; Variable shift of a constant-1 splat (1 << a): lowered per half via the
; float-exponent trick — shift the amount into the exponent field, bias it
; with 0x3f800000 (1.0f), and truncating-convert back to integer.
define <8 x i32> @vshift08(<8 x i32> %a) {
; CHECK-LABEL: vshift08:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $23, %xmm0, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [1065353216,1065353216,1065353216,1065353216]
; CHECK-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vcvttps2dq %xmm1, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpslld $23, %xmm0, %xmm0
; CHECK-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vcvttps2dq %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %bitop = shl <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %a
  ret <8 x i32> %bitop
}
; Non-uniform constant shift-left: folded into a multiply by a constant-pool
; vector of powers of two (1,2,4,16) via vpmulld.
define <4 x i32> @vshift13(<4 x i32> %in) {
; CHECK-LABEL: vshift13:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %T = shl <4 x i32> %in, <i32 0, i32 1, i32 2, i32 4>
  ret <4 x i32> %T
}
;;; Uses shifts for sign extension
; trunc+sext i8->i16 in-register: implemented per half as shl 8 / ashr 8,
; which replicates the low byte's sign bit into the high byte.
define <16 x i16> @sext_v16i16(<16 x i16> %a) {
; CHECK-LABEL: sext_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllw $8, %xmm0, %xmm1
; CHECK-NEXT:    vpsraw $8, %xmm1, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsllw $8, %xmm0, %xmm0
; CHECK-NEXT:    vpsraw $8, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %b = trunc <16 x i16> %a to <16 x i8>
  %c = sext <16 x i8> %b to <16 x i16>
  ret <16 x i16> %c
}
; trunc+sext i16->i32 in-register: per-half shl 16 / ashr 16 sign-extends
; the low 16 bits of each dword.
define <8 x i32> @sext_v8i32(<8 x i32> %a) {
; CHECK-LABEL: sext_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $16, %xmm0, %xmm1
; CHECK-NEXT:    vpsrad $16, %xmm1, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpslld $16, %xmm0, %xmm0
; CHECK-NEXT:    vpsrad $16, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %b = trunc <8 x i32> %a to <8 x i16>
  %c = sext <8 x i16> %b to <8 x i32>
  ret <8 x i32> %c
}