; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
;;; Shift left: uniform immediate shl of <8 x i32> splits into two 128-bit vpslld halves on AVX1.
define <8 x i32> @vshift00(<8 x i32> %a) {
; CHECK-LABEL: vshift00:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpslld $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}
; Uniform immediate shl of <16 x i16>: two 128-bit vpsllw halves on AVX1.
define <16 x i16> @vshift01(<16 x i16> %a) {
; CHECK-LABEL: vshift01:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllw $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsllw $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}
; Uniform immediate shl of <4 x i64>: two 128-bit vpsllq halves on AVX1.
define <4 x i64> @vshift02(<4 x i64> %a) {
; CHECK-LABEL: vshift02:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllq $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsllq $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}
;;; Logical Shift right
; Uniform immediate lshr of <8 x i32>: two 128-bit vpsrld halves on AVX1.
define <8 x i32> @vshift03(<8 x i32> %a) {
; CHECK-LABEL: vshift03:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrld $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsrld $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}
; Uniform immediate lshr of <16 x i16>: two 128-bit vpsrlw halves on AVX1.
define <16 x i16> @vshift04(<16 x i16> %a) {
; CHECK-LABEL: vshift04:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}
; Uniform immediate lshr of <4 x i64>: two 128-bit vpsrlq halves on AVX1.
define <4 x i64> @vshift05(<4 x i64> %a) {
; CHECK-LABEL: vshift05:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlq $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsrlq $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}
;;; Arithmetic Shift right
; Uniform immediate ashr of <8 x i32>: two 128-bit vpsrad halves on AVX1.
define <8 x i32> @vshift06(<8 x i32> %a) {
; CHECK-LABEL: vshift06:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrad $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsrad $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}
; Uniform immediate ashr of <16 x i16>: two 128-bit vpsraw halves on AVX1.
define <16 x i16> @vshift07(<16 x i16> %a) {
; CHECK-LABEL: vshift07:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsraw $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsraw $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}
; ashr of <32 x i8>: no byte shift exists, so each half lowers to a word
; lshr + mask, then a sign-extension fixup via xor/sub with 0x20 (32).
define <32 x i8> @vshift09(<32 x i8> %a) {
; CHECK-LABEL: vshift09:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpsrlw $2, %xmm1, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
; CHECK-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; CHECK-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm0
; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; CHECK-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %s = ashr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %s
}
; ashr of <32 x i8> by 7 (sign splat): lowers to pcmpgtb against zero per half.
define <32 x i8> @vshift10(<32 x i8> %a) {
; CHECK-LABEL: vshift10:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm1
; CHECK-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %s = ashr <32 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <32 x i8> %s
}
; lshr of <32 x i8>: word lshr + byte mask (0x3f) per 128-bit half.
define <32 x i8> @vshift11(<32 x i8> %a) {
; CHECK-LABEL: vshift11:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpsrlw $2, %xmm1, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm0
; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %s = lshr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %s
}
; shl of <32 x i8>: word shl + byte mask (0xfc) per 128-bit half.
define <32 x i8> @vshift12(<32 x i8> %a) {
; CHECK-LABEL: vshift12:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpsllw $2, %xmm1, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpsllw $2, %xmm0, %xmm0
; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %s = shl <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %s
}
;;; Support variable shifts
; Variable shl of splat-1 by %a (i.e. 1 << a per lane): lowered as
; 2^a computed via float-exponent trick (vpslld $23 + bias 0x3f800000 + cvttps2dq).
define <8 x i32> @vshift08(<8 x i32> %a) {
; CHECK-LABEL: vshift08:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $23, %xmm0, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [1065353216,1065353216,1065353216,1065353216]
; CHECK-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vcvttps2dq %xmm1, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpslld $23, %xmm0, %xmm0
; CHECK-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vcvttps2dq %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %bitop = shl <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %a
  ret <8 x i32> %bitop
}
; Non-uniform constant shl of <4 x i32>: folds to a vpmulld by a constant pool
; of powers of two.
define <4 x i32> @vshift13(<4 x i32> %in) {
; CHECK-LABEL: vshift13:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %T = shl <4 x i32> %in, <i32 0, i32 1, i32 2, i32 4>
  ret <4 x i32> %T
}
;;; Uses shifts for sign extension
; trunc+sext of <16 x i16> lanes to i8 and back: lowered as shl/sra by 8 per half.
define <16 x i16> @sext_v16i16(<16 x i16> %a) {
; CHECK-LABEL: sext_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllw $8, %xmm0, %xmm1
; CHECK-NEXT:    vpsraw $8, %xmm1, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsllw $8, %xmm0, %xmm0
; CHECK-NEXT:    vpsraw $8, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %b = trunc <16 x i16> %a to <16 x i8>
  %c = sext <16 x i8> %b to <16 x i16>
  ret <16 x i16> %c
}
; trunc+sext of <8 x i32> lanes to i16 and back: lowered as shl/sra by 16 per half.
define <8 x i32> @sext_v8i32(<8 x i32> %a) {
; CHECK-LABEL: sext_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $16, %xmm0, %xmm1
; CHECK-NEXT:    vpsrad $16, %xmm1, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpslld $16, %xmm0, %xmm0
; CHECK-NEXT:    vpsrad $16, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %b = trunc <8 x i32> %a to <8 x i16>
  %c = sext <8 x i16> %b to <8 x i32>
  ret <8 x i32> %c
}