1 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
2 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
4 target triple = "x86_64-unknown-unknown"
6 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
7 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
9 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
10 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
13 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
15 ; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
17 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
18 ret <16 x i16> %shuffle
21 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00(<16 x i16> %a, <16 x i16> %b) {
22 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
24 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
25 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
26 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,4,4,4]
27 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,5,4]
28 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
31 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
33 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
34 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
35 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
37 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
38 ret <16 x i16> %shuffle
41 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00(<16 x i16> %a, <16 x i16> %b) {
42 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
44 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
45 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
46 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
49 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
51 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
52 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
53 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
55 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
56 ret <16 x i16> %shuffle
59 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00(<16 x i16> %a, <16 x i16> %b) {
60 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
62 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
63 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
64 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
67 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
69 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
70 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
71 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
73 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
74 ret <16 x i16> %shuffle
77 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
78 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
80 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
81 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
82 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
85 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
87 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
88 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
89 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
91 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
92 ret <16 x i16> %shuffle
95 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
96 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
98 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
99 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
100 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
103 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
105 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
106 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
107 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
109 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
110 ret <16 x i16> %shuffle
113 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
114 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
116 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
117 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
118 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
121 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
123 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
124 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
125 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
127 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
128 ret <16 x i16> %shuffle
131 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
132 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
134 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
135 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
136 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
139 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
141 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
142 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
143 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
145 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
146 ret <16 x i16> %shuffle
149 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
150 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
152 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
153 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
154 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
155 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
156 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
159 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
161 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
162 ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
163 ; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
164 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
165 ; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
167 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
168 ret <16 x i16> %shuffle
171 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
172 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
174 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
175 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
176 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,6,7,0,1]
177 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
178 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
181 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
183 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
184 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,0,255,255,u,u,u,u,u,u,u,u,u,u,u,u,255,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
185 ; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
186 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
188 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
189 ret <16 x i16> %shuffle
192 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
193 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
195 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
196 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
197 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1]
198 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
199 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
202 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
204 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
205 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
206 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
208 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
209 ret <16 x i16> %shuffle
212 define <16 x i16> @shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
213 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
215 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
216 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
217 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1]
218 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
219 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
222 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
224 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
225 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
226 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
228 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
229 ret <16 x i16> %shuffle
232 define <16 x i16> @shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
233 ; AVX1-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
235 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
236 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
237 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
238 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
239 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
242 ; AVX2-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
244 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
245 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
246 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
248 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 12, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
249 ret <16 x i16> %shuffle
252 define <16 x i16> @shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
253 ; AVX1-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
255 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
256 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
257 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
258 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
259 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
262 ; AVX2-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
264 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
265 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
266 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
268 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 13, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
269 ret <16 x i16> %shuffle
272 define <16 x i16> @shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
273 ; AVX1-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
275 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
276 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
277 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
278 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
279 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
282 ; AVX2-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
284 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
285 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
286 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
288 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
289 ret <16 x i16> %shuffle
292 define <16 x i16> @shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
293 ; AVX1-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
295 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
296 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
297 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
298 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
299 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
302 ; AVX2-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
304 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
305 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
306 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
308 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
309 ret <16 x i16> %shuffle
312 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
313 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
315 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
316 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
317 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
318 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
319 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
322 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
324 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
326 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
327 ret <16 x i16> %shuffle
330 define <16 x i16> @shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15(<16 x i16> %a, <16 x i16> %b) {
331 ; AVX1-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
333 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
334 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15]
335 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
336 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
337 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
340 ; AVX2-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
342 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15,30,31,30,31,30,31,30,31,30,31,30,31,30,31,30,31]
344 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
345 ret <16 x i16> %shuffle
348 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
349 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
351 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
352 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
353 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
354 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
355 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
356 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
359 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
361 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
362 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
364 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
365 ret <16 x i16> %shuffle
368 define <16 x i16> @shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15(<16 x i16> %a, <16 x i16> %b) {
369 ; AVX1-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
371 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
372 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,7,7,7]
373 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
374 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
375 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
376 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
379 ; AVX2-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
381 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,3,3,3,4,5,6,7,11,11,11,11,12,13,14,15]
382 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
384 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 7, i32 7, i32 11, i32 11, i32 11, i32 11, i32 15, i32 15, i32 15, i32 15>
385 ret <16 x i16> %shuffle
388 define <16 x i16> @shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14(<16 x i16> %a, <16 x i16> %b) {
389 ; AVX1-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
391 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7]
392 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
393 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
394 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
395 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
396 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
399 ; AVX2-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
401 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,2,2,4,5,6,7,8,8,10,10,12,13,14,15]
402 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,6,8,9,10,11,12,12,14,14]
404 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
405 ret <16 x i16> %shuffle
408 define <16 x i16> @shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15(<16 x i16> %a, <16 x i16> %b) {
409 ; AVX1-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
411 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,3,3,4,5,6,7]
412 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,7,7]
413 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
414 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,3,3,4,5,6,7]
415 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,7,7]
416 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
419 ; AVX2-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
421 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15]
422 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,5,7,7,8,9,10,11,13,13,15,15]
424 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
425 ret <16 x i16> %shuffle
428 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00(<16 x i16> %a, <16 x i16> %b) {
429 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
431 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
432 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
435 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
437 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
438 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
440 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
441 ret <16 x i16> %shuffle
444 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00(<16 x i16> %a, <16 x i16> %b) {
445 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
447 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
448 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
451 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
453 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
454 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
456 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
457 ret <16 x i16> %shuffle
460 define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00(<16 x i16> %a, <16 x i16> %b) {
461 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
463 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
464 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
467 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
469 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
470 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
472 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
473 ret <16 x i16> %shuffle
476 define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
477 ; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
479 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
480 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
483 ; AVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
485 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
486 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
488 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
489 ret <16 x i16> %shuffle
492 define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
493 ; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
495 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
496 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
499 ; AVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
501 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
502 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
504 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
505 ret <16 x i16> %shuffle
508 define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
509 ; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
511 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
512 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
515 ; AVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
517 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
518 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
520 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
521 ret <16 x i16> %shuffle
524 define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
525 ; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
527 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
528 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
531 ; AVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
533 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
534 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
536 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
537 ret <16 x i16> %shuffle
540 define <16 x i16> @shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31(<16 x i16> %a, <16 x i16> %b) {
541 ; AVX1-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
543 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
544 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
545 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
546 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
547 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
550 ; AVX2-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
552 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
554 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
555 ret <16 x i16> %shuffle
558 define <16 x i16> @shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15(<16 x i16> %a, <16 x i16> %b) {
559 ; AVX1-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15:
561 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
562 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
563 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
564 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
565 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
568 ; AVX2-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15:
570 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
572 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
573 ret <16 x i16> %shuffle
576 define <16 x i16> @shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31(<16 x i16> %a, <16 x i16> %b) {
577 ; AVX1-LABEL: shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31:
579 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
582 ; AVX2-LABEL: shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31:
584 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
586 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
587 ret <16 x i16> %shuffle
590 define <16 x i16> @shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
591 ; AVX1-LABEL: shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15:
593 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
596 ; AVX2-LABEL: shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15:
598 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
600 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 12, i32 13, i32 14, i32 15>
601 ret <16 x i16> %shuffle
604 define <16 x i16> @shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31(<16 x i16> %a, <16 x i16> %b) {
605 ; AVX1-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31:
607 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
608 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
609 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4,5,6],xmm1[7]
610 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
613 ; AVX2-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31:
615 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0]
616 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
618 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31>
619 ret <16 x i16> %shuffle
622 define <16 x i16> @shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
623 ; AVX1-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
625 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
626 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
627 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
630 ; AVX2-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
632 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
633 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
635 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
636 ret <16 x i16> %shuffle
639 define <16 x i16> @shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15(<16 x i16> %a, <16 x i16> %b) {
640 ; AVX1-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15:
642 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
643 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
644 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
645 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
646 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
649 ; AVX2-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15:
651 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255]
652 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
654 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
655 ret <16 x i16> %shuffle
658 define <16 x i16> @shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31(<16 x i16> %a, <16 x i16> %b) {
659 ; AVX1-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31:
661 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
662 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
663 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
664 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
665 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
668 ; AVX2-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31:
670 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0]
671 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
673 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
674 ret <16 x i16> %shuffle
677 define <16 x i16> @shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31(<16 x i16> %a, <16 x i16> %b) {
678 ; AVX1-LABEL: shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31:
680 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4],ymm1[5],ymm0[6],ymm1[7]
683 ; AVX2-LABEL: shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31:
685 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4],ymm1[5],ymm0[6],ymm1[7]
687 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 6, i32 7, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
688 ret <16 x i16> %shuffle
691 define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16(<16 x i16> %a, <16 x i16> %b) {
692 ; AVX1-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16:
694 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
695 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
696 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
699 ; AVX2-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16:
701 ; AVX2-NEXT: vpbroadcastw %xmm1, %ymm1
702 ; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
703 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
705 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16>
706 ret <16 x i16> %shuffle
709 define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24(<16 x i16> %a, <16 x i16> %b) {
710 ; AVX1-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24:
712 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
713 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
714 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
715 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
716 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
717 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
718 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
721 ; AVX2-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24:
723 ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,0,1,0,1,0,1,12,13,0,1,16,17,16,17,20,21,16,17,16,17,16,17,28,29,16,17]
724 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
725 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
727 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24>
728 ret <16 x i16> %shuffle
731 define <16 x i16> @shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
732 ; AVX1-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15:
734 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
735 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
736 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
737 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,0,0,0,4,5,6,7]
738 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
739 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
740 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
741 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
742 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
745 ; AVX2-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15:
747 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
748 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
750 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 4, i32 5, i32 6, i32 7, i32 24, i32 24, i32 24, i32 24, i32 12, i32 13, i32 14, i32 15>
751 ret <16 x i16> %shuffle
754 define <16 x i16> @shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12(<16 x i16> %a, <16 x i16> %b) {
755 ; AVX1-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12:
757 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
758 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
759 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
760 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
761 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7]
762 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
763 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
764 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
765 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
766 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
767 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
770 ; AVX2-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12:
772 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
773 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
774 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12]
776 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 7, i32 6, i32 5, i32 4, i32 27, i32 26, i32 25, i32 24, i32 15, i32 14, i32 13, i32 12>
777 ret <16 x i16> %shuffle
780 define <16 x i16> @shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08(<16 x i16> %a, <16 x i16> %b) {
781 ; AVX1-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08:
783 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
784 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
785 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
786 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [12,13,8,9,4,5,0,1,14,15,10,11,6,7,2,3]
787 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
788 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
789 ; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
790 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
793 ; AVX2-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08:
795 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
796 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
797 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12]
798 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
800 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 3, i32 2, i32 1, i32 0, i32 27, i32 26, i32 25, i32 24, i32 11, i32 10, i32 9, i32 8>
801 ret <16 x i16> %shuffle
804 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08(<16 x i16> %a, <16 x i16> %b) {
805 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08:
807 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
808 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
809 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
810 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
811 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
814 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08:
816 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,18,19,16,17]
818 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 8>
819 ret <16 x i16> %shuffle
822 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08(<16 x i16> %a, <16 x i16> %b) {
823 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08:
825 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
826 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
827 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
828 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
829 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
832 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08:
834 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,20,21,16,17,16,17]
836 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 10, i32 8, i32 8>
837 ret <16 x i16> %shuffle
840 define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08(<16 x i16> %a, <16 x i16> %b) {
841 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08:
843 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
844 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
845 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
846 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
847 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
850 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08:
852 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,22,23,16,17,16,17,16,17]
854 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 11, i32 8, i32 8, i32 8>
855 ret <16 x i16> %shuffle
858 define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
859 ; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08:
861 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
862 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
863 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
864 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
865 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
868 ; AVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08:
870 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,24,25,16,17,16,17,16,17,16,17]
872 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 12, i32 8, i32 8, i32 8, i32 8>
873 ret <16 x i16> %shuffle
876 define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
877 ; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08:
879 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
880 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
881 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
882 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
883 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
886 ; AVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08:
888 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,26,27,16,17,16,17,16,17,16,17,16,17]
890 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 13, i32 8, i32 8, i32 8, i32 8, i32 8>
891 ret <16 x i16> %shuffle
894 define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
895 ; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08:
897 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
898 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
899 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
900 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
901 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
904 ; AVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08:
906 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,28,29,16,17,16,17,16,17,16,17,16,17,16,17]
908 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 14, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
909 ret <16 x i16> %shuffle
912 define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
913 ; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08:
915 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
916 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
917 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
918 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
919 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
922 ; AVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08:
924 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,30,31,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
926 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
927 ret <16 x i16> %shuffle
930 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) {
931 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27:
933 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
934 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
935 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
936 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
937 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
940 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27:
942 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
944 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
945 ret <16 x i16> %shuffle
948 define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
949 ; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31:
951 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
952 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
953 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
954 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
955 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
958 ; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31:
960 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
962 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
963 ret <16 x i16> %shuffle
966 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
967 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31:
969 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
970 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
971 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
972 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
973 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
976 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31:
978 ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,6,7,u,u,24,25,u,u,26,27,u,u,28,29,u,u,30,31]
979 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,2,3,u,u,4,5,u,u,6,7,u,u,24,25,u,u,26,27,u,u,28,29,u,u,30,31,u,u]
980 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
982 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
983 ret <16 x i16> %shuffle
986 define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) {
987 ; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27:
989 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
990 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
991 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
992 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
993 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
996 ; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27:
998 ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,8,9,u,u,10,11,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23]
999 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,u,u,10,11,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23,u,u]
1000 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
1002 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
1003 ret <16 x i16> %shuffle
1006 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
1007 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08:
1009 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
1010 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1011 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,0,1,0,1,0,1,0,1,0,1,0,1]
1012 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1015 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08:
1017 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,18,19,16,17,16,17,16,17,16,17,16,17,16,17]
1019 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 8, i32 9, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
1020 ret <16 x i16> %shuffle
1023 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
1024 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08:
1026 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
1027 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1028 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,0,1,0,1,0,1,0,1,0,1]
1029 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1032 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08:
1034 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,20,21,16,17,16,17,16,17,16,17,16,17]
1036 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 8, i32 8, i32 10, i32 8, i32 8, i32 8, i32 8, i32 8>
1037 ret <16 x i16> %shuffle
1040 define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
1041 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08:
1043 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
1044 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1045 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,6,7,0,1,0,1,0,1,0,1]
1046 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1049 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08:
1051 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,22,23,16,17,16,17,16,17,16,17]
1053 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 11, i32 8, i32 8, i32 8, i32 8>
1054 ret <16 x i16> %shuffle
1057 define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08(<16 x i16> %a, <16 x i16> %b) {
1058 ; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08:
1060 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
1061 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1062 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,0,1,0,1,0,1]
1063 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1066 ; AVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08:
1068 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,16,17,16,17,16,17]
1070 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 12, i32 8, i32 8, i32 8>
1071 ret <16 x i16> %shuffle
1074 define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08(<16 x i16> %a, <16 x i16> %b) {
1075 ; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08:
1077 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
1078 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1079 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1]
1080 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1083 ; AVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08:
1085 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,26,27,16,17,16,17]
1087 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 13, i32 8, i32 8>
1088 ret <16 x i16> %shuffle
1091 define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08(<16 x i16> %a, <16 x i16> %b) {
1092 ; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08:
1094 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
1095 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1096 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,12,13,0,1]
1097 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1100 ; AVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08:
1102 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,28,29,16,17]
1104 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 14, i32 8>
1105 ret <16 x i16> %shuffle
1108 define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15(<16 x i16> %a, <16 x i16> %b) {
1109 ; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15:
1111 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
1112 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1113 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,14,15]
1114 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1117 ; AVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15:
1119 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,30,31]
1121 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 15>
1122 ret <16 x i16> %shuffle
1125 define <16 x i16> @shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08(<16 x i16> %a, <16 x i16> %b) {
1126 ; AVX1-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08:
1128 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7]
1129 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
1130 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1131 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,8,9,8,9,4,5,4,5,0,1,0,1]
1132 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1135 ; AVX2-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08:
1137 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13,28,29,28,29,24,25,24,25,20,21,20,21,16,17,16,17]
1139 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 14, i32 14, i32 12, i32 12, i32 10, i32 10, i32 8, i32 8>
1140 ret <16 x i16> %shuffle
1143 define <16 x i16> @shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
1144 ; AVX1-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12:
1146 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
1147 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1148 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1149 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
1150 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1153 ; AVX2-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12:
1155 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
1157 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
1158 ret <16 x i16> %shuffle
1161 define <16 x i16> @shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08(<16 x i16> %a, <16 x i16> %b) {
1162 ; AVX1-LABEL: shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08:
1164 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,0,1,0,1,0,1,0,1,0,1]
1165 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1166 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,6,7,0,1,0,1,12,13,0,1]
1167 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1170 ; AVX2-LABEL: shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08:
1172 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,28,29,16,17]
1174 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 undef, i32 undef, i32 8, i32 8, i32 14, i32 8>
1175 ret <16 x i16> %shuffle
1178 define <16 x i16> @shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15(<16 x i16> %a, <16 x i16> %b) {
1179 ; AVX1-LABEL: shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15:
1181 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[14,15,2,3,0,1,0,1,0,1,0,1,0,1,0,1]
1182 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1183 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,6,7,0,1,0,1,0,1,14,15]
1184 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1187 ; AVX2-LABEL: shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15:
1189 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,u,u,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,16,17,30,31]
1191 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 undef, i32 undef, i32 8, i32 8, i32 8, i32 15>
1192 ret <16 x i16> %shuffle
1195 define <16 x i16> @shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08(<16 x i16> %a, <16 x i16> %b) {
1196 ; AVX1-LABEL: shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08:
1198 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,1,2,2,4,5,6,7]
1199 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
1200 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1201 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,12,13,8,9,4,5,4,5,0,1,0,1]
1202 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1205 ; AVX2-LABEL: shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08:
1207 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,4,5,8,9,8,9,u,u,12,13,28,29,28,29,u,u,24,25,20,21,20,21,16,17,16,17]
1209 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 undef, i32 2, i32 4, i32 4, i32 undef, i32 6, i32 14, i32 14, i32 undef, i32 12, i32 10, i32 10, i32 8, i32 8>
1210 ret <16 x i16> %shuffle
1213 define <16 x i16> @shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12(<16 x i16> %a, <16 x i16> %b) {
1214 ; AVX1-LABEL: shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12:
1216 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,1,2,3]
1217 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1218 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1219 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,3,4,5,6,7]
1220 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
1221 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1224 ; AVX2-LABEL: shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12:
1226 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,u,u,u,u,u,u,u,u,16,17,16,17,16,17,u,u,u,u,24,25,24,25,24,25]
1228 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8, i32 undef, i32 undef, i32 12, i32 12, i32 12>
1229 ret <16 x i16> %shuffle
1232 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20(<16 x i16> %a, <16 x i16> %b) {
1233 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20:
1235 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1236 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
1237 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1238 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
1239 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1242 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20:
1244 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1245 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
1246 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
1248 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20>
1249 ret <16 x i16> %shuffle
1252 define <16 x i16> @shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20(<16 x i16> %a, <16 x i16> %b) {
1253 ; AVX1-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20:
1255 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1256 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1257 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
1258 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1259 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
1260 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1263 ; AVX2-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20:
1265 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
1266 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
1267 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
1269 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20>
1270 ret <16 x i16> %shuffle
1273 define <16 x i16> @shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28(<16 x i16> %a, <16 x i16> %b) {
1274 ; AVX1-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28:
1276 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1277 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1278 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
1279 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
1280 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1281 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
1282 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1285 ; AVX2-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28:
1287 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1288 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
1289 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
1291 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28>
1292 ret <16 x i16> %shuffle
1295 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28(<16 x i16> %a, <16 x i16> %b) {
1296 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28:
1298 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1299 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
1300 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
1301 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1302 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
1303 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1306 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28:
1308 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1309 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
1310 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
1312 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28>
1313 ret <16 x i16> %shuffle
1316 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(<16 x i16> %a, <16 x i16> %b) {
1317 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
1319 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1320 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1321 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1324 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
1326 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1327 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1328 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1330 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
1331 ret <16 x i16> %shuffle
1334 define <16 x i16> @shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24(<16 x i16> %a) {
1335 ; AVX1-LABEL: shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24:
1337 ; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
1338 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1339 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
1340 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1343 ; AVX2-LABEL: shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24:
1345 ; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
1347 %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24>
1348 ret <16 x i16> %shuffle
1351 define <16 x i16> @shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz(<16 x i16> %a) {
1352 ; AVX1-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz:
1354 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1355 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1356 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1357 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1360 ; AVX2-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz:
1362 ; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero
1364 %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0>
1365 ret <16 x i16> %shuffle
1369 ; Shuffle to logical bit shifts
1372 define <16 x i16> @shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14(<16 x i16> %a) {
1373 ; AVX1-LABEL: shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
1375 ; AVX1-NEXT: vpslld $16, %xmm0, %xmm1
1376 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1377 ; AVX1-NEXT: vpslld $16, %xmm0, %xmm0
1378 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1381 ; AVX2-LABEL: shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
1383 ; AVX2-NEXT: vpslld $16, %ymm0, %ymm0
1385 %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 0, i32 16, i32 2, i32 16, i32 4, i32 16, i32 6, i32 16, i32 8, i32 16, i32 10, i32 16, i32 12, i32 16, i32 14>
1386 ret <16 x i16> %shuffle
1389 define <16 x i16> @shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12(<16 x i16> %a) {
1390 ; AVX1-LABEL: shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
1392 ; AVX1-NEXT: vpsllq $48, %xmm0, %xmm1
1393 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1394 ; AVX1-NEXT: vpsllq $48, %xmm0, %xmm0
1395 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1398 ; AVX2-LABEL: shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
1400 ; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0
1402 %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 12>
1403 ret <16 x i16> %shuffle
1406 define <16 x i16> @shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz(<16 x i16> %a) {
1407 ; AVX1-LABEL: shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz:
1409 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
1410 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1411 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
1412 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1415 ; AVX2-LABEL: shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz:
1417 ; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
1419 %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 1, i32 16, i32 3, i32 16, i32 5, i32 16, i32 7, i32 16, i32 9, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15, i32 16>
1420 ret <16 x i16> %shuffle
1423 define <16 x i16> @shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz(<16 x i16> %a) {
1424 ; AVX1-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:
1426 ; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
1427 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
1428 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
1431 ; AVX2-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:
1433 ; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
1435 %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 16, i32 16, i32 10, i32 11, i32 16, i32 16, i32 14, i32 15, i32 16, i32 16>
1436 ret <16 x i16> %shuffle
1439 define <16 x i16> @shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz(<16 x i16> %a) {
1440 ; AVX1-LABEL: shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz:
1442 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1443 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1444 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1445 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1448 ; AVX2-LABEL: shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz:
1450 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1452 %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 16, i32 0, i32 0, i32 0, i32 17, i32 0, i32 0, i32 0, i32 18, i32 0, i32 0, i32 0, i32 19, i32 0, i32 0, i32 0>
1453 ret <16 x i16> %shuffle
1456 define <16 x i16> @shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_22_zz(<16 x i16> %a) {
1457 ; AVX1-LABEL: shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_22_zz:
1459 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1460 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1461 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1462 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1465 ; AVX2-LABEL: shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_22_zz:
1467 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1469 %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 16, i32 0, i32 17, i32 0, i32 18, i32 0, i32 19, i32 0, i32 20, i32 0, i32 21, i32 0, i32 22, i32 0, i32 23, i32 0>
1470 ret <16 x i16> %shuffle
1473 define <16 x i16> @shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14(<16 x i16> %a, <16 x i16> %b) {
1474 ; AVX1-LABEL: shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14:
1476 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1477 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1478 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1479 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1480 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1483 ; AVX2-LABEL: shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14:
1485 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm1[30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
1487 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
1488 ret <16 x i16> %shuffle
1491 define <16 x i16> @shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24(<16 x i16> %a, <16 x i16> %b) {
1492 ; AVX1-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24:
1494 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1495 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1496 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1]
1497 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
1498 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1501 ; AVX2-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24:
1503 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1],ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17]
1505 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24>
1506 ret <16 x i16> %shuffle
1509 define <16 x i16> @shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8(<16 x i16> %a, <16 x i16> %b) {
1510 ; AVX1-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8:
1512 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1513 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1514 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1]
1515 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
1516 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1519 ; AVX2-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8:
1521 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
1523 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 00, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 8>
1524 ret <16 x i16> %shuffle
1527 define <16 x i16> @shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30(<16 x i16> %a, <16 x i16> %b) {
1528 ; AVX1-LABEL: shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30:
1530 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1531 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1532 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1533 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1534 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1537 ; AVX2-LABEL: shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30:
1539 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm0[30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
1541 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
1542 ret <16 x i16> %shuffle
1545 define <16 x i16> @shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16(<16 x i16> %a, <16 x i16> %b) {
1546 ; AVX1-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16:
1548 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
1549 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
1550 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1553 ; AVX2-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16:
1555 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1556 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,18,19,20,21,22,23,24,25,26,27,28,29,30,31,16,17]
1558 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16>
1559 ret <16 x i16> %shuffle
1562 define <16 x i16> @shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22(<16 x i16> %a, <16 x i16> %b) {
1563 ; AVX1-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22:
1565 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1566 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1567 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1570 ; AVX2-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22:
1572 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1573 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,30,31,16,17,18,19,20,21,22,23,24,25,26,27,28,29]
1575 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 23, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
1576 ret <16 x i16> %shuffle
1579 define <16 x i16> @shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11(<16 x i16> %a, <16 x i16> %b) {
1580 ; AVX1-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
1582 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1583 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1584 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7]
1585 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
1586 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
1587 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1590 ; AVX2-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
1592 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1593 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1594 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7]
1595 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
1596 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
1597 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1599 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 11, i32 8, i32 9, i32 8, i32 9, i32 10, i32 11, i32 10, i32 11>
1600 ret <16 x i16> %shuffle
1603 define <16 x i16> @shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09(<16 x i16> %a, <16 x i16> %b) {
1604 ; AVX1-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
1606 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1607 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
1608 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
1609 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
1610 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1613 ; AVX2-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
1615 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1616 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
1617 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
1618 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
1619 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1621 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 9, i32 14, i32 15, i32 12, i32 13, i32 10, i32 11, i32 8, i32 9>
1622 ret <16 x i16> %shuffle
1625 define <16 x i16> @shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27(<16 x i16> %a, <16 x i16> %b) {
1626 ; AVX1-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27:
1628 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1629 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1630 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7]
1631 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
1632 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,14,15]
1633 ; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1634 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1637 ; AVX2-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27:
1639 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1640 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1641 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
1642 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1643 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1644 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1646 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 27, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27>
1647 ret <16 x i16> %shuffle
1650 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
1651 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
1653 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1654 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1655 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
1656 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
1657 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1660 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
1662 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1663 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1664 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
1665 ; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
1666 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1668 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
1669 ret <16 x i16> %shuffle
1672 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
1673 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
1675 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1676 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
1677 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1678 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,7]
1679 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1680 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1681 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
1682 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1685 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
1687 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1688 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
1689 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1690 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,7]
1691 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1692 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1693 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
1694 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1696 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 12, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
1697 ret <16 x i16> %shuffle
1700 define <16 x i16> @shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11(<16 x i16> %a, <16 x i16> %b) {
1701 ; AVX1-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
1703 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1704 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1705 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1706 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
1707 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
1708 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1711 ; AVX2-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
1713 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1714 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1715 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1716 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
1717 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
1718 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1720 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 11, i32 undef, i32 8, i32 undef, i32 9, i32 undef, i32 10, i32 undef, i32 11>
1721 ret <16 x i16> %shuffle
1724 define <16 x i16> @shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15(<16 x i16> %a, <16 x i16> %b) {
1725 ; AVX1-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
1727 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1728 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1729 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1730 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
1731 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
1732 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1735 ; AVX2-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
1737 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1738 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1739 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1740 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
1741 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
1742 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1744 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 15, i32 undef, i32 12, i32 undef, i32 13, i32 undef, i32 14, i32 undef, i32 15>
1745 ret <16 x i16> %shuffle
1748 define <16 x i16> @shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13(<16 x i16> %a, <16 x i16> %b) {
1749 ; AVX1-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
1751 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1752 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
1753 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
1754 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
1755 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7]
1756 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
1757 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1760 ; AVX2-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
1762 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1763 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
1764 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
1765 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
1766 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7]
1767 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
1768 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1770 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 13, i32 11, i32 9, i32 10, i32 8, i32 14, i32 15, i32 12, i32 13>
1771 ret <16 x i16> %shuffle
1774 define <16 x i16> @shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
1775 ; AVX1-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
1777 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1778 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
1779 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,14,15]
1780 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1781 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
1782 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1785 ; AVX2-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
1787 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1788 ; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
1789 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,14,15]
1790 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1791 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
1792 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1794 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 8, i32 12, i32 12, i32 12, i32 12, i32 8, i32 8, i32 8, i32 8>
1795 ret <16 x i16> %shuffle
1798 define <16 x i16> @shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13(<16 x i16> %a, <16 x i16> %b) {
1799 ; AVX1-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
1801 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1802 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
1803 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
1804 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,3,2]
1805 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1808 ; AVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
1810 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1811 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
1812 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
1813 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,3,2]
1814 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1816 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 13, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
1817 ret <16 x i16> %shuffle
1820 define <16 x i16> @shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13(<16 x i16> %a, <16 x i16> %b) {
1821 ; AVX1-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
1823 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1824 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
1825 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
1826 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
1827 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7]
1828 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
1829 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1832 ; AVX2-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
1834 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1835 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
1836 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
1837 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
1838 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7]
1839 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
1840 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1842 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 13, i32 10, i32 11, i32 8, i32 10, i32 14, i32 15, i32 12, i32 13>
1843 ret <16 x i16> %shuffle
1846 define <16 x i16> @shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15(<16 x i16> %a, <16 x i16> %b) {
1847 ; AVX1-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
1849 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1850 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
1851 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1852 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3]
1853 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
1854 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1857 ; AVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
1859 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1860 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
1861 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1862 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3]
1863 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
1864 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1866 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 15, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 15>
1867 ret <16 x i16> %shuffle
1870 define <16 x i16> @shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08(<16 x i16> %a, <16 x i16> %b) {
1871 ; AVX1-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08:
1873 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1874 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
1875 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
1876 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
1877 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1878 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
1881 ; AVX2-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08:
1883 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1884 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
1885 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
1886 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
1887 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1888 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
1890 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 8, i32 15, i32 13, i32 14, i32 12, i32 11, i32 9, i32 10, i32 8>
1891 ret <16 x i16> %shuffle
1894 define <16 x i16> @shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08(<16 x i16> %a, <16 x i16> %b) {
1895 ; AVX1-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
1897 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1898 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
1899 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,2,3]
1900 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1901 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
1902 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1905 ; AVX2-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
1907 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1908 ; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
1909 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,2,3]
1910 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1911 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
1912 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1914 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 8, i32 9, i32 8, i32 13, i32 12, i32 13, i32 12, i32 9, i32 8>
1915 ret <16 x i16> %shuffle
1918 define <16 x i16> @shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08(<16 x i16> %a, <16 x i16> %b) {
1919 ; AVX1-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
1921 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1922 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
1923 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,2,3]
1924 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1925 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
1926 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1929 ; AVX2-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
1931 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1932 ; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
1933 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,2,3]
1934 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1935 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
1936 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1938 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 8, i32 13, i32 12, i32 9, i32 8, i32 13, i32 12, i32 9, i32 8>
1939 ret <16 x i16> %shuffle
1942 define <16 x i16> @shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12(<16 x i16> %a, <16 x i16> %b) {
1943 ; AVX1-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
1945 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1946 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
1947 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,2,3]
1948 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1949 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
1950 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1953 ; AVX2-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
1955 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1956 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
1957 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,2,3]
1958 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1959 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
1960 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1962 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 12, i32 13, i32 12, i32 9, i32 8, i32 9, i32 8, i32 13, i32 12>
1963 ret <16 x i16> %shuffle
1966 define <16 x i16> @shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08(<16 x i16> %a, <16 x i16> %b) {
1967 ; AVX1-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
1969 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1970 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
1971 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,2,3]
1972 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1973 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
1974 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1977 ; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
1979 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1980 ; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
1981 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,2,3]
1982 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1983 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
1984 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1986 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 8>
1987 ret <16 x i16> %shuffle
1990 define <16 x i16> @shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12(<16 x i16> %a, <16 x i16> %b) {
1991 ; AVX1-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
1993 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1994 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
1995 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,2,3]
1996 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1997 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
1998 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2001 ; AVX2-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
2003 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2004 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
2005 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,2,3]
2006 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2007 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
2008 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2010 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 12>
2011 ret <16 x i16> %shuffle
2014 define <16 x i16> @shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11(<16 x i16> %a, <16 x i16> %b) {
2015 ; AVX1-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11:
2017 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2018 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
2019 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2020 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
2021 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2022 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2025 ; AVX2-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11:
2027 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2028 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
2029 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2030 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
2031 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2032 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
2034 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 11, i32 10, i32 14, i32 12, i32 8, i32 13, i32 9, i32 15, i32 11>
2035 ret <16 x i16> %shuffle
2038 define <16 x i16> @shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11(<16 x i16> %a, <16 x i16> %b) {
2039 ; AVX1-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11:
2041 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2042 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
2043 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2044 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
2045 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2046 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2049 ; AVX2-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11:
2051 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2052 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
2053 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2054 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
2055 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2056 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
2058 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 11, i32 10, i32 8, i32 14, i32 12, i32 13, i32 9, i32 15, i32 11>
2059 ret <16 x i16> %shuffle
2062 define <16 x i16> @shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13(<16 x i16> %a, <16 x i16> %b) {
2063 ; AVX1-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13:
2065 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2066 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
2067 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2068 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
2069 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2070 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2073 ; AVX2-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13:
2075 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2076 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
2077 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2078 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
2079 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2080 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
2082 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 13, i32 10, i32 14, i32 12, i32 8, i32 9, i32 11, i32 15, i32 13>
2083 ret <16 x i16> %shuffle
2086 define <16 x i16> @shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11(<16 x i16> %a, <16 x i16> %b) {
2087 ; AVX1-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
2089 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2090 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
2091 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2092 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
2093 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2094 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2097 ; AVX2-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
2099 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2100 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
2101 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2102 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
2103 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2104 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
2106 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 11, i32 14, i32 14, i32 15, i32 13, i32 9, i32 14, i32 12, i32 11>
2107 ret <16 x i16> %shuffle
2110 define <16 x i16> @shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
2111 ; AVX1-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
2113 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2114 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
2115 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,14,15]
2116 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2117 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
2118 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2121 ; AVX2-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
2123 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2124 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
2125 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,14,15]
2126 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2127 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
2128 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2130 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 12, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
2131 ret <16 x i16> %shuffle
2134 define <16 x i16> @shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
2135 ; AVX1-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
2137 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2138 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
2139 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,14,15]
2140 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2141 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
2142 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2145 ; AVX2-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
2147 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2148 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
2149 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,14,15]
2150 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2151 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
2152 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2154 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 12, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
2155 ret <16 x i16> %shuffle
2158 define <16 x i16> @shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
2159 ; AVX1-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
2161 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2162 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
2163 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
2164 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2165 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
2166 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2169 ; AVX2-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
2171 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2172 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
2173 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
2174 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2175 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
2176 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2178 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 12, i32 8, i32 12, i32 12, i32 8, i32 12, i32 12, i32 12, i32 12>
2179 ret <16 x i16> %shuffle
2182 define <16 x i16> @shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
2183 ; AVX1-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
2185 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2186 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
2187 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,14,15]
2188 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2189 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
2190 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2193 ; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
2195 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2196 ; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
2197 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,14,15]
2198 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2199 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
2200 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2202 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 8, i32 8, i32 8>
2203 ret <16 x i16> %shuffle
2206 define <16 x i16> @shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
2207 ; AVX1-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
2209 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2210 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2211 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
2212 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2213 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
2214 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7]
2215 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2218 ; AVX2-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
2220 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2221 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2222 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
2223 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2224 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
2225 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7]
2226 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2228 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 15, i32 8, i32 12, i32 12, i32 8, i32 12, i32 13, i32 14, i32 15>
2229 ret <16 x i16> %shuffle
2232 define <16 x i16> @shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
2233 ; AVX1-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
2235 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2236 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
2237 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,14,15]
2238 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2239 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
2240 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2243 ; AVX2-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
2245 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2246 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
2247 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,14,15]
2248 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2249 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
2250 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2252 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 12, i32 8, i32 undef, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
2253 ret <16 x i16> %shuffle
2256 define <16 x i16> @shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
2257 ; AVX1-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
2259 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2260 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
2261 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
2262 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2263 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
2264 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2267 ; AVX2-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
2269 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2270 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
2271 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
2272 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2273 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
2274 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2276 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 12, i32 12, i32 12, i32 undef, i32 8, i32 12, i32 12, i32 12, i32 12>
2277 ret <16 x i16> %shuffle
2280 define <16 x i16> @shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
2281 ; AVX1-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
2283 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2284 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
2285 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
2286 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2287 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
2288 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2291 ; AVX2-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
2293 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2294 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
2295 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
2296 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2297 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
2298 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2300 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 12, i32 undef, i32 12, i32 12, i32 8, i32 12, i32 12, i32 12, i32 12>
2301 ret <16 x i16> %shuffle
2304 define <16 x i16> @shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
2305 ; AVX1-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu:
2307 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2308 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
2309 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2310 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2311 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2314 ; AVX2-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu:
2316 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15,16,17,18,19,20,21,30,31,20,21,30,31,28,29,30,31]
2318 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
2319 ret <16 x i16> %shuffle
2322 define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11(<16 x i16> %a, <16 x i16> %b) {
2323 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11:
2325 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2326 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
2327 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2328 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
2329 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2332 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11:
2334 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2335 ; AVX2-NEXT: vpbroadcastq %xmm1, %xmm2
2336 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2337 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
2338 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2340 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 11>
2341 ret <16 x i16> %shuffle
2344 define <16 x i16> @shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
2345 ; AVX1-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu:
2347 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2348 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
2349 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2350 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2351 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2354 ; AVX2-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu:
2356 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3,24,25,26,27,28,29,22,23,24,25,26,27,16,17,18,19]
2358 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
2359 ret <16 x i16> %shuffle
2362 define <16 x i16> @shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11(<16 x i16> %a, <16 x i16> %b) {
2363 ; AVX1-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11:
2365 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2366 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
2367 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2368 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
2369 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2370 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2373 ; AVX2-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11:
2375 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2376 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
2377 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2378 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
2379 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2380 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
2382 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 11, i32 8, i32 9, i32 10, i32 15, i32 12, i32 13, i32 14, i32 11>
2383 ret <16 x i16> %shuffle
2386 define <16 x i16> @shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15(<16 x i16> %a, <16 x i16> %b) {
2387 ; AVX1-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15:
2389 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2390 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,2,3]
2391 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2392 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
2393 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2396 ; AVX2-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15:
2398 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2399 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,2,3]
2400 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2401 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
2402 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2404 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 15, i32 12, i32 13, i32 14, i32 11, i32 8, i32 9, i32 10, i32 15>
2405 ret <16 x i16> %shuffle
2408 define <16 x i16> @shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13(<16 x i16> %a, <16 x i16> %b) {
2409 ; AVX1-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
2411 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2412 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
2413 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2414 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
2415 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2416 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2419 ; AVX2-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
2421 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2422 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
2423 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2424 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
2425 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2426 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
2428 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 13, i32 11, i32 15, i32 9, i32 8, i32 10, i32 15, i32 11, i32 13>
2429 ret <16 x i16> %shuffle
2432 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) {
2433 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
2435 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2436 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2437 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
2438 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
2439 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15]
2440 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2441 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2444 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
2446 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
2447 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2448 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
2449 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,2,4,5,6,7]
2450 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,7]
2451 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
2452 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
2453 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
2455 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 27, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
2456 ret <16 x i16> %shuffle
2459 define <16 x i16> @shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31(<16 x i16> %a, <16 x i16> %b) {
2460 ; AVX1-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31:
2462 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2463 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
2464 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,0,1]
2465 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
2466 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
2467 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15]
2468 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2469 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2472 ; AVX2-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31:
2474 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
2475 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15]
2476 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm2
2477 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
2478 ; AVX2-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm0[7]
2479 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm0
2480 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0
2482 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 20, i32 1, i32 21, i32 2, i32 22, i32 3, i32 31, i32 8, i32 28, i32 9, i32 29, i32 10, i32 30, i32 11, i32 31>
2483 ret <16 x i16> %shuffle
2486 define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
2487 ; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31:
2489 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2490 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2491 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2492 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2493 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15]
2494 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2495 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2498 ; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31:
2500 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
2501 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2502 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2503 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,2,4,5,6,7]
2504 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,7]
2505 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
2506 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
2507 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
2509 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 31, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
2510 ret <16 x i16> %shuffle
2513 define <16 x i16> @shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27(<16 x i16> %a, <16 x i16> %b) {
2514 ; AVX1-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27:
2516 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2517 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2518 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
2519 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
2520 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
2521 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15]
2522 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2523 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2526 ; AVX2-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27:
2528 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
2529 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2530 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
2531 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [8,9,0,1,10,11,2,3,12,13,4,5,14,15,6,7]
2532 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2533 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2534 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2536 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 16, i32 5, i32 17, i32 6, i32 18, i32 7, i32 27, i32 12, i32 24, i32 13, i32 25, i32 14, i32 26, i32 15, i32 27>
2537 ret <16 x i16> %shuffle
2540 define <16 x i16> @shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
2541 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
2543 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2544 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[0,3,2,3]
2545 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
2546 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[0,3,2,3]
2547 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
2548 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,1,4,5,6,7]
2549 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
2550 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7]
2551 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15]
2552 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
2553 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2554 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2557 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
2559 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
2560 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,1,4,5,6,7]
2561 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
2562 ; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3]
2563 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,2,1,4,5,6,7]
2564 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
2565 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
2566 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15]
2567 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,5,7,7,8,9,10,11,14,13,15,15]
2568 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
2570 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 6, i32 22, i32 7, i32 31, i32 8, i32 24, i32 9, i32 25, i32 14, i32 30, i32 15, i32 31>
2571 ret <16 x i16> %shuffle
2574 define <16 x i16> @shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25(<16 x i16> %a, <16 x i16> %b) {
2575 ; AVX1-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
2577 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2578 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[2,0,2,3]
2579 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
2580 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[0,3,2,3]
2581 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
2582 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7]
2583 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,0,1,2,3,2,3,0,1,12,13,2,3]
2584 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
2585 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2586 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2589 ; AVX2-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
2591 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
2592 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [8,9,8,9,4,5,10,11,0,1,0,1,12,13,2,3]
2593 ; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm4
2594 ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7]
2595 ; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
2596 ; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm1
2597 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15]
2598 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,5,7,7,8,9,10,11,14,13,15,15]
2599 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
2601 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 20, i32 1, i32 21, i32 6, i32 16, i32 7, i32 25, i32 8, i32 28, i32 9, i32 29, i32 14, i32 24, i32 15, i32 25>
2602 ret <16 x i16> %shuffle
2605 define <16 x i16> @shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26(<16 x i16> %a, <16 x i16> %b) {
2606 ; AVX1-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
2608 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2609 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
2610 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,12,13,10,11,8,9,10,11,12,13,10,11]
2611 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[1,0,3,2,4,5,6,7]
2612 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
2613 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2614 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2615 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
2616 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
2617 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2620 ; AVX2-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
2622 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
2623 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
2624 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,1,2,0,4,5,6,7]
2625 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,5]
2626 ; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[0,1,2,3,2,3,0,1,8,9,10,11,6,7,4,5]
2627 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
2628 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,4,5,6,7,6,7,4,5,4,5,6,7,18,19,16,17,20,21,22,23,22,23,20,21,20,21,22,23]
2629 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
2631 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 0, i32 17, i32 16, i32 3, i32 2, i32 19, i32 26, i32 9, i32 8, i32 25, i32 24, i32 11, i32 10, i32 27, i32 26>
2632 ret <16 x i16> %shuffle
2635 define <16 x i16> @shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11(<16 x i16> %a, <16 x i16> %b) {
2636 ; AVX1-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
2638 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2639 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
2640 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
2641 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2642 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15]
2643 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2644 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2647 ; AVX2-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
2649 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
2650 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2651 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2652 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
2653 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
2654 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
2655 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11]
2656 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
2658 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 0, i32 17, i32 1, i32 18, i32 2, i32 19, i32 11, i32 24, i32 8, i32 25, i32 9, i32 26, i32 10, i32 27, i32 11>
2659 ret <16 x i16> %shuffle
2662 define <16 x i16> @shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15(<16 x i16> %a, <16 x i16> %b) {
2663 ; AVX1-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15:
2665 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2666 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
2667 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2668 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2669 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15]
2670 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2671 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2674 ; AVX2-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15:
2676 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
2677 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2678 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2679 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
2680 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
2681 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
2682 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15]
2683 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
2685 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 20, i32 4, i32 21, i32 5, i32 22, i32 6, i32 23, i32 15, i32 28, i32 12, i32 29, i32 13, i32 30, i32 14, i32 31, i32 15>
2686 ret <16 x i16> %shuffle
2689 define <16 x i16> @shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31(<16 x i16> %a, <16 x i16> %b) {
2690 ; AVX1-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31:
2692 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2693 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,2,1,3,4,5,6,7]
2694 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
2695 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
2696 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[0,2,1,3,4,5,6,7]
2697 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]
2698 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
2699 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,4,5,14,15,0,1,4,5,4,5,6,7]
2700 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
2701 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2702 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2705 ; AVX2-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31:
2707 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
2708 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2709 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
2710 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
2711 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2712 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
2713 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
2714 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2716 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 2, i32 1, i32 3, i32 20, i32 22, i32 21, i32 31, i32 8, i32 10, i32 9, i32 11, i32 28, i32 30, i32 29, i32 31>
2717 ret <16 x i16> %shuffle
2720 define <16 x i16> @shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
2721 ; AVX1-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu:
2723 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
2724 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,1,2,3]
2725 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,3,2,4,5,6,7]
2726 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
2727 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2728 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
2729 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
2730 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
2731 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
2734 ; AVX2-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu:
2736 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15]
2737 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,1,2,3,6,5,6,7]
2738 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,3,2,4,5,6,7,8,8,11,10,12,13,14,15]
2740 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 3, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 12, i32 11, i32 26, i32 undef, i32 undef, i32 undef, i32 undef>
2741 ret <16 x i16> %shuffle
2744 define <16 x i16> @shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
2745 ; AVX1-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu:
2747 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2748 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2749 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
2750 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
2751 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
2752 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
2753 ; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
2754 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2757 ; AVX2-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu:
2759 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
2760 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3,16,17,22,23,20,21,26,27,16,17,26,27,16,17,18,19]
2762 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 3, i32 2, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 11, i32 10, i32 29, i32 undef, i32 undef, i32 undef, i32 undef>
2763 ret <16 x i16> %shuffle
2766 define <16 x i16> @shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
2767 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu:
2769 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm1[0,2,2,3,4,6,6,7]
2772 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu:
2774 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm1[0,2,2,3,4,6,6,7]
2776 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 29, i32 undef, i32 undef, i32 undef, i32 undef>
2777 ret <16 x i16> %shuffle
2780 define <16 x i16> @shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
2781 ; AVX1-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu:
2783 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2784 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
2785 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
2786 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[3],xmm2[4,5,6,7]
2787 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
2788 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
2789 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2792 ; AVX2-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu:
2794 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
2795 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
2797 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 29, i32 undef, i32 undef, i32 undef, i32 undef>
2798 ret <16 x i16> %shuffle
2801 define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11(<16 x i16> %a, <16 x i16> %b) {
2802 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11:
2804 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2805 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2806 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2807 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm0[7]
2808 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
2809 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2812 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11:
2814 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,2]
2815 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
2817 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 20, i32 21, i32 22, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 11>
2818 ret <16 x i16> %shuffle
2821 define <16 x i16> @shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
2822 ; AVX1-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu:
2824 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2825 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
2826 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
2827 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[3],xmm3[4,5,6,7]
2828 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
2829 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
2830 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2833 ; AVX2-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu:
2835 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7]
2836 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6,7,8,9,10],ymm0[11],ymm1[12,13,14,15]
2838 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 20, i32 21, i32 22, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
2839 ret <16 x i16> %shuffle
2842 define <16 x i16> @shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11(<16 x i16> %a, <16 x i16> %b) {
2843 ; AVX1-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11:
2845 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2846 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2847 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[0,2,2,3]
2848 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[3,4,5,6],xmm0[7]
2849 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
2850 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
2851 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
2852 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2855 ; AVX2-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11:
2857 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
2858 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2859 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
2860 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
2861 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2862 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2863 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2865 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 21, i32 20, i32 21, i32 22, i32 11, i32 8, i32 9, i32 10, i32 29, i32 28, i32 29, i32 30, i32 11>
2866 ret <16 x i16> %shuffle
2869 define <16 x i16> @shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15(<16 x i16> %a, <16 x i16> %b) {
2870 ; AVX1-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15:
2872 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2873 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2874 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2,3],xmm2[4,5,6],xmm3[7]
2875 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
2876 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
2877 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2880 ; AVX2-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15:
2882 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
2883 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4,5,6],ymm0[7,8],ymm1[9],ymm0[10,11],ymm1[12,13,14],ymm0[15]
2885 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 20, i32 21, i32 22, i32 15, i32 8, i32 25, i32 10, i32 11, i32 28, i32 29, i32 30, i32 15>
2886 ret <16 x i16> %shuffle
2889 define <16 x i16> @shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25(<16 x i16> %a, <16 x i16> %b) {
2890 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
2892 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
2893 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
2894 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2895 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,1,2,1,4,5,6,7]
2896 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,7,7]
2897 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm1[7]
2898 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
2899 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
2900 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2901 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2904 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
2906 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2
2907 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
2908 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,2,1,4,5,6,7,8,9,10,9,12,13,14,15]
2909 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,7,7,8,9,10,11,12,13,15,15]
2910 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
2912 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 25, i32 undef, i32 undef, i32 undef, i32 9, i32 undef, i32 13, i32 15, i32 25>
2913 ret <16 x i16> %shuffle
2916 define <16 x i16> @shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu(<16 x i16> %a, <16 x i16> %b) {
2917 ; AVX1-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu:
2919 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2920 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
2921 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
2922 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
2923 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,2,3,3]
2924 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
2925 ; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1
2926 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
2927 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
2928 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2931 ; AVX2-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu:
2933 ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5,16,17,20,21,20,21,22,23,16,17,20,21,24,25,20,21]
2934 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
2935 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
2937 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 16, i32 18, i32 20, i32 undef, i32 undef, i32 undef, i32 12, i32 undef, i32 24, i32 26, i32 28, i32 undef>
2938 ret <16 x i16> %shuffle
2941 define <16 x i16> @shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12(<16 x i16> %a, <16 x i16> %b) {
2942 ; AVX1-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12:
2944 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2945 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2946 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
2947 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
2948 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
2949 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2950 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7]
2951 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2954 ; AVX2-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12:
2956 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5,6,7],ymm0[8,9,10,11,12],ymm1[13,14,15]
2957 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2958 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
2959 ; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
2960 ; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
2961 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2963 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 12, i32 29, i32 30, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12>
2964 ret <16 x i16> %shuffle
2967 define <16 x i16> @shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) {
2968 ; AVX1-LABEL: shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu:
2970 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2971 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2972 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
2973 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
2974 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2977 ; AVX2-LABEL: shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu:
2979 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[10,11,12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9],ymm1[26,27,28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25]
2981 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 22, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 30, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
2982 ret <16 x i16> %shuffle
2985 define <16 x i16> @shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12(<16 x i16> %a, <16 x i16> %b) {
2986 ; AVX1-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12:
2988 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2989 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
2990 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
2991 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
2992 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2995 ; AVX2-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12:
2997 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2998 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
2999 ; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
3000 ; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
3001 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3003 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12>
3004 ret <16 x i16> %shuffle
3007 define <16 x i16> @shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) {
3008 ; AVX1-LABEL: shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu:
3010 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
3011 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3012 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
3013 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3016 ; AVX2-LABEL: shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu:
3018 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
3020 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 14, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
3021 ret <16 x i16> %shuffle
3024 define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) {
3025 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu:
3027 ; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
3028 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3029 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
3030 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3033 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu:
3035 ; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9],zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25]
3037 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
3038 ret <16 x i16> %shuffle
3041 define <16 x i16> @shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10(<16 x i16> %a, <16 x i16> %b) {
3042 ; AVX1-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10:
3044 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3045 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
3046 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
3047 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
3048 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,4,5,6,7,8,9,0,1,4,5,10,11]
3049 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
3050 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4],xmm0[5,6,7]
3051 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3054 ; AVX2-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10:
3056 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7],ymm0[8,9,10],ymm1[11,12,13,14,15]
3057 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3058 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
3059 ; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
3060 ; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
3061 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3063 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 10, i32 27, i32 28, i32 29, i32 30, i32 31, i32 8, i32 9, i32 10>
3064 ret <16 x i16> %shuffle
3067 define <16 x i16> @shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu(<16 x i16> %a, <16 x i16> %b) {
3068 ; AVX1-LABEL: shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu:
3070 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3071 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
3072 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
3073 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
3074 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3077 ; AVX2-LABEL: shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu:
3079 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3,4,5],ymm1[22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19,20,21]
3081 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 20, i32 21, i32 22, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 undef, i32 undef, i32 9, i32 undef>
3082 ret <16 x i16> %shuffle
3085 define <16 x i16> @shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10(<16 x i16> %a, <16 x i16> %b) {
3086 ; AVX1-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10:
3088 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3089 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
3090 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
3091 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
3092 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3095 ; AVX2-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10:
3097 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3098 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
3099 ; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
3100 ; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
3101 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3103 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10>
3104 ret <16 x i16> %shuffle
3107 define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu(<16 x i16> %a, <16 x i16> %b) {
3108 ; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu:
3110 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
3111 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3112 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
3113 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3116 ; AVX2-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu:
3118 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21]
3120 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 9, i32 undef>
3121 ret <16 x i16> %shuffle
3124 define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
3125 ; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu:
3127 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
3128 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3129 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
3130 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3133 ; AVX2-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu:
3135 ; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,ymm0[22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero
3137 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 undef, i32 undef>
3138 ret <16 x i16> %shuffle
3141 define <16 x i16> @shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26(<16 x i16> %a, <16 x i16> %b) {
3142 ; AVX1-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26:
3144 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3145 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
3146 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
3147 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
3148 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,8,9,0,1,4,5,10,11]
3149 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
3150 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5,6,7]
3151 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3154 ; AVX2-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26:
3156 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
3157 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3158 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
3159 ; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
3160 ; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
3161 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3163 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 26, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26>
3164 ret <16 x i16> %shuffle
3167 define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu(<16 x i16> %a, <16 x i16> %b) {
3168 ; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu:
3170 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3171 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
3172 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
3173 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
3174 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3177 ; AVX2-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu:
3179 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3,4,5],ymm0[22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19,20,21]
3181 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 17, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 25, i32 undef>
3182 ret <16 x i16> %shuffle
3185 define <16 x i16> @shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28(<16 x i16> %a, <16 x i16> %b) {
3186 ; AVX1-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28:
3188 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3189 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
3190 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
3191 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
3192 ; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
3193 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
3194 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3,4,5,6,7]
3195 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3198 ; AVX2-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28:
3200 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4],ymm0[5,6,7],ymm1[8,9,10,11,12],ymm0[13,14,15]
3201 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3202 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
3203 ; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
3204 ; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
3205 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3207 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 28, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28>
3208 ret <16 x i16> %shuffle
3211 define <16 x i16> @shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu(<16 x i16> %a, <16 x i16> %b) {
3212 ; AVX1-LABEL: shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu:
3214 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3215 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
3216 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
3217 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
3218 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3221 ; AVX2-LABEL: shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu:
3223 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9],ymm0[26,27,28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25]
3225 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 14, i32 undef, i32 undef, i32 25, i32 26, i32 27, i32 undef>
3226 ret <16 x i16> %shuffle
3229 define <16 x i16> @shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu(<16 x i16> %a, <16 x i16> %b) {
3230 ; AVX1-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu:
3232 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3233 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3234 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
3235 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,5,4,4]
3236 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
3237 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3238 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
3239 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
3240 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3243 ; AVX2-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu:
3245 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6],ymm1[7],ymm0[8,9,10,11],ymm1[12],ymm0[13,14],ymm1[15]
3246 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,6,7,6,7,8,9,8,9,10,11,14,15,30,31,30,31,22,23,22,23,24,25,24,25,26,27,30,31]
3248 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 undef, i32 3, i32 undef, i32 20, i32 20, i32 5, i32 undef, i32 31, i32 undef, i32 11, i32 undef, i32 28, i32 28, i32 13, i32 undef>
3249 ret <16 x i16> %shuffle
3252 define <16 x i16> @insert_v16i16_0elt_into_zero_vector(i16* %ptr) {
3253 ; ALL-LABEL: insert_v16i16_0elt_into_zero_vector:
3255 ; ALL-NEXT: movzwl (%rdi), %eax
3256 ; ALL-NEXT: vmovd %eax, %xmm0
3258 %val = load i16, i16* %ptr
3259 %i0 = insertelement <16 x i16> zeroinitializer, i16 %val, i32 0
3263 define <16 x i16> @concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31(<16 x i16> %a, <16 x i16> %b) {
3264 ; AVX1-LABEL: concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31:
3266 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
3269 ; AVX2-LABEL: concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31:
3271 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
3273 %alo = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3274 %bhi = shufflevector <16 x i16> %b, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
3275 %shuf = shufflevector <8 x i16> %alo, <8 x i16> %bhi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
3276 ret <16 x i16> %shuf
3279 define <16 x i16> @concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc(<16 x i16> %a, <16 x i16> %b) {
3280 ; ALL-LABEL: concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc:
3282 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
3284 %ahi = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
3285 %bhi = shufflevector <16 x i16> %b, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
3286 %bc0hi = bitcast <8 x i16> %ahi to <16 x i8>
3287 %bc1hi = bitcast <8 x i16> %bhi to <16 x i8>
3288 %shuffle8 = shufflevector <16 x i8> %bc0hi, <16 x i8> %bc1hi, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
3289 %shuffle16 = bitcast <32 x i8> %shuffle8 to <16 x i16>
3290 ret <16 x i16> %shuffle16
3293 define <16 x i16> @insert_dup_mem_v16i16_i32(i32* %ptr) {
3294 ; AVX1-LABEL: insert_dup_mem_v16i16_i32:
3296 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3297 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
3298 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
3301 ; AVX2-LABEL: insert_dup_mem_v16i16_i32:
3303 ; AVX2-NEXT: vpbroadcastw (%rdi), %ymm0
3305 %tmp = load i32, i32* %ptr, align 4
3306 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
3307 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
3308 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> zeroinitializer
3309 ret <16 x i16> %tmp3
3312 define <16 x i16> @insert_dup_mem_v16i16_sext_i16(i16* %ptr) {
3313 ; AVX1-LABEL: insert_dup_mem_v16i16_sext_i16:
3315 ; AVX1-NEXT: movswl (%rdi), %eax
3316 ; AVX1-NEXT: vmovd %eax, %xmm0
3317 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
3318 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
3321 ; AVX2-LABEL: insert_dup_mem_v16i16_sext_i16:
3323 ; AVX2-NEXT: movswl (%rdi), %eax
3324 ; AVX2-NEXT: vmovd %eax, %xmm0
3325 ; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
3327 %tmp = load i16, i16* %ptr, align 2
3328 %tmp1 = sext i16 %tmp to i32
3329 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
3330 %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
3331 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <16 x i32> zeroinitializer
3332 ret <16 x i16> %tmp4
3335 define <16 x i16> @insert_dup_elt1_mem_v16i16_i32(i32* %ptr) #0 {
3336 ; AVX1-LABEL: insert_dup_elt1_mem_v16i16_i32:
3338 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3339 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
3340 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
3343 ; AVX2-LABEL: insert_dup_elt1_mem_v16i16_i32:
3345 ; AVX2-NEXT: vpbroadcastw 2(%rdi), %ymm0
3347 %tmp = load i32, i32* %ptr, align 4
3348 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
3349 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
3350 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
3351 ret <16 x i16> %tmp3
3354 define <16 x i16> @insert_dup_elt3_mem_v16i16_i32(i32* %ptr) #0 {
3355 ; AVX1-LABEL: insert_dup_elt3_mem_v16i16_i32:
3357 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3358 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
3359 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
3362 ; AVX2-LABEL: insert_dup_elt3_mem_v16i16_i32:
3364 ; AVX2-NEXT: vpbroadcastw 2(%rdi), %ymm0
3366 %tmp = load i32, i32* %ptr, align 4
3367 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
3368 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
3369 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
3370 ret <16 x i16> %tmp3