1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s
22 define <16 x float> @test1(<16 x float> %a) nounwind {
23 %c = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1, i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1>
30 define <16 x i32> @test2(<16 x i32> %a) nounwind {
31 %c = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1, i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1>
38 define <8 x i64> @test3(<8 x i64> %a) nounwind {
39 %c = shufflevector <8 x i64> %a, <8 x i64> undef, <8 x i32> <i32 2, i32 5, i32 1, i32 undef, i32 7, i32 undef, i32 3, i32 1>
46 define <8 x double> @test4(<8 x double> %a) nounwind {
47 %c = shufflevector <8 x double> %a, <8 x double> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
54 define <8 x double> @test5(<8 x double> %a, <8 x double> %b) nounwind {
55 %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
59 ; The register-operand variant of vpermt2pd with a zeroing writemask ({z})
60 ; CHECK-LABEL: test5m:
61 ; CHECK: vpermt2pd {{.* {%k[1-7]} {z}}}
62 define <8 x double> @test5m(<8 x double> %a, <8 x double> %b, i8 %mask) nounwind {
63 %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
64 %m = bitcast i8 %mask to <8 x i1>
65 %res = select <8 x i1> %m, <8 x double> %c, <8 x double> zeroinitializer
72 define <8 x i64> @test6(<8 x i64> %a) nounwind {
73 %c = shufflevector <8 x i64> %a, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 1, i32 0, i32 6, i32 7, i32 5, i32 4>
80 define <8 x i64> @test7(<8 x i64> %a, <8 x i64> %b) nounwind {
81 %c = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
85 ; The register-operand variant of vpermt2q with a zeroing writemask ({z})
86 ; CHECK-LABEL: test7m:
87 ; CHECK: vpermt2q {{.* {%k[1-7]} {z}}}
88 define <8 x i64> @test7m(<8 x i64> %a, <8 x i64> %b, i8 %mask) nounwind {
89 %c = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
90 %m = bitcast i8 %mask to <8 x i1>
91 %res = select <8 x i1> %m, <8 x i64> %c, <8 x i64> zeroinitializer
95 ; The memory-operand variant of vpermt2q (second source loaded from %pb) with a zeroing writemask ({z})
96 ; CHECK-LABEL: test7mm:
97 ; CHECK: vpermt2q {{\(.*\).* {%k[1-7]} {z}}}
98 define <8 x i64> @test7mm(<8 x i64> %a, <8 x i64> *%pb, i8 %mask) nounwind {
99 %b = load <8 x i64>* %pb
100 %c = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
101 %m = bitcast i8 %mask to <8 x i1>
102 %res = select <8 x i1> %m, <8 x i64> %c, <8 x i64> zeroinitializer
106 ; CHECK-LABEL: test8:
109 define <16 x i32> @test8(<16 x i32> %a, <16 x i32> %b) nounwind {
110 %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
114 ; The register-operand variant of vpermt2d with a zeroing writemask ({z})
115 ; CHECK-LABEL: test8m:
116 ; CHECK: vpermt2d {{.* {%k[1-7]} {z}}}
117 define <16 x i32> @test8m(<16 x i32> %a, <16 x i32> %b, i16 %mask) nounwind {
118 %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
119 %m = bitcast i16 %mask to <16 x i1>
120 %res = select <16 x i1> %m, <16 x i32> %c, <16 x i32> zeroinitializer
124 ; The memory-operand variant of vpermt2d (second source loaded from %pb) with a zeroing writemask ({z})
125 ; CHECK-LABEL: test8mm:
126 ; CHECK: vpermt2d {{\(.*\).* {%k[1-7]} {z}}}
127 define <16 x i32> @test8mm(<16 x i32> %a, <16 x i32> *%pb, i16 %mask) nounwind {
128 %b = load <16 x i32> * %pb
129 %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
130 %m = bitcast i16 %mask to <16 x i1>
131 %res = select <16 x i1> %m, <16 x i32> %c, <16 x i32> zeroinitializer
135 ; CHECK-LABEL: test9:
138 define <16 x float> @test9(<16 x float> %a, <16 x float> %b) nounwind {
139 %c = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
143 ; The register-operand variant of vpermt2ps with a zeroing writemask ({z})
144 ; CHECK-LABEL: test9m:
145 ; CHECK: vpermt2ps {{.*}} {%k{{.}}} {z}
146 define <16 x float> @test9m(<16 x float> %a, <16 x float> %b, i16 %mask) nounwind {
147 %c = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
148 %m = bitcast i16 %mask to <16 x i1>
149 %res = select <16 x i1> %m, <16 x float> %c, <16 x float> zeroinitializer
150 ret <16 x float> %res
153 ; CHECK-LABEL: test10:
156 define <16 x float> @test10(<16 x float> %a, <16 x float>* %b) nounwind {
157 %c = load <16 x float>* %b
158 %d = shufflevector <16 x float> %a, <16 x float> %c, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
162 ; CHECK-LABEL: test11:
165 define <16 x i32> @test11(<16 x i32> %a, <16 x i32>* %b) nounwind {
166 %c = load <16 x i32>* %b
167 %d = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
171 ; CHECK-LABEL: test12
172 ; CHECK: vmovlhps {{.*}}## encoding: [0x62
174 define <4 x i32> @test12(<4 x i32> %a, <4 x i32> %b) nounwind {
175 %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
179 ; CHECK-LABEL: test13
180 ; CHECK: vpermilps $-79, %zmm
182 define <16 x float> @test13(<16 x float> %a) {
183 %b = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32><i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
187 ; CHECK-LABEL: test14
188 ; CHECK: vpermilpd $-53, %zmm
190 define <8 x double> @test14(<8 x double> %a) {
191 %b = shufflevector <8 x double> %a, <8 x double> undef, <8 x i32><i32 1, i32 1, i32 2, i32 3, i32 4, i32 4, i32 7, i32 7>
195 ; CHECK-LABEL: test15
196 ; CHECK: vpshufd $-79, %zmm
198 define <16 x i32> @test15(<16 x i32> %a) {
199 %b = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32><i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
203 ; CHECK-LABEL: valign_test_v16f32
204 ; CHECK: valignd $2, %zmm0, %zmm0
206 define <16 x float> @valign_test_v16f32(<16 x float> %a, <16 x float> %b) nounwind {
207 %c = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32><i32 2, i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 undef, i32 10, i32 11, i32 undef, i32 undef, i32 14, i32 15, i32 undef, i32 undef>
211 ; CHECK-LABEL: valign_test_v16i32
212 ; CHECK: valignd $2, %zmm0, %zmm0
214 define <16 x i32> @valign_test_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
215 %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 2, i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 undef, i32 10, i32 11, i32 undef, i32 undef, i32 14, i32 15, i32 undef, i32 undef>
220 ; CHECK-LABEL: test16
221 ; CHECK: valignq $2, %zmm0, %zmm1
223 define <8 x double> @test16(<8 x double> %a, <8 x double> %b) nounwind {
224 %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
228 ; CHECK-LABEL: test16k
229 ; CHECK: valignq $2, %zmm0, %zmm1, %zmm2 {%k1} #
230 define <8 x i64> @test16k(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) nounwind {
231 %c = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
232 %m = bitcast i8 %mask to <8 x i1>
233 %res = select <8 x i1> %m, <8 x i64> %c, <8 x i64> %src
237 ; CHECK-LABEL: test16kz
238 ; CHECK: valignq $2, %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xc9,0x03,0xc0,0x02]
239 define <8 x i64> @test16kz(<8 x i64> %a, <8 x i64> %b, i8 %mask) nounwind {
240 %c = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
241 %m = bitcast i8 %mask to <8 x i1>
242 %res = select <8 x i1> %m, <8 x i64> %c, <8 x i64> zeroinitializer
246 ; CHECK-LABEL: test17
247 ; CHECK: vshufpd $19, %zmm1, %zmm0
249 define <8 x double> @test17(<8 x double> %a, <8 x double> %b) nounwind {
250 %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 9, i32 2, i32 10, i32 5, i32 undef, i32 undef, i32 undef>
254 ; CHECK-LABEL: test18
255 ; CHECK: vpunpckhdq %zmm
257 define <16 x i32> @test18(<16 x i32> %a, <16 x i32> %c) {
258 %b = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32><i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15, i32 18, i32 26, i32 19, i32 27, i32 22, i32 30, i32 23, i32 31>
262 ; CHECK-LABEL: test19
263 ; CHECK: vpunpckldq %zmm
265 define <16 x i32> @test19(<16 x i32> %a, <16 x i32> %c) {
266 %b = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32><i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13, i32 16, i32 24, i32 17, i32 25, i32 20, i32 28, i32 21, i32 29>
270 ; CHECK-LABEL: test20
271 ; CHECK: vpunpckhqdq %zmm
273 define <8 x i64> @test20(<8 x i64> %a, <8 x i64> %c) {
274 %b = shufflevector <8 x i64> %a, <8 x i64> %c, <8 x i32><i32 1, i32 5, i32 3, i32 7, i32 9, i32 13, i32 11, i32 15>
278 ; CHECK-LABEL: test21
279 ; CHECK: vunpcklps %zmm
281 define <16 x float> @test21(<16 x float> %a, <16 x float> %c) {
282 %b = shufflevector <16 x float> %a, <16 x float> %c, <16 x i32><i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13, i32 16, i32 24, i32 17, i32 25, i32 20, i32 28, i32 21, i32 29>
286 ; CHECK-LABEL: test22
287 ; CHECK: vmovhlps {{.*}}## encoding: [0x62
289 define <4 x i32> @test22(<4 x i32> %a, <4 x i32> %b) nounwind {
290 %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
294 ; CHECK-LABEL: @test23
295 ; CHECK: vshufps $-112, %zmm
297 define <16 x float> @test23(<16 x float> %a, <16 x float> %c) {
298 %b = shufflevector <16 x float> %a, <16 x float> %c, <16 x i32><i32 0, i32 0, i32 17, i32 18, i32 4, i32 4, i32 21, i32 22, i32 8, i32 8, i32 25, i32 26, i32 12, i32 12, i32 29, i32 30>
302 ; CHECK-LABEL: @test24
305 define <16 x i32> @test24(<16 x i32> %a, <16 x i32> %b) nounwind {
306 %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
310 ; CHECK-LABEL: @test25
313 define <16 x i32> @test25(<16 x i32> %a, <16 x i32> %b) nounwind {
314 %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 19, i32 undef, i32 4, i32 5, i32 23, i32 undef, i32 8, i32 9, i32 27, i32 undef, i32 12, i32 13, i32 undef, i32 undef>
318 ; CHECK-LABEL: @test26
321 define <16 x i32> @test26(<16 x i32> %a) nounwind {
322 %c = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 undef, i32 9, i32 9, i32 undef, i32 11, i32 13, i32 undef, i32 undef, i32 undef>
326 ; CHECK-LABEL: @test27
328 define <16 x i32> @test27(<4 x i32>%a) {
329 %res = shufflevector <4 x i32> %a, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
333 ; CHECK-LABEL: @test28
334 ; CHECK: vinserti64x4 $1
336 define <16 x i32> @test28(<16 x i32>%x, <16 x i32>%y) {
337 %res = shufflevector <16 x i32>%x, <16 x i32>%y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
338 i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
342 ; CHECK-LABEL: @test29
343 ; CHECK: vinserti64x4 $0
345 define <16 x i32> @test29(<16 x i32>%x, <16 x i32>%y) {
346 %res = shufflevector <16 x i32>%x, <16 x i32>%y, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
347 i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>