1 ; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s
; test1: reinterprets the float argument %x as an i32 via bitcast.
; Expected output: vmovd from %xmm0 to %eax whose encoding begins with 0x62.
4 ; CHECK: vmovd %xmm0, %eax ## encoding: [0x62
6 define i32 @test1(float %x) {
7 %res = bitcast float %x to i32
; test2: places the i32 argument %x in element 0 of an otherwise undef
; <4 x i32>. Expected output: vmovd from %edi to %xmm0, encoding begins 0x62.
12 ; CHECK: vmovd %edi, %xmm0 ## encoding: [0x62
14 define <4 x i32> @test2(i32 %x) {
15 %res = insertelement <4 x i32>undef, i32 %x, i32 0
; test3: places the i64 argument %x in element 0 of an otherwise undef
; <2 x i64>. Expected output: vmovq from %rdi to %xmm0, encoding begins 0x62.
20 ; CHECK: vmovq %rdi, %xmm0 ## encoding: [0x62
22 define <2 x i64> @test3(i64 %x) {
23 %res = insertelement <2 x i64>undef, i64 %x, i32 0
; test4: places an i32 value %y in element 0 of an otherwise undef <4 x i32>.
; NOTE(review): the definition of %y is not visible here; the expected
; vmovd (%rdi), %xmm0 suggests it is loaded through %x -- confirm.
28 ; CHECK: vmovd (%rdi), %xmm0 ## encoding: [0x62
30 define <4 x i32> @test4(i32* %x) {
32 %res = insertelement <4 x i32>undef, i32 %y, i32 0
; test5: stores the float argument %x through pointer %y with align 4.
; Expected output: vmovss from %xmm0 to (%rdi), encoding begins 0x62.
37 ; CHECK: vmovss %xmm0, (%rdi) ## encoding: [0x62
39 define void @test5(float %x, float* %y) {
40 store float %x, float* %y, align 4
; test6: stores the double argument %x through pointer %y with align 8.
; Expected output: vmovsd from %xmm0 to (%rdi), encoding begins 0x62.
45 ; CHECK: vmovsd %xmm0, (%rdi) ## encoding: [0x62
47 define void @test6(double %x, double* %y) {
48 store double %x, double* %y, align 8
; test7: reinterprets an i32 value %y as a float via bitcast.
; NOTE(review): the definition of %y is not visible here; the expected
; vmovss (%rdi), %xmm0 suggests it is loaded through %x -- confirm.
53 ; CHECK: vmovss (%rdi), %xmm0 ## encoding: [0x62
55 define float @test7(i32* %x) {
57 %res = bitcast i32 %y to float
; test8: extracts element 0 of the <4 x i32> argument.
; Expected output: vmovd from %xmm0 to %eax, encoding begins 0x62.
62 ; CHECK: vmovd %xmm0, %eax ## encoding: [0x62
64 define i32 @test8(<4 x i32> %x) {
65 %res = extractelement <4 x i32> %x, i32 0
; test9: extracts element 0 of the <2 x i64> argument.
; Expected output: vmovq from %xmm0 to %rax, encoding begins 0x62.
70 ; CHECK: vmovq %xmm0, %rax ## encoding: [0x62
72 define i64 @test9(<2 x i64> %x) {
73 %res = extractelement <2 x i64> %x, i32 0
; test10: loads an i32 through %x (align 4) and places it in element 0 of an
; all-zero <4 x i32>. Expected output: vmovd from (%rdi) to %xmm0, encoding
; begins 0x62.
77 ; CHECK-LABEL: @test10
78 ; CHECK: vmovd (%rdi), %xmm0 ## encoding: [0x62
80 define <4 x i32> @test10(i32* %x) {
81 %y = load i32* %x, align 4
82 %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
; test11: loads a float through %x (align 4) and places it in element 0 of an
; all-zero <4 x float>. Expected output: vmovss from (%rdi) to %xmm0, encoding
; begins 0x62.
86 ; CHECK-LABEL: @test11
87 ; CHECK: vmovss (%rdi), %xmm0 ## encoding: [0x62
89 define <4 x float> @test11(float* %x) {
90 %y = load float* %x, align 4
91 %res = insertelement <4 x float>zeroinitializer, float %y, i32 0
; test12: loads a double through %x (align 8) and places it in element 0 of an
; all-zero <2 x double>. Expected output: vmovsd from (%rdi) to %xmm0, encoding
; begins 0x62.
95 ; CHECK-LABEL: @test12
96 ; CHECK: vmovsd (%rdi), %xmm0 ## encoding: [0x62
98 define <2 x double> @test12(double* %x) {
99 %y = load double* %x, align 8
100 %res = insertelement <2 x double>zeroinitializer, double %y, i32 0
; test13: places the i64 argument %x in element 0 of an all-zero <2 x i64>.
; Expected output: vmovq from %rdi to %xmm0, encoding begins 0x62.
104 ; CHECK-LABEL: @test13
105 ; CHECK: vmovq %rdi, %xmm0 ## encoding: [0x62
107 define <2 x i64> @test13(i64 %x) {
108 %res = insertelement <2 x i64>zeroinitializer, i64 %x, i32 0
; test14: places the i32 argument %x in element 0 of an all-zero <4 x i32>.
; Expected output: vmovd from %edi to %xmm0, encoding begins 0x62.
112 ; CHECK-LABEL: @test14
113 ; CHECK: vmovd %edi, %xmm0 ## encoding: [0x62
115 define <4 x i32> @test14(i32 %x) {
116 %res = insertelement <4 x i32>zeroinitializer, i32 %x, i32 0
; test15: loads an i32 through %x (align 4) and places it in element 0 of an
; all-zero <4 x i32>. Expected output: vmovd from (%rdi) to %xmm0, encoding
; begins 0x62.
; NOTE(review): the visible IR and expectation are the same as test10 --
; confirm whether the duplication is intentional.
120 ; CHECK-LABEL: @test15
121 ; CHECK: vmovd (%rdi), %xmm0 ## encoding: [0x62
123 define <4 x i32> @test15(i32* %x) {
124 %y = load i32* %x, align 4
125 %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
; test16: reinterprets %addr as a pointer to <16 x i32> and loads the vector
; with align 1. Only the label directive is visible here; the expected
; instruction is not shown.
129 ; CHECK-LABEL: test16
132 define <16 x i32> @test16(i8 * %addr) {
133 %vaddr = bitcast i8* %addr to <16 x i32>*
134 %res = load <16 x i32>* %vaddr, align 1
; test17: reinterprets %addr as a pointer to <16 x i32> and loads the vector
; with align 64. Only the label directive is visible here; the expected
; instruction is not shown.
138 ; CHECK-LABEL: test17
141 define <16 x i32> @test17(i8 * %addr) {
142 %vaddr = bitcast i8* %addr to <16 x i32>*
143 %res = load <16 x i32>* %vaddr, align 64
; test18: reinterprets %addr as a pointer to <8 x i64> and stores %data there
; with align 64. Only the label directive is visible here; the expected
; instruction is not shown.
147 ; CHECK-LABEL: test18
150 define void @test18(i8 * %addr, <8 x i64> %data) {
151 %vaddr = bitcast i8* %addr to <8 x i64>*
152 store <8 x i64>%data, <8 x i64>* %vaddr, align 64
; test19: reinterprets %addr as a pointer to <16 x i32> and stores %data there
; with align 1. Only the label directive is visible here; the expected
; instruction is not shown.
156 ; CHECK-LABEL: test19
159 define void @test19(i8 * %addr, <16 x i32> %data) {
160 %vaddr = bitcast i8* %addr to <16 x i32>*
161 store <16 x i32>%data, <16 x i32>* %vaddr, align 1
; test20: reinterprets %addr as a pointer to <16 x i32> and stores %data there
; with align 64. Only the label directive is visible here; the expected
; instruction is not shown.
165 ; CHECK-LABEL: test20
168 define void @test20(i8 * %addr, <16 x i32> %data) {
169 %vaddr = bitcast i8* %addr to <16 x i32>*
170 store <16 x i32>%data, <16 x i32>* %vaddr, align 64
; test21: reinterprets %addr as a pointer to <8 x i64> and loads the vector
; with align 64. Only the label directive is visible here; the expected
; instruction is not shown.
174 ; CHECK-LABEL: test21
177 define <8 x i64> @test21(i8 * %addr) {
178 %vaddr = bitcast i8* %addr to <8 x i64>*
179 %res = load <8 x i64>* %vaddr, align 64
; test22: reinterprets %addr as a pointer to <8 x i64> and stores %data there
; with align 1. Only the label directive is visible here; the expected
; instruction is not shown.
183 ; CHECK-LABEL: test22
186 define void @test22(i8 * %addr, <8 x i64> %data) {
187 %vaddr = bitcast i8* %addr to <8 x i64>*
188 store <8 x i64>%data, <8 x i64>* %vaddr, align 1
; test23: reinterprets %addr as a pointer to <8 x i64> and loads the vector
; with align 1. Only the label directive is visible here; the expected
; instruction is not shown.
192 ; CHECK-LABEL: test23
195 define <8 x i64> @test23(i8 * %addr) {
196 %vaddr = bitcast i8* %addr to <8 x i64>*
197 %res = load <8 x i64>* %vaddr, align 1
; test24: reinterprets %addr as a pointer to <8 x double> and stores %data
; there with align 64. Only the label directive is visible here; the expected
; instruction is not shown.
201 ; CHECK-LABEL: test24
204 define void @test24(i8 * %addr, <8 x double> %data) {
205 %vaddr = bitcast i8* %addr to <8 x double>*
206 store <8 x double>%data, <8 x double>* %vaddr, align 64
; test25: reinterprets %addr as a pointer to <8 x double> and loads the vector
; with align 64. Only the label directive is visible here; the expected
; instruction is not shown.
210 ; CHECK-LABEL: test25
213 define <8 x double> @test25(i8 * %addr) {
214 %vaddr = bitcast i8* %addr to <8 x double>*
215 %res = load <8 x double>* %vaddr, align 64
; test26: reinterprets %addr as a pointer to <16 x float> and stores %data
; there with align 64. Only the label directive is visible here; the expected
; instruction is not shown.
219 ; CHECK-LABEL: test26
222 define void @test26(i8 * %addr, <16 x float> %data) {
223 %vaddr = bitcast i8* %addr to <16 x float>*
224 store <16 x float>%data, <16 x float>* %vaddr, align 64
; test27: reinterprets %addr as a pointer to <16 x float> and loads the vector
; with align 64. Only the label directive is visible here; the expected
; instruction is not shown.
228 ; CHECK-LABEL: test27
231 define <16 x float> @test27(i8 * %addr) {
232 %vaddr = bitcast i8* %addr to <16 x float>*
233 %res = load <16 x float>* %vaddr, align 64
; test28: reinterprets %addr as a pointer to <8 x double> and stores %data
; there with align 1. Only the label directive is visible here; the expected
; instruction is not shown.
237 ; CHECK-LABEL: test28
240 define void @test28(i8 * %addr, <8 x double> %data) {
241 %vaddr = bitcast i8* %addr to <8 x double>*
242 store <8 x double>%data, <8 x double>* %vaddr, align 1
; test29: reinterprets %addr as a pointer to <8 x double> and loads the vector
; with align 1. Only the label directive is visible here; the expected
; instruction is not shown.
246 ; CHECK-LABEL: test29
249 define <8 x double> @test29(i8 * %addr) {
250 %vaddr = bitcast i8* %addr to <8 x double>*
251 %res = load <8 x double>* %vaddr, align 1
; test30: reinterprets %addr as a pointer to <16 x float> and stores %data
; there with align 1. Only the label directive is visible here; the expected
; instruction is not shown.
255 ; CHECK-LABEL: test30
258 define void @test30(i8 * %addr, <16 x float> %data) {
259 %vaddr = bitcast i8* %addr to <16 x float>*
260 store <16 x float>%data, <16 x float>* %vaddr, align 1
; test31: reinterprets %addr as a pointer to <16 x float> and loads the vector
; with align 1. Only the label directive is visible here; the expected
; instruction is not shown.
264 ; CHECK-LABEL: test31
267 define <16 x float> @test31(i8 * %addr) {
268 %vaddr = bitcast i8* %addr to <16 x float>*
269 %res = load <16 x float>* %vaddr, align 1
; test32: builds a per-element mask from (%mask1 != 0), loads <16 x i32> from
; %addr with align 64, and selects the loaded element where the mask is set and
; the %old element otherwise. Expected output: vmovdqa32 followed on the same
; line by a {%kN} mask operand, N in 1..7.
273 ; CHECK-LABEL: test32
274 ; CHECK: vmovdqa32{{.*{%k[1-7]} }}
276 define <16 x i32> @test32(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
277 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
278 %vaddr = bitcast i8* %addr to <16 x i32>*
279 %r = load <16 x i32>* %vaddr, align 64
280 %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old
; test33: builds a per-element mask from (%mask1 != 0), loads <16 x i32> from
; %addr with align 1, and selects the loaded element where the mask is set and
; the %old element otherwise. Expected output: vmovdqu32 followed on the same
; line by a {%kN} mask operand, N in 1..7.
284 ; CHECK-LABEL: test33
285 ; CHECK: vmovdqu32{{.*{%k[1-7]} }}
287 define <16 x i32> @test33(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
288 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
289 %vaddr = bitcast i8* %addr to <16 x i32>*
290 %r = load <16 x i32>* %vaddr, align 1
291 %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old
; test34: builds a per-element mask from (%mask1 != 0), loads <16 x i32> from
; %addr with align 64, and selects the loaded element where the mask is set and
; zero otherwise. Expected output: vmovdqa32 followed on the same line by a
; {%kN} mask operand (N in 1..7) and the {z} marker.
295 ; CHECK-LABEL: test34
296 ; CHECK: vmovdqa32{{.*{%k[1-7]} {z} }}
298 define <16 x i32> @test34(i8 * %addr, <16 x i32> %mask1) {
299 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
300 %vaddr = bitcast i8* %addr to <16 x i32>*
301 %r = load <16 x i32>* %vaddr, align 64
302 %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer
; test35: builds a per-element mask from (%mask1 != 0), loads <16 x i32> from
; %addr with align 1, and selects the loaded element where the mask is set and
; zero otherwise. Expected output: vmovdqu32 followed on the same line by a
; {%kN} mask operand (N in 1..7) and the {z} marker.
306 ; CHECK-LABEL: test35
307 ; CHECK: vmovdqu32{{.*{%k[1-7]} {z} }}
309 define <16 x i32> @test35(i8 * %addr, <16 x i32> %mask1) {
310 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
311 %vaddr = bitcast i8* %addr to <16 x i32>*
312 %r = load <16 x i32>* %vaddr, align 1
313 %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer
; test36: builds a per-element mask from (%mask1 != 0), loads <8 x i64> from
; %addr with align 64, and selects the loaded element where the mask is set and
; the %old element otherwise. Expected output: vmovdqa64 followed on the same
; line by a {%kN} mask operand, N in 1..7.
317 ; CHECK-LABEL: test36
318 ; CHECK: vmovdqa64{{.*{%k[1-7]} }}
320 define <8 x i64> @test36(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
321 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
322 %vaddr = bitcast i8* %addr to <8 x i64>*
323 %r = load <8 x i64>* %vaddr, align 64
324 %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old
; test37: builds a per-element mask from (%mask1 != 0), loads <8 x i64> from
; %addr with align 1, and selects the loaded element where the mask is set and
; the %old element otherwise. Expected output: vmovdqu64 followed on the same
; line by a {%kN} mask operand, N in 1..7.
328 ; CHECK-LABEL: test37
329 ; CHECK: vmovdqu64{{.*{%k[1-7]} }}
331 define <8 x i64> @test37(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
332 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
333 %vaddr = bitcast i8* %addr to <8 x i64>*
334 %r = load <8 x i64>* %vaddr, align 1
335 %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old
; test38: builds a per-element mask from (%mask1 != 0), loads <8 x i64> from
; %addr with align 64, and selects the loaded element where the mask is set and
; zero otherwise. Expected output: vmovdqa64 followed on the same line by a
; {%kN} mask operand (N in 1..7) and the {z} marker.
339 ; CHECK-LABEL: test38
340 ; CHECK: vmovdqa64{{.*{%k[1-7]} {z} }}
342 define <8 x i64> @test38(i8 * %addr, <8 x i64> %mask1) {
343 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
344 %vaddr = bitcast i8* %addr to <8 x i64>*
345 %r = load <8 x i64>* %vaddr, align 64
346 %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer
; test39: builds a per-element mask from (%mask1 != 0), loads <8 x i64> from
; %addr with align 1, and selects the loaded element where the mask is set and
; zero otherwise. Expected output: vmovdqu64 followed on the same line by a
; {%kN} mask operand (N in 1..7) and the {z} marker.
350 ; CHECK-LABEL: test39
351 ; CHECK: vmovdqu64{{.*{%k[1-7]} {z} }}
353 define <8 x i64> @test39(i8 * %addr, <8 x i64> %mask1) {
354 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
355 %vaddr = bitcast i8* %addr to <8 x i64>*
356 %r = load <8 x i64>* %vaddr, align 1
357 %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer
; test40: builds a per-element mask with fcmp one of %mask1 against zero, loads
; <16 x float> from %addr with align 64, and selects the loaded element where
; the mask is set and the %old element otherwise. Expected output: vmovaps
; followed on the same line by a {%kN} mask operand, N in 1..7.
361 ; CHECK-LABEL: test40
362 ; CHECK: vmovaps{{.*{%k[1-7]} }}
364 define <16 x float> @test40(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
365 %mask = fcmp one <16 x float> %mask1, zeroinitializer
366 %vaddr = bitcast i8* %addr to <16 x float>*
367 %r = load <16 x float>* %vaddr, align 64
368 %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old
; test41: builds a per-element mask with fcmp one of %mask1 against zero, loads
; <16 x float> from %addr with align 1, and selects the loaded element where
; the mask is set and the %old element otherwise. Expected output: vmovups
; followed on the same line by a {%kN} mask operand, N in 1..7.
372 ; CHECK-LABEL: test41
373 ; CHECK: vmovups{{.*{%k[1-7]} }}
375 define <16 x float> @test41(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
376 %mask = fcmp one <16 x float> %mask1, zeroinitializer
377 %vaddr = bitcast i8* %addr to <16 x float>*
378 %r = load <16 x float>* %vaddr, align 1
379 %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old
; test42: builds a per-element mask with fcmp one of %mask1 against zero, loads
; <16 x float> from %addr with align 64, and selects the loaded element where
; the mask is set and zero otherwise. Expected output: vmovaps followed on the
; same line by a {%kN} mask operand (N in 1..7) and the {z} marker.
383 ; CHECK-LABEL: test42
384 ; CHECK: vmovaps{{.*{%k[1-7]} {z} }}
386 define <16 x float> @test42(i8 * %addr, <16 x float> %mask1) {
387 %mask = fcmp one <16 x float> %mask1, zeroinitializer
388 %vaddr = bitcast i8* %addr to <16 x float>*
389 %r = load <16 x float>* %vaddr, align 64
390 %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer
; test43: builds a per-element mask with fcmp one of %mask1 against zero, loads
; <16 x float> from %addr with align 1, and selects the loaded element where
; the mask is set and zero otherwise. Expected output: vmovups followed on the
; same line by a {%kN} mask operand (N in 1..7) and the {z} marker.
394 ; CHECK-LABEL: test43
395 ; CHECK: vmovups{{.*{%k[1-7]} {z} }}
397 define <16 x float> @test43(i8 * %addr, <16 x float> %mask1) {
398 %mask = fcmp one <16 x float> %mask1, zeroinitializer
399 %vaddr = bitcast i8* %addr to <16 x float>*
400 %r = load <16 x float>* %vaddr, align 1
401 %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer
; test44: builds a per-element mask with fcmp one of %mask1 against zero, loads
; <8 x double> from %addr with align 64, and selects the loaded element where
; the mask is set and the %old element otherwise. Expected output: vmovapd
; followed on the same line by a {%kN} mask operand, N in 1..7.
405 ; CHECK-LABEL: test44
406 ; CHECK: vmovapd{{.*{%k[1-7]} }}
408 define <8 x double> @test44(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
409 %mask = fcmp one <8 x double> %mask1, zeroinitializer
410 %vaddr = bitcast i8* %addr to <8 x double>*
411 %r = load <8 x double>* %vaddr, align 64
412 %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old
; test45: builds a per-element mask with fcmp one of %mask1 against zero, loads
; <8 x double> from %addr with align 1, and selects the loaded element where
; the mask is set and the %old element otherwise. Expected output: vmovupd
; followed on the same line by a {%kN} mask operand, N in 1..7.
416 ; CHECK-LABEL: test45
417 ; CHECK: vmovupd{{.*{%k[1-7]} }}
419 define <8 x double> @test45(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
420 %mask = fcmp one <8 x double> %mask1, zeroinitializer
421 %vaddr = bitcast i8* %addr to <8 x double>*
422 %r = load <8 x double>* %vaddr, align 1
423 %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old
; test46: builds a per-element mask with fcmp one of %mask1 against zero, loads
; <8 x double> from %addr with align 64, and selects the loaded element where
; the mask is set and zero otherwise. Expected output: vmovapd followed on the
; same line by a {%kN} mask operand (N in 1..7) and the {z} marker.
427 ; CHECK-LABEL: test46
428 ; CHECK: vmovapd{{.*{%k[1-7]} {z} }}
430 define <8 x double> @test46(i8 * %addr, <8 x double> %mask1) {
431 %mask = fcmp one <8 x double> %mask1, zeroinitializer
432 %vaddr = bitcast i8* %addr to <8 x double>*
433 %r = load <8 x double>* %vaddr, align 64
434 %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer
; test47: builds a per-element mask with fcmp one of %mask1 against zero, loads
; <8 x double> from %addr with align 1, and selects the loaded element where
; the mask is set and zero otherwise. Expected output: vmovupd followed on the
; same line by a {%kN} mask operand (N in 1..7) and the {z} marker.
438 ; CHECK-LABEL: test47
439 ; CHECK: vmovupd{{.*{%k[1-7]} {z} }}
441 define <8 x double> @test47(i8 * %addr, <8 x double> %mask1) {
442 %mask = fcmp one <8 x double> %mask1, zeroinitializer
443 %vaddr = bitcast i8* %addr to <8 x double>*
444 %r = load <8 x double>* %vaddr, align 1
445 %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer