1 ; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=WIN32 %s
2 ; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=WIN64 %s
3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=NOT_WIN %s
5 declare <16 x float> @func_float16_ptr(<16 x float>, <16 x float> *)
6 declare <16 x float> @func_float16(<16 x float>, <16 x float>)
8 ; WIN64: vaddps {{.*}}, {{%ymm[0-1]}}
9 ; WIN64: vaddps {{.*}}, {{%ymm[0-1]}}
10 ; WIN64: leaq {{.*}}(%rsp), %rcx
15 ; WIN32: movl %eax, (%esp)
16 ; WIN32: vaddps {{.*}}, {{%ymm[0-1]}}
17 ; WIN32: vaddps {{.*}}, {{%ymm[0-1]}}
21 ; NOT_WIN: testf16_inp
22 ; NOT_WIN: vaddps {{.*}}, {{%ymm[0-1]}}
23 ; NOT_WIN: vaddps {{.*}}, {{%ymm[0-1]}}
24 ; NOT_WIN: leaq {{.*}}(%rsp), %rdi
28 ;test calling conventions - input parameters
29 define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
30 %y = alloca <16 x float>, align 16
31 %x = fadd <16 x float> %a, %b
32 %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
33 %2 = load <16 x float>* %y, align 16
34 %3 = fadd <16 x float> %2, %1
38 ;test calling conventions - preserved registers
40 ; preserved ymm6-ymm15
43 ; WIN64: vaddps {{%ymm[6-7]}}, %ymm0, %ymm0
44 ; WIN64: vaddps {{%ymm[6-7]}}, %ymm1, %ymm1
47 ; preserved ymm8-ymm15
48 ; NOT_WIN: testf16_regs
50 ; NOT_WIN: vaddps {{%ymm[8-9]}}, %ymm0, %ymm0
51 ; NOT_WIN: vaddps {{%ymm[8-9]}}, %ymm1, %ymm1
54 define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
55 %y = alloca <16 x float>, align 16
56 %x = fadd <16 x float> %a, %b
57 %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
58 %2 = load <16 x float>* %y, align 16
59 %3 = fadd <16 x float> %1, %b
60 %4 = fadd <16 x float> %2, %3
64 ; test calling conventions - prolog and epilog
65 ; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
66 ; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
67 ; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
68 ; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
69 ; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
70 ; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
71 ; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
72 ; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
73 ; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
74 ; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
76 ; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
77 ; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
78 ; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
79 ; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
80 ; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
81 ; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
82 ; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
83 ; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
84 ; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
85 ; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
87 ; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
88 ; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
89 ; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
90 ; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
91 ; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
92 ; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
93 ; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
94 ; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
96 ; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
97 ; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
98 ; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
99 ; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
100 ; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
101 ; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
102 ; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
103 ; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
104 define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x float> %b) nounwind {
105 %c = call <16 x float> @func_float16(<16 x float> %a, <16 x float> %b)