1 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; Attribute group #0 is referenced by @vgpr, @vgpr_literal, @sgpr,
; @sgpr_literal, @both and @structure_literal below.
; NOTE(review): the meaning of "ShaderType"="1" is not established by this
; file - confirm against the backend before relying on it.
4 attributes #0 = { "ShaderType"="1" }
; Export intrinsic: five i32 operands followed by four float operands. Several
; tests call it on the incoming float argument before returning.
6 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; @vgpr: exports the incoming float argument (%3) and builds a {float, float}
; value holding (%3 + 1.0, %3).
; The checks expect the argument to be copied from v0 to v1 so that v0 can take
; the fadd result, the export to read v1, and a wait on expcnt(0) afterwards.
; NOTE(review): the ret instruction and the closing brace of this function are
; not among the lines visible here.
8 ; GCN-LABEL: {{^}}vgpr:
9 ; GCN: v_mov_b32_e32 v1, v0
10 ; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
11 ; GCN-DAG: exp 15, 0, 1, 1, 1, v1, v1, v1, v1
12 ; GCN: s_waitcnt expcnt(0)
14 define {float, float} @vgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) #0 {
15 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
16 %x = fadd float %3, 1.0
; Element 0 is the sum, element 1 is the unmodified argument.
17 %a = insertvalue {float, float} undef, float %x, 0
18 %b = insertvalue {float, float} %a, float %3, 1
; @vgpr_literal: exports the incoming float argument (%3) and returns a
; constant {1.0, 2.0, 4.0, -1.0} struct.
; The checks expect the argument to be moved out of v0 into v4 first, the four
; constants to be materialised in v0..v3 (in any order), and the export to read
; v4, followed by a wait on expcnt(0).
22 ; GCN-LABEL: {{^}}vgpr_literal:
23 ; GCN: v_mov_b32_e32 v4, v0
24 ; GCN-DAG: v_mov_b32_e32 v0, 1.0
25 ; GCN-DAG: v_mov_b32_e32 v1, 2.0
26 ; GCN-DAG: v_mov_b32_e32 v2, 4.0
27 ; GCN-DAG: v_mov_b32_e32 v3, -1.0
28 ; GCN: exp 15, 0, 1, 1, 1, v4, v4, v4, v4
29 ; GCN: s_waitcnt expcnt(0)
31 define {float, float, float, float} @vgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) #0 {
32 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
33 ret {float, float, float, float} {float 1.0, float 2.0, float 4.0, float -1.0}
; @vgpr_ps_addr0: uses attribute group #1 ("InitialPSInputAddr"="0") and
; returns five floats taken from the inputs: %4[0], %4[1], %7[0], %8[0], %12.
; The checks expect no moves into v0, v1 or v2, and expect v3 and v4 to be
; filled from v4 and v6 respectively.
; NOTE(review): the .long check below is a NEXT-style check whose preceding
; checks are not among the visible lines; the meaning of the value 165584 is
; not established by this file.
39 ; GCN-NEXT: .long 165584
41 ; GCN-LABEL: {{^}}vgpr_ps_addr0:
42 ; GCN-NOT: v_mov_b32_e32 v0
43 ; GCN-NOT: v_mov_b32_e32 v1
44 ; GCN-NOT: v_mov_b32_e32 v2
45 ; GCN: v_mov_b32_e32 v3, v4
46 ; GCN: v_mov_b32_e32 v4, v6
; Attribute group #1 is also used by @ps_input_ena_no_inputs.
48 attributes #1 = { "ShaderType"="0" "InitialPSInputAddr"="0" }
49 define {float, float, float, float, float} @vgpr_ps_addr0([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #1 {
; %4 is the second <2 x i32> argument; %7 and %8 are the first two <2 x i32>
; arguments that follow the <3 x i32> argument.
50 %i0 = extractelement <2 x i32> %4, i32 0
51 %i1 = extractelement <2 x i32> %4, i32 1
52 %i2 = extractelement <2 x i32> %7, i32 0
53 %i3 = extractelement <2 x i32> %8, i32 0
; Reinterpret the integer lanes as floats so they fit the float return struct.
54 %f0 = bitcast i32 %i0 to float
55 %f1 = bitcast i32 %i1 to float
56 %f2 = bitcast i32 %i2 to float
57 %f3 = bitcast i32 %i3 to float
58 %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
59 %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
60 %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
61 %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
; %12 is the third scalar float argument.
62 %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
63 ret {float, float, float, float, float} %r4
; @ps_input_ena_no_inputs: takes the same argument list as the vgpr_ps_addr*
; tests, uses attribute group #1, and returns a single float. The check expects
; v0 to be loaded with 1.0.
; NOTE(review): the function body is not among the visible lines, and the
; .long check below continues a check sequence whose start is not visible.
69 ; GCN-NEXT: .long 165584
71 ; GCN-LABEL: {{^}}ps_input_ena_no_inputs:
72 ; GCN: v_mov_b32_e32 v0, 1.0
74 define float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #1 {
; @vgpr_ps_addr1: same body as @vgpr_ps_addr0 but with attribute group #2
; ("InitialPSInputAddr"="1"). It returns %4[0], %4[1], %7[0], %8[0], %12.
; The checks expect the five results to be moved into v0..v4 from v2, v3, v4,
; v6 and v8 respectively.
; NOTE(review): the .long check below continues a check sequence whose start is
; not among the visible lines.
81 ; GCN-NEXT: .long 165584
83 ; GCN-LABEL: {{^}}vgpr_ps_addr1:
84 ; GCN-DAG: v_mov_b32_e32 v0, v2
85 ; GCN-DAG: v_mov_b32_e32 v1, v3
86 ; GCN: v_mov_b32_e32 v2, v4
87 ; GCN-DAG: v_mov_b32_e32 v3, v6
88 ; GCN-DAG: v_mov_b32_e32 v4, v8
90 attributes #2 = { "ShaderType"="0" "InitialPSInputAddr"="1" }
91 define {float, float, float, float, float} @vgpr_ps_addr1([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #2 {
; %4 is the second <2 x i32> argument; %7 and %8 are the first two <2 x i32>
; arguments that follow the <3 x i32> argument.
92 %i0 = extractelement <2 x i32> %4, i32 0
93 %i1 = extractelement <2 x i32> %4, i32 1
94 %i2 = extractelement <2 x i32> %7, i32 0
95 %i3 = extractelement <2 x i32> %8, i32 0
; Reinterpret the integer lanes as floats so they fit the float return struct.
96 %f0 = bitcast i32 %i0 to float
97 %f1 = bitcast i32 %i1 to float
98 %f2 = bitcast i32 %i2 to float
99 %f3 = bitcast i32 %i3 to float
100 %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
101 %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
102 %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
103 %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
; %12 is the third scalar float argument.
104 %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
105 ret {float, float, float, float, float} %r4
; @vgpr_ps_addr119: same body as @vgpr_ps_addr0 but with attribute group #3
; ("InitialPSInputAddr"="119"). It returns %4[0], %4[1], %7[0], %8[0], %12.
; The checks expect the five results to be moved into v0..v4 from v2, v3, v6,
; v8 and v12 respectively.
; NOTE(review): the three .long checks continue a sequence whose first check is
; not among the visible lines. 631 equals 119 | 562 (bitwise OR); presumably
; the last value combines the initial address mask with the 562 value checked
; two lines earlier - confirm against the backend.
110 ; GCN-NEXT: .long 562
111 ; GCN-NEXT: .long 165584
112 ; GCN-NEXT: .long 631
113 ; GCN-LABEL: {{^}}vgpr_ps_addr119:
114 ; GCN-DAG: v_mov_b32_e32 v0, v2
115 ; GCN-DAG: v_mov_b32_e32 v1, v3
116 ; GCN: v_mov_b32_e32 v2, v6
117 ; GCN: v_mov_b32_e32 v3, v8
118 ; GCN: v_mov_b32_e32 v4, v12
120 attributes #3 = { "ShaderType"="0" "InitialPSInputAddr"="119" }
121 define {float, float, float, float, float} @vgpr_ps_addr119([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #3 {
; %4 is the second <2 x i32> argument; %7 and %8 are the first two <2 x i32>
; arguments that follow the <3 x i32> argument.
122 %i0 = extractelement <2 x i32> %4, i32 0
123 %i1 = extractelement <2 x i32> %4, i32 1
124 %i2 = extractelement <2 x i32> %7, i32 0
125 %i3 = extractelement <2 x i32> %8, i32 0
; Reinterpret the integer lanes as floats so they fit the float return struct.
126 %f0 = bitcast i32 %i0 to float
127 %f1 = bitcast i32 %i1 to float
128 %f2 = bitcast i32 %i2 to float
129 %f3 = bitcast i32 %i3 to float
130 %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
131 %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
132 %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
133 %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
; %12 is the third scalar float argument.
134 %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
135 ret {float, float, float, float, float} %r4
; @vgpr_ps_addr418: same body as @vgpr_ps_addr0 but with attribute group #4
; ("InitialPSInputAddr"="418"). It returns %4[0], %4[1], %7[0], %8[0], %12.
; The checks expect no moves into v0, v1 or v2, and expect v3 and v4 to be
; filled from v4 and v8 respectively.
; NOTE(review): the three .long checks continue a sequence whose first check is
; not among the visible lines. 946 equals 418 | 562 (bitwise OR); presumably
; the last value combines the initial address mask with the 562 value checked
; two lines earlier - confirm against the backend.
140 ; GCN-NEXT: .long 562
141 ; GCN-NEXT: .long 165584
142 ; GCN-NEXT: .long 946
143 ; GCN-LABEL: {{^}}vgpr_ps_addr418:
144 ; GCN-NOT: v_mov_b32_e32 v0
145 ; GCN-NOT: v_mov_b32_e32 v1
146 ; GCN-NOT: v_mov_b32_e32 v2
147 ; GCN: v_mov_b32_e32 v3, v4
148 ; GCN: v_mov_b32_e32 v4, v8
150 attributes #4 = { "ShaderType"="0" "InitialPSInputAddr"="418" }
151 define {float, float, float, float, float} @vgpr_ps_addr418([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #4 {
; %4 is the second <2 x i32> argument; %7 and %8 are the first two <2 x i32>
; arguments that follow the <3 x i32> argument.
152 %i0 = extractelement <2 x i32> %4, i32 0
153 %i1 = extractelement <2 x i32> %4, i32 1
154 %i2 = extractelement <2 x i32> %7, i32 0
155 %i3 = extractelement <2 x i32> %8, i32 0
; Reinterpret the integer lanes as floats so they fit the float return struct.
156 %f0 = bitcast i32 %i0 to float
157 %f1 = bitcast i32 %i1 to float
158 %f2 = bitcast i32 %i2 to float
159 %f3 = bitcast i32 %i3 to float
160 %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
161 %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
162 %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
163 %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
; %12 is the third scalar float argument.
164 %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
165 ret {float, float, float, float, float} %r4
; @sgpr: returns three i32 values, (%x, %1, %2), where %1 and %2 are the two
; inreg i32 arguments.
; The checks expect s0 to receive s3 + 2, s1 to receive the old s2, and only
; then s2 to be overwritten with s3 (same pattern as the SGPR part of @both).
; NOTE(review): %x is defined on a line that is not among the visible lines;
; the s_add_i32 check suggests it is %2 + 2 - confirm.
169 ; GCN-LABEL: {{^}}sgpr:
; GCN-DAG: s_add_i32 s0, s3, 2
; GCN-DAG: s_mov_b32 s1, s2
171 ; GCN: s_mov_b32 s2, s3
173 define {i32, i32, i32} @sgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) #0 {
175 %a = insertvalue {i32, i32, i32} undef, i32 %x, 0
176 %b = insertvalue {i32, i32, i32} %a, i32 %1, 1
; Chain from %b (not %a) so element 1 is actually part of the returned value;
; building %c from %a left %b dead and element 1 undef.
177 %c = insertvalue {i32, i32, i32} %b, i32 %2, 2
178 ret {i32, i32, i32} %c
; @sgpr_literal: returns the constant struct {5, 6, 7, 8} of four i32 values.
; The checks expect the constants to be materialised in s0..s3 and forbid a
; redundant self-move of s0 after s0 has been set to 5.
; NOTE(review): at least one line between the define and the ret is not among
; the visible lines.
182 ; GCN-LABEL: {{^}}sgpr_literal:
183 ; GCN: s_mov_b32 s0, 5
184 ; GCN-NOT: s_mov_b32 s0, s0
185 ; GCN-DAG: s_mov_b32 s1, 6
186 ; GCN-DAG: s_mov_b32 s2, 7
187 ; GCN-DAG: s_mov_b32 s3, 8
189 define {i32, i32, i32, i32} @sgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) #0 {
191 ret {i32, i32, i32, i32} {i32 5, i32 6, i32 7, i32 8}
; @both: exports the incoming float argument (%3) and returns a struct that
; mixes float and i32 members: (%3 + 1.0, %s, %3, %1, %2).
; The checks expect the float members in v0 and v1 (the argument is first
; copied from v0 to v1, which the export also reads) and the i32 members in
; s0, s1 and s2, with s1 taking the old s2 before s2 is overwritten by s3.
; NOTE(review): %s is defined on a line that is not among the visible lines;
; the s_add_i32 check suggests it is %2 + 2 - confirm.
195 ; GCN-LABEL: {{^}}both:
196 ; GCN: v_mov_b32_e32 v1, v0
197 ; GCN-DAG: exp 15, 0, 1, 1, 1, v1, v1, v1, v1
198 ; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
199 ; GCN-DAG: s_add_i32 s0, s3, 2
200 ; GCN-DAG: s_mov_b32 s1, s2
201 ; GCN: s_mov_b32 s2, s3
202 ; GCN: s_waitcnt expcnt(0)
204 define {float, i32, float, i32, i32} @both([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) #0 {
205 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
206 %v = fadd float %3, 1.0
; Each insertvalue extends the previous one, so all five members are defined.
208 %a0 = insertvalue {float, i32, float, i32, i32} undef, float %v, 0
209 %a1 = insertvalue {float, i32, float, i32, i32} %a0, i32 %s, 1
210 %a2 = insertvalue {float, i32, float, i32, i32} %a1, float %3, 2
211 %a3 = insertvalue {float, i32, float, i32, i32} %a2, i32 %1, 3
212 %a4 = insertvalue {float, i32, float, i32, i32} %a3, i32 %2, 4
213 ret {float, i32, float, i32, i32} %a4
; @structure_literal: exports the incoming float argument (%3) and returns a
; constant nested struct {{1.0, 2}, {3, <2.0, 4.0>}}.
; The checks expect the float constants in v0, v1 and v2, the integer constants
; in s0 and s1, and the argument to be moved from v0 to v3 for the export.
; NOTE(review): the closing brace of this function is not among the visible
; lines.
217 ; GCN-LABEL: {{^}}structure_literal:
218 ; GCN: v_mov_b32_e32 v3, v0
219 ; GCN-DAG: v_mov_b32_e32 v0, 1.0
220 ; GCN-DAG: s_mov_b32 s0, 2
221 ; GCN-DAG: s_mov_b32 s1, 3
222 ; GCN-DAG: v_mov_b32_e32 v1, 2.0
223 ; GCN-DAG: v_mov_b32_e32 v2, 4.0
224 ; GCN-DAG: exp 15, 0, 1, 1, 1, v3, v3, v3, v3
225 define {{float, i32}, {i32, <2 x float>}} @structure_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) #0 {
226 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
227 ret {{float, i32}, {i32, <2 x float>}} {{float, i32} {float 1.0, i32 2}, {i32, <2 x float>} {i32 3, <2 x float> <float 2.0, float 4.0>}}