1 ; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tahiti < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-SI %s
2 ; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s
3 ; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
4 ; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
5 ; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
6 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
8 declare i32 @llvm.r600.read.tidig.x() #0
; Conditional global-memory load at a small constant offset: %in.gep indexes
; %in (addrspace(1)) by 7 i32 elements, and the loaded value reaches the phi
; only via the %if edge; the branch goes directly to %endif when %cond == 0.
; After codegenprepare the bonaire run must show no getelementptr on %in for
; this function, while the tonga run must still show one.
10 ; OPT-LABEL: @test_sink_global_small_offset_i32(
11 ; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in
12 ; OPT-VI: getelementptr i32, i32 addrspace(1)* %in
16 ; GCN-LABEL: {{^}}test_sink_global_small_offset_i32:
18 define void @test_sink_global_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond) {
20 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
; The address under test: computed before the branch, used only by the load.
21 %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 7
22 %tmp0 = icmp eq i32 %cond, 0
23 br i1 %tmp0, label %endif, label %if
26 %tmp1 = load i32, i32 addrspace(1)* %in.gep
; Merge: the loaded value from %if, or 0 when coming from %entry.
30 %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
31 store i32 %x, i32 addrspace(1)* %out.gep
; Conditional i8 global load at byte offset 65535, sign-extended to i32.
; Every opt run must keep the 65535 getelementptr under the name %in.gep.
; In the llc output, an s_and_saveexec_b64 must be followed by a
; buffer_load_sbyte whose line ends with a scalar-register operand, i.e. no
; immediate "offset" field follows it.
38 ; OPT-LABEL: @test_sink_global_small_max_i32_ds_offset(
39 ; OPT: %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
42 ; GCN-LABEL: {{^}}test_sink_global_small_max_i32_ds_offset:
43 ; GCN: s_and_saveexec_b64
44 ; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
47 define void @test_sink_global_small_max_i32_ds_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in, i32 %cond) {
49 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
50 %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
51 %tmp0 = icmp eq i32 %cond, 0
; %cond == 0 skips the load and goes straight to %endif.
52 br i1 %tmp0, label %endif, label %if
55 %tmp1 = load i8, i8 addrspace(1)* %in.gep
56 %tmp2 = sext i8 %tmp1 to i32
; Merge: the sign-extended byte from %if, or 0 when coming from %entry.
60 %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
61 store i32 %x, i32 addrspace(1)* %out.gep
; Conditional i8 global load at byte offset 4095, sign-extended to i32.
; The llc output must contain an s_and_saveexec_b64 followed by a
; buffer_load_sbyte that carries the constant as an immediate
; ("0 offset:4095" at the end of the line).
68 ; GCN-LABEL: {{^}}test_sink_global_small_max_mubuf_offset:
69 ; GCN: s_and_saveexec_b64
70 ; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}}
73 define void @test_sink_global_small_max_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in, i32 %cond) {
75 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024
76 %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4095
77 %tmp0 = icmp eq i32 %cond, 0
; %cond == 0 skips the load and goes straight to %endif.
78 br i1 %tmp0, label %endif, label %if
81 %tmp1 = load i8, i8 addrspace(1)* %in.gep
82 %tmp2 = sext i8 %tmp1 to i32
; Merge: the sign-extended byte from %if, or 0 when coming from %entry.
86 %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
87 store i32 %x, i32 addrspace(1)* %out.gep
; Same shape as the 4095-byte case but with byte offset 4096 (one more).
; Here the expected buffer_load_sbyte line ends with a scalar-register
; operand rather than an immediate "offset" field.
94 ; GCN-LABEL: {{^}}test_sink_global_small_max_plus_1_mubuf_offset:
95 ; GCN: s_and_saveexec_b64
96 ; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
99 define void @test_sink_global_small_max_plus_1_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in, i32 %cond) {
101 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
102 %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4096
103 %tmp0 = icmp eq i32 %cond, 0
; %cond == 0 skips the load and goes straight to %endif.
104 br i1 %tmp0, label %endif, label %if
107 %tmp1 = load i8, i8 addrspace(1)* %in.gep
108 %tmp2 = sext i8 %tmp1 to i32
; Merge: the sign-extended byte from %if, or 0 when coming from %entry.
112 %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
113 store i32 %x, i32 addrspace(1)* %out.gep
; Stack (alloca) accesses at a constant offset: %alloca.gep selects element
; 1023 of a [512 x i32] alloca, i.e. byte offset 1023 * 4 = 4092. The index
; is past the 512-element array bound; only the resulting offset matters to
; the checks. After codegenprepare no "getelementptr [512 x i32]" may remain
; in this function, and the llc output must show a buffer_store_dword and a
; buffer_load_dword using "offen offset:4092".
120 ; OPT-LABEL: @test_sink_scratch_small_offset_i32(
121 ; OPT-NOT: getelementptr [512 x i32]
125 ; GCN-LABEL: {{^}}test_sink_scratch_small_offset_i32:
126 ; GCN: s_and_saveexec_b64
127 ; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
128 ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
130 define void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond, i32 %arg) {
132 %alloca = alloca [512 x i32], align 4
133 %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
134 %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
; %add.arg has no user in the lines shown here.
135 %add.arg = add i32 %arg, 8
136 %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1023
137 %tmp0 = icmp eq i32 %cond, 0
; %cond == 0 skips the volatile store/load pair and goes straight to %endif.
138 br i1 %tmp0, label %endif, label %if
141 store volatile i32 123, i32* %alloca.gep
142 %tmp1 = load volatile i32, i32* %alloca.gep
; Merge: the value loaded on the %if edge, or 0 when coming from %entry.
146 %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
147 store i32 %x, i32 addrspace(1)* %out.gep.0
; A second volatile load through the same address, performed after the merge.
148 %load = load volatile i32, i32* %alloca.gep
149 store i32 %load, i32 addrspace(1)* %out.gep.1
; Counterpart of the 4092-byte scratch case: %alloca.gep selects element 1024
; of a [512 x i32] alloca, i.e. byte offset 1024 * 4 = 4096. Every opt run
; must keep the getelementptr under the name %alloca.gep, and the llc output
; must show buffer_store_dword / buffer_load_dword lines that end in "offen"
; with no immediate offset after it.
156 ; OPT-LABEL: @test_no_sink_scratch_large_offset_i32(
157 ; OPT: %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
161 ; GCN-LABEL: {{^}}test_no_sink_scratch_large_offset_i32:
162 ; GCN: s_and_saveexec_b64
163 ; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
164 ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
166 define void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond, i32 %arg) {
168 %alloca = alloca [512 x i32], align 4
169 %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
170 %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
; %add.arg has no user in the lines shown here.
171 %add.arg = add i32 %arg, 8
172 %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
173 %tmp0 = icmp eq i32 %cond, 0
; %cond == 0 skips the volatile store/load pair and goes straight to %endif.
174 br i1 %tmp0, label %endif, label %if
177 store volatile i32 123, i32* %alloca.gep
178 %tmp1 = load volatile i32, i32* %alloca.gep
; Merge: the value loaded on the %if edge, or 0 when coming from %entry.
182 %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
183 store i32 %x, i32 addrspace(1)* %out.gep.0
; A second volatile load through the same address, performed after the merge.
184 %load = load volatile i32, i32* %alloca.gep
185 store i32 %load, i32 addrspace(1)* %out.gep.1
; Conditional global load with a variable index: %offset is zero-extended to
; i64 and used as the i32 element index into %in.
; Expected llc output: for tonga, both flat_scratch halves are set to 0x0 (in
; either order) and the load is a flat_load_dword; for bonaire the load is a
; buffer_load_dword in addr64 form. No tahiti-specific load pattern is
; checked in the lines shown here.
192 ; GCN-LABEL: {{^}}test_sink_global_vreg_sreg_i32:
193 ; VI-DAG: s_movk_i32 flat_scratch_lo, 0x0
194 ; VI-DAG: s_movk_i32 flat_scratch_hi, 0x0
195 ; GCN: s_and_saveexec_b64
196 ; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
197 ; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
199 define void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset, i32 %cond) {
201 %offset.ext = zext i32 %offset to i64
202 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
203 %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 %offset.ext
204 %tmp0 = icmp eq i32 %cond, 0
; %cond == 0 skips the load and goes straight to %endif.
205 br i1 %tmp0, label %endif, label %if
208 %tmp1 = load i32, i32 addrspace(1)* %in.gep
; Merge: the loaded value from %if, or 0 when coming from %entry.
212 %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
213 store i32 %x, i32 addrspace(1)* %out.gep
220 attributes #0 = { nounwind readnone }
221 attributes #1 = { nounwind }
; Conditional load from addrspace(2) at a constant index of 7 i32 elements.
; After codegenprepare no "getelementptr i32, i32 addrspace(2)*" may remain
; in this function for any opt run. In the llc output the tahiti run must
; show an s_load_dword with immediate 0x7, bracketed by s_and_saveexec_b64
; and "s_or_b64 exec, exec".
225 ; OPT-LABEL: @test_sink_constant_small_offset_i32
226 ; OPT-NOT: getelementptr i32, i32 addrspace(2)*
229 ; GCN-LABEL: {{^}}test_sink_constant_small_offset_i32:
230 ; GCN: s_and_saveexec_b64
231 ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x7{{$}}
232 ; GCN: s_or_b64 exec, exec
233 define void @test_sink_constant_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
235 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
236 %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 7
237 %tmp0 = icmp eq i32 %cond, 0
; %cond == 0 skips the load and goes straight to %endif.
238 br i1 %tmp0, label %endif, label %if
241 %tmp1 = load i32, i32 addrspace(2)* %in.gep
; Merge: the loaded value from %if, or 0 when coming from %entry.
245 %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
246 store i32 %x, i32 addrspace(1)* %out.gep
; Conditional load from addrspace(2) at index 255, the largest value that
; fits in 8 bits. After codegenprepare no
; "getelementptr i32, i32 addrspace(2)*" may remain in this function for any
; opt run. In the llc output the tahiti run must show an s_load_dword with
; immediate 0xff.
253 ; OPT-LABEL: @test_sink_constant_max_8_bit_offset_i32
254 ; OPT-NOT: getelementptr i32, i32 addrspace(2)*
257 ; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_i32:
258 ; GCN: s_and_saveexec_b64
259 ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xff{{$}}
260 ; GCN: s_or_b64 exec, exec
261 define void @test_sink_constant_max_8_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
263 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
264 %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 255
265 %tmp0 = icmp eq i32 %cond, 0
; %cond == 0 skips the load and goes straight to %endif.
266 br i1 %tmp0, label %endif, label %if
269 %tmp1 = load i32, i32 addrspace(2)* %in.gep
; Merge: the loaded value from %if, or 0 when coming from %entry.
273 %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
274 store i32 %x, i32 addrspace(1)* %out.gep
; Conditional load from addrspace(2) at index 256, one past the largest
; 8-bit value; the byte offset is 256 * 4 = 0x400.
; After codegenprepare the tahiti run must still contain the getelementptr,
; while the bonaire and tonga runs must not. In the llc output the tahiti run
; must materialize 0x400 in a scalar register with s_movk_i32 and pass that
; register as the s_load_dword offset operand.
281 ; OPT-LABEL: @test_sink_constant_max_8_bit_offset_p1_i32
282 ; OPT-SI: getelementptr i32, i32 addrspace(2)*
283 ; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
284 ; OPT-VI-NOT: getelementptr i32, i32 addrspace(2)*
287 ; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_p1_i32:
288 ; GCN: s_and_saveexec_b64
289 ; SI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x400
291 ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
292 ; GCN: s_or_b64 exec, exec
293 define void @test_sink_constant_max_8_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
295 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
296 %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 256
297 %tmp0 = icmp eq i32 %cond, 0
; %cond == 0 skips the load and goes straight to %endif.
298 br i1 %tmp0, label %endif, label %if
301 %tmp1 = load i32, i32 addrspace(2)* %in.gep
; Merge: the loaded value from %if, or 0 when coming from %entry.
305 %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
306 store i32 %x, i32 addrspace(1)* %out.gep
; Conditional load from addrspace(2) at index 4294967295 (2^32 - 1); the
; byte offset is 0x3_FFFFFFFC, whose low 32 bits read as -4 and whose high
; part is 3, matching the s_add_u32 / s_addc_u32 immediates expected below.
; After codegenprepare the tahiti run must still contain the getelementptr
; and the bonaire run must not; no tonga-specific opt pattern appears in the
; lines shown here. The tahiti llc run must then load with immediate 0x0.
313 ; OPT-LABEL: @test_sink_constant_max_32_bit_offset_i32
314 ; OPT-SI: getelementptr i32, i32 addrspace(2)*
315 ; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
318 ; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32:
319 ; GCN: s_and_saveexec_b64
320 ; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}}
321 ; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}}
322 ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
323 ; GCN: s_or_b64 exec, exec
324 define void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
326 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
327 %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 4294967295
328 %tmp0 = icmp eq i32 %cond, 0
; %cond == 0 skips the load and goes straight to %endif.
329 br i1 %tmp0, label %endif, label %if
332 %tmp1 = load i32, i32 addrspace(2)* %in.gep
; Merge: the loaded value from %if, or 0 when coming from %entry.
336 %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
337 store i32 %x, i32 addrspace(1)* %out.gep
; Conditional load from addrspace(2) at a constant index that does not fit in
; 32 bits. Every opt run must still contain a
; "getelementptr i32, i32 addrspace(2)*" after codegenprepare, and the tahiti
; llc run must load with immediate 0x0.
; NOTE(review): the index used is 17179869181 (0x3FFFFFFFD), not
; 4294967295 + 1 as the "_p1" name might suggest - confirm this is intended.
344 ; OPT-LABEL: @test_sink_constant_max_32_bit_offset_p1_i32
345 ; OPT: getelementptr i32, i32 addrspace(2)*
348 ; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_p1_i32:
349 ; GCN: s_and_saveexec_b64
352 ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
353 ; GCN: s_or_b64 exec, exec
354 define void @test_sink_constant_max_32_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
356 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
357 %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 17179869181
358 %tmp0 = icmp eq i32 %cond, 0
; %cond == 0 skips the load and goes straight to %endif.
359 br i1 %tmp0, label %endif, label %if
362 %tmp1 = load i32, i32 addrspace(2)* %in.gep
; Merge: the loaded value from %if, or 0 when coming from %entry.
366 %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
367 store i32 %x, i32 addrspace(1)* %out.gep
; Conditional load from addrspace(2) at index 262143 (0x3ffff); the byte
; offset is 262143 * 4 = 0xffffc, which still fits in 20 bits.
; Expected s_load_dword forms in the llc output:
;   tahiti  - 0xffffc is first moved into a scalar register with s_mov_b32
;             and that register is the offset operand;
;   bonaire - immediate 0x3ffff (numerically the i32 element index);
;   tonga   - immediate 0xffffc (numerically the byte offset).
374 ; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_i32:
375 ; GCN: s_and_saveexec_b64
376 ; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc{{$}}
377 ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
379 ; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x3ffff{{$}}
380 ; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xffffc{{$}}
382 ; GCN: s_or_b64 exec, exec
383 define void @test_sink_constant_max_20_bit_byte_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
385 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
386 %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262143
387 %tmp0 = icmp eq i32 %cond, 0
; %cond == 0 skips the load and goes straight to %endif.
388 br i1 %tmp0, label %endif, label %if
391 %tmp1 = load i32, i32 addrspace(2)* %in.gep
; Merge: the loaded value from %if, or 0 when coming from %entry.
395 %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
396 store i32 %x, i32 addrspace(1)* %out.gep
; Conditional load from addrspace(2) at index 262144 (0x40000); the byte
; offset is 262144 * 4 = 0x100000, one bit wider than 20 bits.
; After codegenprepare the tahiti and tonga runs must still contain the
; getelementptr, while the bonaire run must not.
; Expected s_load_dword forms in the llc output:
;   tahiti, tonga - 0x100000 is first moved into a scalar register with
;                   s_mov_b32 and that register is the offset operand;
;   bonaire       - immediate 0x40000 (numerically the i32 element index).
403 ; OPT-LABEL: @test_sink_constant_max_20_bit_byte_offset_p1_i32
404 ; OPT-SI: getelementptr i32, i32 addrspace(2)*
405 ; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
406 ; OPT-VI: getelementptr i32, i32 addrspace(2)*
409 ; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_p1_i32:
410 ; GCN: s_and_saveexec_b64
411 ; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
412 ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
414 ; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x40000{{$}}
416 ; VI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
417 ; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
419 ; GCN: s_or_b64 exec, exec
420 define void @test_sink_constant_max_20_bit_byte_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
422 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
423 %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262144
424 %tmp0 = icmp eq i32 %cond, 0
; %cond == 0 skips the load and goes straight to %endif.
425 br i1 %tmp0, label %endif, label %if
428 %tmp1 = load i32, i32 addrspace(2)* %in.gep
; Merge: the loaded value from %if, or 0 when coming from %entry.
432 %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
433 store i32 %x, i32 addrspace(1)* %out.gep