This avoids switching to .AMDGPU.config and back and hardcoding the
section it switches back to.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@232479 91177308-0d34-0410-b5e6-96231b3b80d8
SetupMachineFunction(MF);
SetupMachineFunction(MF);
- EmitFunctionHeader();
-
MCContext &Context = getObjFileLowering().getContext();
const MCSectionELF *ConfigSection =
Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
MCContext &Context = getObjFileLowering().getContext();
const MCSectionELF *ConfigSection =
Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
HexLines.clear();
DisasmLineMaxLen = 0;
HexLines.clear();
DisasmLineMaxLen = 0;
- OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
EmitFunctionBody();
if (isVerbose()) {
EmitFunctionBody();
if (isVerbose()) {
; RUN: llc < %s -march=r600 -mcpu=redwood -show-mc-encoding -o - | FileCheck --check-prefix=EG %s
; RUN: llc < %s -march=r600 -mcpu=rv710 -show-mc-encoding -o - | FileCheck --check-prefix=R600 %s
; RUN: llc < %s -march=r600 -mcpu=redwood -show-mc-encoding -o - | FileCheck --check-prefix=EG %s
; RUN: llc < %s -march=r600 -mcpu=rv710 -show-mc-encoding -o - | FileCheck --check-prefix=R600 %s
; EG: CALL_FS ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0xc0,0x84]
; EG: CALL_FS ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0xc0,0x84]
; R600:CALL_FS ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x89]
; R600:CALL_FS ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x89]
; ELF: Name: test
; ELF: Binding: Global
; ELF: Name: test
; ELF: Binding: Global
-; CONFIG: .align 256
-; CONFIG: test:
; CONFIG: .section .AMDGPU.config
; CONFIG-NEXT: .long 45096
; TYPICAL-NEXT: .long 0
; TONGA-NEXT: .long 576
; CONFIG: .section .AMDGPU.config
; CONFIG-NEXT: .long 45096
; TYPICAL-NEXT: .long 0
; TONGA-NEXT: .long 576
+; CONFIG: .align 256
+; CONFIG: test:
define void @test(i32 %p) #0 {
%i = add i32 %p, 2
%r = bitcast i32 %i to float
define void @test(i32 %p) #0 {
%i = add i32 %p, 2
%r = bitcast i32 %i to float
; Make sure we don't assert on empty functions
; Make sure we don't assert on empty functions
-; SI-LABEL: {{^}}empty_function_ret:
+; SI-LABEL: {{^}}empty_function_ret:
; SI: s_endpgm
; SI: codeLenInByte = 4
define void @empty_function_ret() #0 {
ret void
}
; SI: s_endpgm
; SI: codeLenInByte = 4
define void @empty_function_ret() #0 {
ret void
}
-; SI-LABEL: {{^}}empty_function_unreachable:
+; SI-LABEL: {{^}}empty_function_unreachable:
; SI: codeLenInByte = 0
define void @empty_function_unreachable() #0 {
unreachable
; SI: codeLenInByte = 0
define void @empty_function_unreachable() #0 {
unreachable
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s
; HSA: .section .hsa.version
; HSA-NEXT: .ascii "HSA Code Unit:0.0:AMD:0.1:GFX8.1:0"
; HSA: .section .hsa.version
; HSA-NEXT: .ascii "HSA Code Unit:0.0:AMD:0.1:GFX8.1:0"
; Make sure we are setting the ATC bit:
; HSA: s_mov_b32 s[[HI:[0-9]]], 0x100f000
; HSA: buffer_store_dword v{{[0-9]+}}, s[0:[[HI]]], 0
; Make sure we are setting the ATC bit:
; HSA: s_mov_b32 s[[HI:[0-9]]], 0x100f000
; HSA: buffer_store_dword v{{[0-9]+}}, s[0:[[HI]]], 0
; This test makes sure we do not double count global values when they are
; used in different basic blocks.
; This test makes sure we do not double count global values when they are
; used in different basic blocks.
-; CHECK-LABEL: {{^}}test:
; CHECK: .long 166120
; CHECK-NEXT: .long 1
; CHECK: .long 166120
; CHECK-NEXT: .long 1
+; CHECK-LABEL: {{^}}test:
@lds = internal unnamed_addr addrspace(3) global i32 undef, align 4
define void @test(i32 addrspace(1)* %out, i32 %cond) {
@lds = internal unnamed_addr addrspace(3) global i32 undef, align 4
define void @test(i32 addrspace(1)* %out, i32 %cond) {
@local_memory_two_objects.local_mem0 = internal unnamed_addr addrspace(3) global [4 x i32] undef, align 4
@local_memory_two_objects.local_mem1 = internal unnamed_addr addrspace(3) global [4 x i32] undef, align 4
@local_memory_two_objects.local_mem0 = internal unnamed_addr addrspace(3) global [4 x i32] undef, align 4
@local_memory_two_objects.local_mem1 = internal unnamed_addr addrspace(3) global [4 x i32] undef, align 4
-; EG: {{^}}local_memory_two_objects:
; Check that the LDS size is emitted correctly
; EG: .long 166120
; Check that the LDS size is emitted correctly
; EG: .long 166120
; GCN: .long 47180
; GCN-NEXT: .long 38792
; GCN: .long 47180
; GCN-NEXT: .long 38792
+; EG: {{^}}local_memory_two_objects:
+
; We would like to check that the lds writes are using different
; addresses, but due to variations in the scheduler, we can't do
; this consistently on evergreen GPUs.
; We would like to check that the lds writes are using different
; addresses, but due to variations in the scheduler, we can't do
; this consistently on evergreen GPUs.
@local_memory.local_mem = internal unnamed_addr addrspace(3) global [128 x i32] undef, align 4
@local_memory.local_mem = internal unnamed_addr addrspace(3) global [128 x i32] undef, align 4
-; FUNC-LABEL: {{^}}local_memory:
; Check that the LDS size is emitted correctly
; EG: .long 166120
; Check that the LDS size is emitted correctly
; EG: .long 166120
; CI: .long 47180
; CI-NEXT: .long 38792
; CI: .long 47180
; CI-NEXT: .long 38792
+; FUNC-LABEL: {{^}}local_memory:
+
; EG: LDS_WRITE
; SI-NOT: s_wqm_b64
; SI: ds_write_b32
; EG: LDS_WRITE
; SI-NOT: s_wqm_b64
; SI: ds_write_b32
-; FUNC-LABEL: {{^}}sextload_i8_to_i32_bfe_0:
+; FUNC-LABEL: {{^}}sextload_i8_to_i32_bfe_0:{{.*$}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {