From: Tom Stellard Date: Wed, 9 Sep 2015 15:43:26 +0000 (+0000) Subject: AMDGPU/SI: Fold operands through REG_SEQUENCE instructions X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=6680fc357944e670a57f865d8a5457e73c7e92b7;p=oota-llvm.git AMDGPU/SI: Fold operands through REG_SEQUENCE instructions Summary: This helps mostly when we use add instructions for address calculations that contain immediates. Reviewers: arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D12256 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@247157 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp index eff9c072f40..fe11385d0cd 100644 --- a/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -245,6 +245,27 @@ static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI, } } + // Special case for REG_SEQUENCE: We can't fold literals into + // REG_SEQUENCE instructions, so we have to fold them into the + // uses of REG_SEQUENCE. + if (UseMI->getOpcode() == AMDGPU::REG_SEQUENCE) { + unsigned RegSeqDstReg = UseMI->getOperand(0).getReg(); + unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm(); + + for (MachineRegisterInfo::use_iterator + RSUse = MRI.use_begin(RegSeqDstReg), + RSE = MRI.use_end(); RSUse != RSE; ++RSUse) { + + MachineInstr *RSUseMI = RSUse->getParent(); + if (RSUse->getSubReg() != RegSeqDstSubReg) + continue; + + foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList, + TII, TRI, MRI); + } + return; + } + const MCInstrDesc &UseDesc = UseMI->getDesc(); // Don't fold into target independent nodes. Target independent opcodes diff --git a/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/test/CodeGen/AMDGPU/cgp-addressing-modes.ll index 5cb6cc3ff67..698494265a7 100644 --- a/test/CodeGen/AMDGPU/cgp-addressing-modes.ll +++ b/test/CodeGen/AMDGPU/cgp-addressing-modes.ll @@ -317,10 +317,8 @@ done: ; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32: ; GCN: s_and_saveexec_b64 -; GCN-DAG: s_mov_b32 s{{[0-9]+}}, 3{{$}} -; GCN-DAG: s_mov_b32 s{{[0-9]+}}, -4{{$}} -; GCN: s_add_u32 -; GCN: s_addc_u32 +; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}} +; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}} ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}} ; GCN: s_or_b64 exec, exec define void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) { diff --git a/test/CodeGen/AMDGPU/merge-stores.ll b/test/CodeGen/AMDGPU/merge-stores.ll index 34a2fc7ffa7..87148ae9f69 100644 --- a/test/CodeGen/AMDGPU/merge-stores.ll +++ b/test/CodeGen/AMDGPU/merge-stores.ll @@ -508,10 +508,8 @@ define void @merge_local_store_2_constants_i8(i8 addrspace(3)* %out) #0 { } ; GCN-LABEL: {{^}}merge_local_store_2_constants_i32: -; GCN-DAG: s_movk_i32 [[SLO:s[0-9]+]], 0x1c8 -; GCN-DAG: s_movk_i32 [[SHI:s[0-9]+]], 0x7b -; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[SLO]] -; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHI]] +; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x1c8 +; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7b ; GCN: ds_write2_b32 v{{[0-9]+}}, v[[LO]], v[[HI]] offset1:1{{$}} define void @merge_local_store_2_constants_i32(i32 addrspace(3)* %out) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1 diff --git a/test/CodeGen/AMDGPU/select64.ll b/test/CodeGen/AMDGPU/select64.ll index 13fb575b2b1..a68fdecb00a 100644 --- a/test/CodeGen/AMDGPU/select64.ll +++ b/test/CodeGen/AMDGPU/select64.ll @@ -51,12 +51,8 @@ define void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 addrspa } ; CHECK-LABEL: {{^}}v_select_i64_split_imm: -; CHECK: s_mov_b32 [[SHI:s[0-9]+]], 63 -; CHECK: s_mov_b32 [[SLO:s[0-9]+]], 0 -; CHECK-DAG: v_mov_b32_e32 [[VHI:v[0-9]+]], [[SHI]] -; CHECK-DAG: v_mov_b32_e32 [[VLO:v[0-9]+]], [[SLO]] -; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, [[VLO]], {{v[0-9]+}} -; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, [[VHI]], {{v[0-9]+}} +; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}} +; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, 63, {{v[0-9]+}} ; CHECK: s_endpgm define void @v_select_i64_split_imm(i64 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { %cmp = icmp ugt i32 %cond, 5