AMDGPU/SI: Fold operands through REG_SEQUENCE instructions

author Tom Stellard <thomas.stellard@amd.com>

Wed, 9 Sep 2015 15:43:26 +0000 (15:43 +0000)

committer Tom Stellard <thomas.stellard@amd.com>

Wed, 9 Sep 2015 15:43:26 +0000 (15:43 +0000)
author Tom Stellard <thomas.stellard@amd.com>
Wed, 9 Sep 2015 15:43:26 +0000 (15:43 +0000)
committer Tom Stellard <thomas.stellard@amd.com>
Wed, 9 Sep 2015 15:43:26 +0000 (15:43 +0000)
diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp

index eff9c072f40b52ecb56d6f896efbd1b541b1da5a..fe11385d0cd77aef20aa7af8fc57680d0467b7e0 100644 (file)
--- a/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -245,6 +245,27 @@ static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
      }
    }
  
+  // Special case for REG_SEQUENCE: We can't fold literals into
+  // REG_SEQUENCE instructions, so we have to fold them into the
+  // uses of REG_SEQUENCE.
+  if (UseMI->getOpcode() == AMDGPU::REG_SEQUENCE) {
+    unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
+    unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
+
+    for (MachineRegisterInfo::use_iterator
+         RSUse = MRI.use_begin(RegSeqDstReg),
+         RSE = MRI.use_end(); RSUse != RSE; ++RSUse) {
+
+      MachineInstr *RSUseMI = RSUse->getParent();
+      if (RSUse->getSubReg() != RegSeqDstSubReg)
+        continue;
+
+      foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
+                  TII, TRI, MRI);
+    }
+    return;
+  }
+
    const MCInstrDesc &UseDesc = UseMI->getDesc();
  
    // Don't fold into target independent nodes.  Target independent opcodes
diff --git a/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/test/CodeGen/AMDGPU/cgp-addressing-modes.ll

index 5cb6cc3ff677f434bad36e174b3b9768e828543e..698494265a7d43c7b9df6de72aef97f2a78b6aff 100644 (file)
--- a/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
+++ b/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
@@ -317,10 +317,8 @@ done:
  
  ; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32:
  ; GCN: s_and_saveexec_b64
-; GCN-DAG: s_mov_b32 s{{[0-9]+}}, 3{{$}}
-; GCN-DAG: s_mov_b32 s{{[0-9]+}}, -4{{$}}
-; GCN: s_add_u32
-; GCN: s_addc_u32
+; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}}
+; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}}
  ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
  ; GCN: s_or_b64 exec, exec
  define void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
diff --git a/test/CodeGen/AMDGPU/merge-stores.ll b/test/CodeGen/AMDGPU/merge-stores.ll

index 34a2fc7ffa7451a49faf8667a44dc2bc717833e3..87148ae9f69c5fa8cc4efedc53a6f124ed258137 100644 (file)
--- a/test/CodeGen/AMDGPU/merge-stores.ll
+++ b/test/CodeGen/AMDGPU/merge-stores.ll
@@ -508,10 +508,8 @@ define void @merge_local_store_2_constants_i8(i8 addrspace(3)* %out) #0 {
  }
  
  ; GCN-LABEL: {{^}}merge_local_store_2_constants_i32:
-; GCN-DAG: s_movk_i32 [[SLO:s[0-9]+]], 0x1c8
-; GCN-DAG: s_movk_i32 [[SHI:s[0-9]+]], 0x7b
-; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[SLO]]
-; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHI]]
+; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x1c8
+; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7b
  ; GCN: ds_write2_b32 v{{[0-9]+}}, v[[LO]], v[[HI]] offset1:1{{$}}
  define void @merge_local_store_2_constants_i32(i32 addrspace(3)* %out) #0 {
    %out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1
diff --git a/test/CodeGen/AMDGPU/select64.ll b/test/CodeGen/AMDGPU/select64.ll

index 13fb575b2b151a619f74fdf5f24ad3f887a4cd33..a68fdecb00af7cd74a243f211ab656770d228d5f 100644 (file)
--- a/test/CodeGen/AMDGPU/select64.ll
+++ b/test/CodeGen/AMDGPU/select64.ll
@@ -51,12 +51,8 @@ define void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 addrspa
  }
  
  ; CHECK-LABEL: {{^}}v_select_i64_split_imm:
-; CHECK: s_mov_b32 [[SHI:s[0-9]+]], 63
-; CHECK: s_mov_b32 [[SLO:s[0-9]+]], 0
-; CHECK-DAG: v_mov_b32_e32 [[VHI:v[0-9]+]], [[SHI]]
-; CHECK-DAG: v_mov_b32_e32 [[VLO:v[0-9]+]], [[SLO]]
-; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, [[VLO]], {{v[0-9]+}}
-; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, [[VHI]], {{v[0-9]+}}
+; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}}
+; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, 63, {{v[0-9]+}}
  ; CHECK: s_endpgm
  define void @v_select_i64_split_imm(i64 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
    %cmp = icmp ugt i32 %cond, 5
author	Tom Stellard <thomas.stellard@amd.com>
	Wed, 9 Sep 2015 15:43:26 +0000 (15:43 +0000)
committer	Tom Stellard <thomas.stellard@amd.com>
	Wed, 9 Sep 2015 15:43:26 +0000 (15:43 +0000)
lib/Target/AMDGPU/SIFoldOperands.cpp		patch | blob | history
test/CodeGen/AMDGPU/cgp-addressing-modes.ll		patch | blob | history
test/CodeGen/AMDGPU/merge-stores.ll		patch | blob | history
test/CodeGen/AMDGPU/select64.ll		patch | blob | history