R600/SI: Fix indirect addressing with a negative constant offset

author Tom Stellard <thomas.stellard@amd.com>

Thu, 23 Apr 2015 20:32:01 +0000 (20:32 +0000)

committer Tom Stellard <thomas.stellard@amd.com>

Thu, 23 Apr 2015 20:32:01 +0000 (20:32 +0000)
author Tom Stellard <thomas.stellard@amd.com>
Thu, 23 Apr 2015 20:32:01 +0000 (20:32 +0000)
committer Tom Stellard <thomas.stellard@amd.com>
Thu, 23 Apr 2015 20:32:01 +0000 (20:32 +0000)
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp

index 2e08c9f5a5de6ad92fc346b0719979aa2a65fcd7..c319b32111feb5e2702c24def97d4000ed29df75 100644 (file)
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -88,7 +88,8 @@ private:
    void Kill(MachineInstr &MI);
    void Branch(MachineInstr &MI);
  
-  void LoadM0(MachineInstr &MI, MachineInstr *MovRel);
+  void LoadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset = 0);
+  void computeIndirectRegAndOffset(unsigned VecReg, unsigned &Reg, int &Offset);
    void IndirectSrc(MachineInstr &MI);
    void IndirectDst(MachineInstr &MI);
  
@@ -323,7 +324,7 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) {
    MI.eraseFromParent();
  }
  
-void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
+void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset) {
  
    MachineBasicBlock &MBB = *MI.getParent();
    DebugLoc DL = MI.getDebugLoc();
@@ -333,8 +334,14 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
    unsigned Idx = MI.getOperand(3).getReg();
  
    if (AMDGPU::SReg_32RegClass.contains(Idx)) {
-    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
-            .addReg(Idx);
+    if (Offset) {
+      BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
+              .addReg(Idx)
+              .addImm(Offset);
+    } else {
+      BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+              .addReg(Idx);
+    }
      MBB.insert(I, MovRel);
    } else {
  
@@ -363,6 +370,11 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
      BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
              .addReg(AMDGPU::VCC);
  
+    if (Offset) {
+      BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
+              .addReg(AMDGPU::M0)
+              .addImm(Offset);
+    }
      // Do the actual move
      MBB.insert(I, MovRel);
  
@@ -384,6 +396,33 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
    MI.eraseFromParent();
  }
  
+/// \param @VecReg The register which holds element zero of the vector
+///                 being addressed into.
+/// \param[out] @Reg The base register to use in the indirect addressing instruction.
+/// \param[in,out] @Offset As an input, this is the constant offset part of the
+//                         indirect Index. e.g. v0 = v[VecReg + Offset]
+//                         As an output, this is a constant value that needs
+//                         to be added to the value stored in M0.
+void SILowerControlFlowPass::computeIndirectRegAndOffset(unsigned VecReg,
+                                                         unsigned &Reg,
+                                                         int &Offset) {
+  unsigned SubReg = TRI->getSubReg(VecReg, AMDGPU::sub0);
+  if (!SubReg)
+    SubReg = VecReg;
+
+  const TargetRegisterClass *RC = TRI->getPhysRegClass(SubReg);
+  int RegIdx = TRI->getHWRegIndex(SubReg) + Offset;
+
+  if (RegIdx < 0) {
+    Offset = RegIdx;
+    RegIdx = 0;
+  } else {
+    Offset = 0;
+  }
+
+  Reg = RC->getRegister(RegIdx);
+}
+
  void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
  
    MachineBasicBlock &MBB = *MI.getParent();
@@ -391,18 +430,18 @@ void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
  
    unsigned Dst = MI.getOperand(0).getReg();
    unsigned Vec = MI.getOperand(2).getReg();
-  unsigned Off = MI.getOperand(4).getImm();
-  unsigned SubReg = TRI->getSubReg(Vec, AMDGPU::sub0);
-  if (!SubReg)
-    SubReg = Vec;
+  int Off = MI.getOperand(4).getImm();
+  unsigned Reg;
+
+  computeIndirectRegAndOffset(Vec, Reg, Off);
  
    MachineInstr *MovRel =
      BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
-            .addReg(SubReg + Off)
+            .addReg(Reg)
              .addReg(AMDGPU::M0, RegState::Implicit)
              .addReg(Vec, RegState::Implicit);
  
-  LoadM0(MI, MovRel);
+  LoadM0(MI, MovRel, Off);
  }
  
  void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
@@ -411,20 +450,20 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
    DebugLoc DL = MI.getDebugLoc();
  
    unsigned Dst = MI.getOperand(0).getReg();
-  unsigned Off = MI.getOperand(4).getImm();
+  int Off = MI.getOperand(4).getImm();
    unsigned Val = MI.getOperand(5).getReg();
-  unsigned SubReg = TRI->getSubReg(Dst, AMDGPU::sub0);
-  if (!SubReg)
-    SubReg = Dst;
+  unsigned Reg;
+
+  computeIndirectRegAndOffset(Dst, Reg, Off);
  
    MachineInstr *MovRel = 
      BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32))
-            .addReg(SubReg + Off, RegState::Define)
+            .addReg(Reg, RegState::Define)
              .addReg(Val)
              .addReg(AMDGPU::M0, RegState::Implicit)
              .addReg(Dst, RegState::Implicit);
  
-  LoadM0(MI, MovRel);
+  LoadM0(MI, MovRel, Off);
  }
  
  bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
diff --git a/test/CodeGen/R600/indirect-addressing-si.ll b/test/CodeGen/R600/indirect-addressing-si.ll

index 319910fcabfe518f626c56bd1b32315ea6476265..f551606d63a73bbc551fb73aec13fb2ea21683df 100644 (file)
--- a/test/CodeGen/R600/indirect-addressing-si.ll
+++ b/test/CodeGen/R600/indirect-addressing-si.ll
@@ -25,6 +25,33 @@ entry:
    ret void
  }
  
+; CHECK-LABEL: {{^}}extract_neg_offset_sgpr:
+; The offset depends on the register that holds the first element of the vector.
+; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
+; CHECK: v_movrels_b32_e32 v{{[0-9]}}, v0
+define void @extract_neg_offset_sgpr(i32 addrspace(1)* %out, i32 %offset) {
+entry:
+  %index = add i32 %offset, -512
+  %value = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
+  store i32 %value, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}extract_neg_offset_vgpr:
+; The offset depends on the register that holds the first element of the vector.
+; CHECK: v_readfirstlane_b32
+; CHECK: s_add_i32 m0, m0, 0xfffffe{{[0-9a-z]+}}
+; CHECK-NEXT: v_movrels_b32_e32 v{{[0-9]}}, v0
+; CHECK: s_cbranch_execnz
+define void @extract_neg_offset_vgpr(i32 addrspace(1)* %out) {
+entry:
+  %id = call i32 @llvm.r600.read.tidig.x() #1
+  %index = add i32 %id, -512
+  %value = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
+  store i32 %value, i32 addrspace(1)* %out
+  ret void
+}
+
  ; CHECK-LABEL: {{^}}insert_w_offset:
  ; CHECK: s_mov_b32 m0
  ; CHECK-NEXT: v_movreld_b32_e32
@@ -47,3 +74,48 @@ entry:
    store float %1, float addrspace(1)* %out
    ret void
  }
+
+; CHECK-LABEL: {{^}}insert_neg_offset_sgpr:
+; The offset depends on the register that holds the first element of the vector.
+; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
+; CHECK: v_movreld_b32_e32 v0, v{{[0-9]}}
+define void @insert_neg_offset_sgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out, i32 %offset) {
+entry:
+  %index = add i32 %offset, -512
+  %value = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %index
+  store <4 x i32> %value, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}insert_neg_offset_vgpr:
+; The offset depends on the register that holds the first element of the vector.
+; CHECK: v_readfirstlane_b32
+; CHECK: s_add_i32 m0, m0, 0xfffffe{{[0-9a-z]+}}
+; CHECK-NEXT: v_movreld_b32_e32 v0, v{{[0-9]}}
+; CHECK: s_cbranch_execnz
+define void @insert_neg_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) {
+entry:
+  %id = call i32 @llvm.r600.read.tidig.x() #1
+  %index = add i32 %id, -512
+  %value = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %index
+  store <4 x i32> %value, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}insert_neg_inline_offset_vgpr:
+; The offset depends on the register that holds the first element of the vector.
+; CHECK: v_readfirstlane_b32
+; CHECK: s_add_i32 m0, m0, -{{[0-9]+}}
+; CHECK-NEXT: v_movreld_b32_e32 v0, v{{[0-9]}}
+; CHECK: s_cbranch_execnz
+define void @insert_neg_inline_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) {
+entry:
+  %id = call i32 @llvm.r600.read.tidig.x() #1
+  %index = add i32 %id, -16
+  %value = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %index
+  store <4 x i32> %value, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+declare i32 @llvm.r600.read.tidig.x() #1
+attributes #1 = { nounwind readnone }
author	Tom Stellard <thomas.stellard@amd.com>
	Thu, 23 Apr 2015 20:32:01 +0000 (20:32 +0000)
committer	Tom Stellard <thomas.stellard@amd.com>
	Thu, 23 Apr 2015 20:32:01 +0000 (20:32 +0000)
lib/Target/R600/SILowerControlFlow.cpp		patch \| blob \| history
test/CodeGen/R600/indirect-addressing-si.ll		patch \| blob \| history