From: Marek Olsak Date: Tue, 3 Feb 2015 17:37:52 +0000 (+0000) Subject: R600/SI: Fix dependency between instruction writing M0 and S_SENDMSG on VI (v2) X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=b19dbd9eb32d72a55b3bffbc07b042419a86ec9f;p=oota-llvm.git R600/SI: Fix dependency between instruction writing M0 and S_SENDMSG on VI (v2) This fixes a hang when using an empty geometry shader. v2: - don't add s_nop when followed by s_waitcnt - comestic changes Tested-by: Michel Dänzer git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227986 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp index 181b11643bf..50f20ac3619 100644 --- a/lib/Target/R600/SIInsertWaits.cpp +++ b/lib/Target/R600/SIInsertWaits.cpp @@ -82,6 +82,8 @@ private: /// \brief Type of the last opcode. InstType LastOpcodeType; + bool LastInstWritesM0; + /// \brief Get increment/decrement amount for this instruction. Counters getHwCounts(MachineInstr &MI); @@ -106,6 +108,9 @@ private: /// \brief Resolve all operand dependencies to counter requirements Counters handleOperands(MachineInstr &MI); + /// \brief Insert S_NOP between an instruction writing M0 and S_SENDMSG. + void handleSendMsg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I); + public: SIInsertWaits(TargetMachine &tm) : MachineFunctionPass(ID), @@ -269,6 +274,7 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB, // Insert a NOP to break the clause. BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)) .addImm(0); + LastInstWritesM0 = false; } if (TII->isSMRD(I->getOpcode())) @@ -362,6 +368,7 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB, ((Counts.Named.LGKM & 0x7) << 8)); LastOpcodeType = OTHER; + LastInstWritesM0 = false; return true; } @@ -403,6 +410,30 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) { return Result; } +void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) { + if (TRI->ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS) + return; + + // There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG. + if (LastInstWritesM0 && I->getOpcode() == AMDGPU::S_SENDMSG) { + BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0); + LastInstWritesM0 = false; + return; + } + + // Set whether this instruction sets M0 + LastInstWritesM0 = false; + + unsigned NumOperands = I->getNumOperands(); + for (unsigned i = 0; i < NumOperands; i++) { + const MachineOperand &Op = I->getOperand(i); + + if (Op.isReg() && Op.isDef() && Op.getReg() == AMDGPU::M0) + LastInstWritesM0 = true; + } +} + // FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States" // around other non-memory instructions. bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { @@ -417,6 +448,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { WaitedOn = ZeroCounts; LastIssued = ZeroCounts; LastOpcodeType = OTHER; + LastInstWritesM0 = false; memset(&UsedRegs, 0, sizeof(UsedRegs)); memset(&DefinedRegs, 0, sizeof(DefinedRegs)); @@ -433,7 +465,9 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { Changes |= insertWait(MBB, I, LastIssued); else Changes |= insertWait(MBB, I, handleOperands(*I)); + pushInstruction(MBB, I); + handleSendMsg(MBB, I); } // Wait for everything at the end of the MBB diff --git a/test/CodeGen/R600/llvm.SI.sendmsg-m0.ll b/test/CodeGen/R600/llvm.SI.sendmsg-m0.ll new file mode 100644 index 00000000000..2198590f2df --- /dev/null +++ b/test/CodeGen/R600/llvm.SI.sendmsg-m0.ll @@ -0,0 +1,20 @@ +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=BOTH %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=BOTH %s + +; BOTH-LABEL: {{^}}main: +; BOTH: s_mov_b32 m0, s0 +; VI-NEXT: s_nop 0 +; BOTH-NEXT: s_sendmsg Gs_done(nop) +; BOTH-NEXT: s_endpgm + +define void @main(i32 inreg %a) #0 { +main_body: + call void @llvm.SI.sendmsg(i32 3, i32 %a) + ret void +} + +; Function Attrs: nounwind +declare void @llvm.SI.sendmsg(i32, i32) #1 + +attributes #0 = { "ShaderType"="2" "unsafe-fp-math"="true" } +attributes #1 = { nounwind }