X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FR600%2FSIFixSGPRCopies.cpp;h=c108571fcfb7f9283c3c8bed91e3a80b70535c8f;hb=12af22e8cc217827cf4f118b0f5e4ebbda9925ae;hp=435172a08ee0d03f12b0e91582784270712b1836;hpb=3492eefa4b2509c87598678a6977074a3f6a50e6;p=oota-llvm.git diff --git a/lib/Target/R600/SIFixSGPRCopies.cpp b/lib/Target/R600/SIFixSGPRCopies.cpp index 435172a08ee..c108571fcfb 100644 --- a/lib/Target/R600/SIFixSGPRCopies.cpp +++ b/lib/Target/R600/SIFixSGPRCopies.cpp @@ -23,9 +23,9 @@ /// %vreg3 = COPY %vreg2 /// BB2: /// %vreg4 = PHI %vreg1 , , %vreg3 , -/// %vreg5 = VECTOR_INST %vreg4 +/// %vreg5 = VECTOR_INST %vreg4 +/// /// -/// /// The coalescer will begin at BB0 and eliminate its copy, then the resulting /// code will look like this: /// @@ -43,7 +43,7 @@ /// Now that the result of the PHI instruction is an SGPR, the register /// allocator is now forced to constrain the register class of %vreg3 to /// so we end up with final code like this: -/// +/// /// BB0: /// %vreg0 = SCALAR_INST /// ... @@ -55,7 +55,7 @@ /// %vreg4 = PHI %vreg0 , , %vreg3 , /// %vreg5 = VECTOR_INST %vreg4 /// -/// Now this code contains an illegal copy from a VGPR to an SGPR. +/// Now this code contains an illegal copy from a VGPR to an SGPR. /// /// In order to avoid this problem, this pass searches for PHI instructions /// which define a register and constrains its definition class to @@ -66,29 +66,42 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" +#include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "sgpr-copies" + namespace { class SIFixSGPRCopies : public MachineFunctionPass { private: static char ID; - const TargetRegisterClass *inferRegClass(const TargetRegisterInfo *TRI, + const TargetRegisterClass *inferRegClassFromUses(const SIRegisterInfo *TRI, const MachineRegisterInfo &MRI, - unsigned Reg) const; + unsigned Reg, + unsigned SubReg) const; + const TargetRegisterClass *inferRegClassFromDef(const SIRegisterInfo *TRI, + const MachineRegisterInfo &MRI, + unsigned Reg, + unsigned SubReg) const; + bool isVGPRToSGPRCopy(const MachineInstr &Copy, const SIRegisterInfo *TRI, + const MachineRegisterInfo &MRI) const; public: SIFixSGPRCopies(TargetMachine &tm) : MachineFunctionPass(ID) { } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - const char *getPassName() const { + const char *getPassName() const override { return "SI Fix SGPR copies"; } @@ -102,25 +115,41 @@ FunctionPass *llvm::createSIFixSGPRCopiesPass(TargetMachine &tm) { return new SIFixSGPRCopies(tm); } -/// This functions walks the use/def chains starting with the definition of -/// \p Reg until it finds an Instruction that isn't a COPY returns -/// the register class of that instruction. -const TargetRegisterClass *SIFixSGPRCopies::inferRegClass( - const TargetRegisterInfo *TRI, +static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) { + const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + if (!MI.getOperand(i).isReg() || + !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg())) + continue; + + if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg()))) + return true; + } + return false; +} + +/// This functions walks the use list of Reg until it finds an Instruction +/// that isn't a COPY returns the register class of that instruction. +/// \return The register defined by the first non-COPY instruction. +const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromUses( + const SIRegisterInfo *TRI, const MachineRegisterInfo &MRI, - unsigned Reg) const { + unsigned Reg, + unsigned SubReg) const { // The Reg parameter to the function must always be defined by either a PHI // or a COPY, therefore it cannot be a physical register. assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Reg cannot be a physical register"); const TargetRegisterClass *RC = MRI.getRegClass(Reg); - for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg), - E = MRI.use_end(); I != E; ++I) { + RC = TRI->getSubRegClass(RC, SubReg); + for (MachineRegisterInfo::use_instr_iterator + I = MRI.use_instr_begin(Reg), E = MRI.use_instr_end(); I != E; ++I) { switch (I->getOpcode()) { case AMDGPU::COPY: - RC = TRI->getCommonSubClass(RC, inferRegClass(TRI, MRI, - I->getOperand(0).getReg())); + RC = TRI->getCommonSubClass(RC, inferRegClassFromUses(TRI, MRI, + I->getOperand(0).getReg(), + I->getOperand(0).getSubReg())); break; } } @@ -128,9 +157,49 @@ const TargetRegisterClass *SIFixSGPRCopies::inferRegClass( return RC; } +const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromDef( + const SIRegisterInfo *TRI, + const MachineRegisterInfo &MRI, + unsigned Reg, + unsigned SubReg) const { + if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + const TargetRegisterClass *RC = TRI->getPhysRegClass(Reg); + return TRI->getSubRegClass(RC, SubReg); + } + MachineInstr *Def = MRI.getVRegDef(Reg); + if (Def->getOpcode() != AMDGPU::COPY) { + return TRI->getSubRegClass(MRI.getRegClass(Reg), SubReg); + } + + return inferRegClassFromDef(TRI, MRI, Def->getOperand(1).getReg(), + Def->getOperand(1).getSubReg()); +} + +bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy, + const SIRegisterInfo *TRI, + const MachineRegisterInfo &MRI) const { + + unsigned DstReg = Copy.getOperand(0).getReg(); + unsigned SrcReg = Copy.getOperand(1).getReg(); + unsigned SrcSubReg = Copy.getOperand(1).getSubReg(); + const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg); + const TargetRegisterClass *SrcRC; + + if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || + DstRC == &AMDGPU::M0RegRegClass || + MRI.getRegClass(SrcReg) == &AMDGPU::VReg_1RegClass) + return false; + + SrcRC = TRI->getSubRegClass(MRI.getRegClass(SrcReg), SrcSubReg); + return TRI->isSGPRClass(DstRC) && TRI->hasVGPRs(SrcRC); +} + bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); - const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + const SIRegisterInfo *TRI = + static_cast(MF.getSubtarget().getRegisterInfo()); + const SIInstrInfo *TII = + static_cast(MF.getSubtarget().getInstrInfo()); for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) { @@ -138,13 +207,71 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { MachineInstr &MI = *I; - if (MI.getOpcode() != AMDGPU::PHI) { - continue; + if (MI.getOpcode() == AMDGPU::COPY && isVGPRToSGPRCopy(MI, TRI, MRI)) { + DEBUG(dbgs() << "Fixing VGPR -> SGPR copy:\n"); + DEBUG(MI.print(dbgs())); + TII->moveToVALU(MI); + + } + + switch (MI.getOpcode()) { + default: continue; + case AMDGPU::PHI: { + DEBUG(dbgs() << " Fixing PHI:\n"); + DEBUG(MI.print(dbgs())); + + for (unsigned i = 1; i < MI.getNumOperands(); i+=2) { + unsigned Reg = MI.getOperand(i).getReg(); + const TargetRegisterClass *RC = inferRegClassFromDef(TRI, MRI, Reg, + MI.getOperand(0).getSubReg()); + MRI.constrainRegClass(Reg, RC); + } + unsigned Reg = MI.getOperand(0).getReg(); + const TargetRegisterClass *RC = inferRegClassFromUses(TRI, MRI, Reg, + MI.getOperand(0).getSubReg()); + if (TRI->getCommonSubClass(RC, &AMDGPU::VReg_32RegClass)) { + MRI.constrainRegClass(Reg, &AMDGPU::VReg_32RegClass); + } + + if (!TRI->isSGPRClass(MRI.getRegClass(Reg))) + break; + + // If a PHI node defines an SGPR and any of its operands are VGPRs, + // then we need to move it to the VALU. + for (unsigned i = 1; i < MI.getNumOperands(); i+=2) { + unsigned Reg = MI.getOperand(i).getReg(); + if (TRI->hasVGPRs(MRI.getRegClass(Reg))) { + TII->moveToVALU(MI); + break; + } + } + + break; + } + case AMDGPU::REG_SEQUENCE: { + if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) || + !hasVGPROperands(MI, TRI)) + continue; + + DEBUG(dbgs() << "Fixing REG_SEQUENCE:\n"); + DEBUG(MI.print(dbgs())); + + TII->moveToVALU(MI); + break; + } + case AMDGPU::INSERT_SUBREG: { + const TargetRegisterClass *DstRC, *Src0RC, *Src1RC; + DstRC = MRI.getRegClass(MI.getOperand(0).getReg()); + Src0RC = MRI.getRegClass(MI.getOperand(1).getReg()); + Src1RC = MRI.getRegClass(MI.getOperand(2).getReg()); + if (TRI->isSGPRClass(DstRC) && + (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) { + DEBUG(dbgs() << " Fixing INSERT_SUBREG:\n"); + DEBUG(MI.print(dbgs())); + TII->moveToVALU(MI); + } + break; } - unsigned Reg = MI.getOperand(0).getReg(); - const TargetRegisterClass *RC = inferRegClass(TRI, MRI, Reg); - if (RC == &AMDGPU::VSrc_32RegClass) { - MRI.constrainRegClass(Reg, &AMDGPU::VReg_32RegClass); } } }