/// %vreg3 <vsrc> = COPY %vreg2 <vgpr>
/// BB2:
/// %vreg4 <vsrc> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vrsc>, <BB#1>
-/// %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc>
+/// %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc>
+///
///
-///
/// The coalescer will begin at BB0 and eliminate its copy, then the resulting
/// code will look like this:
///
/// Now that the result of the PHI instruction is an SGPR, the register
/// allocator is now forced to constrain the register class of %vreg3 to
/// <sgpr> so we end up with final code like this:
-///
+///
/// BB0:
/// %vreg0 <sgpr> = SCALAR_INST
/// ...
/// %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <sgpr>, <BB#1>
/// %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
///
-/// Now this code contains an illegal copy from a VGPR to an SGPR.
+/// Now this code contains an illegal copy from a VGPR to an SGPR.
///
/// In order to avoid this problem, this pass searches for PHI instructions
/// which define a <vsrc> register and constrains its definition class to
/// ultimately led to the creation of an illegal COPY.
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "sgpr-copies"
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
+#define DEBUG_TYPE "sgpr-copies"
+
namespace {
class SIFixSGPRCopies : public MachineFunctionPass {
public:
SIFixSGPRCopies(TargetMachine &tm) : MachineFunctionPass(ID) { }
- virtual bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const {
+ const char *getPassName() const override {
return "SI Fix SGPR copies";
}
/// This functions walks the use list of Reg until it finds an Instruction
/// that isn't a COPY returns the register class of that instruction.
-/// \param[out] The register defined by the first non-COPY instruction.
+/// \return The register defined by the first non-COPY instruction.
const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromUses(
const SIRegisterInfo *TRI,
const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC = MRI.getRegClass(Reg);
RC = TRI->getSubRegClass(RC, SubReg);
- for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg),
- E = MRI.use_end(); I != E; ++I) {
+ for (MachineRegisterInfo::use_instr_iterator
+ I = MRI.use_instr_begin(Reg), E = MRI.use_instr_end(); I != E; ++I) {
switch (I->getOpcode()) {
case AMDGPU::COPY:
RC = TRI->getCommonSubClass(RC, inferRegClassFromUses(TRI, MRI,
unsigned SrcReg = Copy.getOperand(1).getReg();
unsigned SrcSubReg = Copy.getOperand(1).getSubReg();
const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
+ const TargetRegisterClass *SrcRC;
if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
- DstRC == &AMDGPU::M0RegRegClass)
+ DstRC == &AMDGPU::M0RegRegClass ||
+ MRI.getRegClass(SrcReg) == &AMDGPU::VReg_1RegClass)
return false;
- const TargetRegisterClass *SrcRC = TRI->getSubRegClass(
- MRI.getRegClass(SrcReg), SrcSubReg);
-
- return TRI->isSGPRClass(DstRC) &&
- !TRI->getCommonSubClass(DstRC, SrcRC);
+ SrcRC = TRI->getSubRegClass(MRI.getRegClass(SrcReg), SrcSubReg);
+ return TRI->isSGPRClass(DstRC) && TRI->hasVGPRs(SrcRC);
}
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
- const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
- MF.getTarget().getRegisterInfo());
- const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
- MF.getTarget().getInstrInfo());
+ const SIRegisterInfo *TRI =
+ static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {
!hasVGPROperands(MI, TRI))
continue;
- DEBUG(dbgs() << "Fixing REG_SEQUENCE: \n");
+ DEBUG(dbgs() << "Fixing REG_SEQUENCE:\n");
DEBUG(MI.print(dbgs()));
TII->moveToVALU(MI);
- TII->legalizeOperands(&MI);
+ break;
+ }
+ case AMDGPU::INSERT_SUBREG: {
+ const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
+ DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
+ Src0RC = MRI.getRegClass(MI.getOperand(1).getReg());
+ Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
+ if (TRI->isSGPRClass(DstRC) &&
+ (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
+ DEBUG(dbgs() << " Fixing INSERT_SUBREG:\n");
+ DEBUG(MI.print(dbgs()));
+ TII->moveToVALU(MI);
+ }
break;
}
}