lib/Target/AMDGPU/SIFixSGPRCopies.cpp

   1 //===-- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies --------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 /// \file
  11 /// Copies from VGPR to SGPR registers are illegal and the register coalescer
  12 /// will sometimes generate these illegal copies in situations like this:
  13 ///
  14 ///  Register Class <vsrc> is the union of <vgpr> and <sgpr>
  15 ///
  16 /// BB0:
  17 ///   %vreg0 <sgpr> = SCALAR_INST
  18 ///   %vreg1 <vsrc> = COPY %vreg0 <sgpr>
  19 ///    ...
  20 ///    BRANCH %cond BB1, BB2
  21 ///  BB1:
  22 ///    %vreg2 <vgpr> = VECTOR_INST
  23 ///    %vreg3 <vsrc> = COPY %vreg2 <vgpr>
  24 ///  BB2:
   25 ///    %vreg4 <vsrc> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vsrc>, <BB#1>
  26 ///    %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc>
  27 ///
  28 ///
  29 /// The coalescer will begin at BB0 and eliminate its copy, then the resulting
  30 /// code will look like this:
  31 ///
  32 /// BB0:
  33 ///   %vreg0 <sgpr> = SCALAR_INST
  34 ///    ...
  35 ///    BRANCH %cond BB1, BB2
  36 /// BB1:
  37 ///   %vreg2 <vgpr> = VECTOR_INST
  38 ///   %vreg3 <vsrc> = COPY %vreg2 <vgpr>
  39 /// BB2:
  40 ///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <vsrc>, <BB#1>
  41 ///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
  42 ///
  43 /// Now that the result of the PHI instruction is an SGPR, the register
  44 /// allocator is now forced to constrain the register class of %vreg3 to
  45 /// <sgpr> so we end up with final code like this:
  46 ///
  47 /// BB0:
  48 ///   %vreg0 <sgpr> = SCALAR_INST
  49 ///    ...
  50 ///    BRANCH %cond BB1, BB2
  51 /// BB1:
  52 ///   %vreg2 <vgpr> = VECTOR_INST
  53 ///   %vreg3 <sgpr> = COPY %vreg2 <vgpr>
  54 /// BB2:
  55 ///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <sgpr>, <BB#1>
  56 ///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
  57 ///
  58 /// Now this code contains an illegal copy from a VGPR to an SGPR.
  59 ///
  60 /// In order to avoid this problem, this pass searches for PHI instructions
  61 /// which define a <vsrc> register and constrains its definition class to
  62 /// <vgpr> if the user of the PHI's definition register is a vector instruction.
  63 /// If the PHI's definition class is constrained to <vgpr> then the coalescer
   64 /// will be unable to perform the COPY removal from the above example which
  65 /// ultimately led to the creation of an illegal COPY.
  66 //===----------------------------------------------------------------------===//
  67
  68 #include "AMDGPU.h"
  69 #include "AMDGPUSubtarget.h"
  70 #include "SIInstrInfo.h"
  71 #include "llvm/CodeGen/MachineFunctionPass.h"
  72 #include "llvm/CodeGen/MachineInstrBuilder.h"
  73 #include "llvm/CodeGen/MachineRegisterInfo.h"
  74 #include "llvm/Support/Debug.h"
  75 #include "llvm/Support/raw_ostream.h"
  76 #include "llvm/Target/TargetMachine.h"
  77
  78 using namespace llvm;
  79
  80 #define DEBUG_TYPE "sgpr-copies"
  81
  82 namespace {
  83
  84 class SIFixSGPRCopies : public MachineFunctionPass {
  85 public:
  86   static char ID;
  87
  88   SIFixSGPRCopies() : MachineFunctionPass(ID) { }
  89
  90   bool runOnMachineFunction(MachineFunction &MF) override;
  91
  92   const char *getPassName() const override {
  93     return "SI Fix SGPR copies";
  94   }
  95
  96   void getAnalysisUsage(AnalysisUsage &AU) const override {
  97     AU.setPreservesCFG();
  98     MachineFunctionPass::getAnalysisUsage(AU);
  99   }
 100 };
 101
 102 } // End anonymous namespace
 103
 104 INITIALIZE_PASS(SIFixSGPRCopies, DEBUG_TYPE,
 105                 "SI Fix SGPR copies", false, false)
 106
 107 char SIFixSGPRCopies::ID = 0;
 108
 109 char &llvm::SIFixSGPRCopiesID = SIFixSGPRCopies::ID;
 110
 111 FunctionPass *llvm::createSIFixSGPRCopiesPass() {
 112   return new SIFixSGPRCopies();
 113 }
 114
 115 static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
 116   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
 117   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
 118     if (!MI.getOperand(i).isReg() ||
 119         !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
 120       continue;
 121
 122     if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg())))
 123       return true;
 124   }
 125   return false;
 126 }
 127
 128 static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
 129 getCopyRegClasses(const MachineInstr &Copy,
 130                   const SIRegisterInfo &TRI,
 131                   const MachineRegisterInfo &MRI) {
 132   unsigned DstReg = Copy.getOperand(0).getReg();
 133   unsigned SrcReg = Copy.getOperand(1).getReg();
 134
 135   const TargetRegisterClass *SrcRC =
 136     TargetRegisterInfo::isVirtualRegister(SrcReg) ?
 137     MRI.getRegClass(SrcReg) :
 138     TRI.getPhysRegClass(SrcReg);
 139
 140   // We don't really care about the subregister here.
 141   // SrcRC = TRI.getSubRegClass(SrcRC, Copy.getOperand(1).getSubReg());
 142
 143   const TargetRegisterClass *DstRC =
 144     TargetRegisterInfo::isVirtualRegister(DstReg) ?
 145     MRI.getRegClass(DstReg) :
 146     TRI.getPhysRegClass(DstReg);
 147
 148   return std::make_pair(SrcRC, DstRC);
 149 }
 150
 151 static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC,
 152                              const TargetRegisterClass *DstRC,
 153                              const SIRegisterInfo &TRI) {
 154   return TRI.isSGPRClass(DstRC) && TRI.hasVGPRs(SrcRC);
 155 }
 156
 157 static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC,
 158                              const TargetRegisterClass *DstRC,
 159                              const SIRegisterInfo &TRI) {
 160   return TRI.isSGPRClass(SrcRC) && TRI.hasVGPRs(DstRC);
 161 }
 162
 163 // Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE.
 164 //
 165 // SGPRx = ...
 166 // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
 167 // VGPRz = COPY SGPRy
 168 //
 169 // ==>
 170 //
 171 // VGPRx = COPY SGPRx
 172 // VGPRz = REG_SEQUENCE VGPRx, sub0
 173 //
 174 // This exposes immediate folding opportunities when materializing 64-bit
 175 // immediates.
 176 static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
 177                                         const SIRegisterInfo *TRI,
 178                                         const SIInstrInfo *TII,
 179                                         MachineRegisterInfo &MRI) {
 180   assert(MI.isRegSequence());
 181
 182   unsigned DstReg = MI.getOperand(0).getReg();
 183   if (!TRI->isSGPRClass(MRI.getRegClass(DstReg)))
 184     return false;
 185
 186   if (!MRI.hasOneUse(DstReg))
 187     return false;
 188
 189   MachineInstr &CopyUse = *MRI.use_instr_begin(DstReg);
 190   if (!CopyUse.isCopy())
 191     return false;
 192
 193   const TargetRegisterClass *SrcRC, *DstRC;
 194   std::tie(SrcRC, DstRC) = getCopyRegClasses(CopyUse, *TRI, MRI);
 195
 196   if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI))
 197     return false;
 198
 199   // TODO: Could have multiple extracts?
 200   unsigned SubReg = CopyUse.getOperand(1).getSubReg();
 201   if (SubReg != AMDGPU::NoSubRegister)
 202     return false;
 203
 204   MRI.setRegClass(DstReg, DstRC);
 205
 206   // SGPRx = ...
 207   // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
 208   // VGPRz = COPY SGPRy
 209
 210   // =>
 211   // VGPRx = COPY SGPRx
 212   // VGPRz = REG_SEQUENCE VGPRx, sub0
 213
 214   MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());
 215
 216   for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
 217     unsigned SrcReg = MI.getOperand(I).getReg();
 218     unsigned SrcSubReg = MI.getOperand(I).getReg();
 219
 220     const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
 221     assert(TRI->isSGPRClass(SrcRC) &&
 222            "Expected SGPR REG_SEQUENCE to only have SGPR inputs");
 223
 224     SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg);
 225     const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC);
 226
 227     unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC);
 228
 229     BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY), TmpReg)
 230       .addOperand(MI.getOperand(I));
 231
 232     MI.getOperand(I).setReg(TmpReg);
 233   }
 234
 235   CopyUse.eraseFromParent();
 236   return true;
 237 }
 238
 239 bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
 240   MachineRegisterInfo &MRI = MF.getRegInfo();
 241   const SIRegisterInfo *TRI =
 242       static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
 243   const SIInstrInfo *TII =
 244       static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
 245
 246   SmallVector<MachineInstr *, 16> Worklist;
 247
 248   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
 249                                                   BI != BE; ++BI) {
 250
 251     MachineBasicBlock &MBB = *BI;
 252     for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
 253          I != E; ++I) {
 254       MachineInstr &MI = *I;
 255
 256       switch (MI.getOpcode()) {
 257       default:
 258         continue;
 259       case AMDGPU::COPY: {
 260         // If the destination register is a physical register there isn't really
 261         // much we can do to fix this.
 262         if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()))
 263           continue;
 264
 265         const TargetRegisterClass *SrcRC, *DstRC;
 266         std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);
 267         if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
 268           DEBUG(dbgs() << "Fixing VGPR -> SGPR copy: " << MI);
 269           TII->moveToVALU(MI);
 270         }
 271
 272         break;
 273       }
 274       case AMDGPU::PHI: {
 275         DEBUG(dbgs() << "Fixing PHI: " << MI);
 276         unsigned Reg = MI.getOperand(0).getReg();
 277         if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
 278           break;
 279
 280         // If a PHI node defines an SGPR and any of its operands are VGPRs,
 281         // then we need to move it to the VALU.
 282         //
 283         // Also, if a PHI node defines an SGPR and has all SGPR operands
 284         // we must move it to the VALU, because the SGPR operands will
 285         // all end up being assigned the same register, which means
 286         // there is a potential for a conflict if different threads take
 287         // different control flow paths.
 288         //
 289         // For Example:
 290         //
 291         // sgpr0 = def;
 292         // ...
 293         // sgpr1 = def;
 294         // ...
 295         // sgpr2 = PHI sgpr0, sgpr1
 296         // use sgpr2;
 297         //
 298         // Will Become:
 299         //
 300         // sgpr2 = def;
 301         // ...
 302         // sgpr2 = def;
 303         // ...
 304         // use sgpr2
 305         //
 306         // FIXME: This is OK if the branching decision is made based on an
 307         // SGPR value.
 308         bool SGPRBranch = false;
 309
 310         // The one exception to this rule is when one of the operands
 311         // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
 312         // instruction.  In this case, there we know the program will
 313         // never enter the second block (the loop) without entering
 314         // the first block (where the condition is computed), so there
 315         // is no chance for values to be over-written.
 316
 317         bool HasBreakDef = false;
 318         for (unsigned i = 1; i < MI.getNumOperands(); i+=2) {
 319           unsigned Reg = MI.getOperand(i).getReg();
 320           if (TRI->hasVGPRs(MRI.getRegClass(Reg))) {
 321             TII->moveToVALU(MI);
 322             break;
 323           }
 324           MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg);
 325           assert(DefInstr);
 326           switch(DefInstr->getOpcode()) {
 327
 328           case AMDGPU::SI_BREAK:
 329           case AMDGPU::SI_IF_BREAK:
 330           case AMDGPU::SI_ELSE_BREAK:
 331           // If we see a PHI instruction that defines an SGPR, then that PHI
 332           // instruction has already been considered and should have
 333           // a *_BREAK as an operand.
 334           case AMDGPU::PHI:
 335             HasBreakDef = true;
 336             break;
 337           }
 338         }
 339
 340         if (!SGPRBranch && !HasBreakDef)
 341           TII->moveToVALU(MI);
 342         break;
 343       }
 344       case AMDGPU::REG_SEQUENCE: {
 345         if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
 346             !hasVGPROperands(MI, TRI)) {
 347           foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI);
 348           continue;
 349         }
 350
 351         DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);
 352
 353         TII->moveToVALU(MI);
 354         break;
 355       }
 356       case AMDGPU::INSERT_SUBREG: {
 357         const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
 358         DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
 359         Src0RC = MRI.getRegClass(MI.getOperand(1).getReg());
 360         Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
 361         if (TRI->isSGPRClass(DstRC) &&
 362             (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
 363           DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
 364           TII->moveToVALU(MI);
 365         }
 366         break;
 367       }
 368       }
 369     }
 370   }
 371
 372   return true;
 373 }