R600/SI: Refactor SIFoldOperands to simplify immediate folding
[oota-llvm.git] lib/Target/R600/SIFoldOperands.cpp
//===-- SIFoldOperands.cpp - Fold operands ---------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
//===----------------------------------------------------------------------===//
//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "si-fold-operands"
using namespace llvm;

namespace {

class SIFoldOperands : public MachineFunctionPass {
public:
  static char ID;

  SIFoldOperands() : MachineFunctionPass(ID) {
    initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  const char *getPassName() const override {
    return "SI Fold Operands";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineDominatorTree>();
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

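// A single pending fold: substitute either the register operand *OpToFold or
// the immediate value ImmToFold into operand UseOpNo of UseMI.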
struct FoldCandidate {
  MachineInstr *UseMI;
  unsigned UseOpNo;
  MachineOperand *OpToFold;
  uint64_t ImmToFold;

  FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
      UseMI(MI), UseOpNo(OpNo), OpToFold(FoldOp), ImmToFold(0) { }

  FoldCandidate(MachineInstr *MI, unsigned OpNo, uint64_t Imm) :
      UseMI(MI), UseOpNo(OpNo), OpToFold(nullptr), ImmToFold(Imm) { }

  bool isImm() const {
    return !OpToFold;
  }
};

} // End anonymous namespace.

INITIALIZE_PASS_BEGIN(SIFoldOperands, DEBUG_TYPE,
                      "SI Fold Operands", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(SIFoldOperands, DEBUG_TYPE,
                    "SI Fold Operands", false, false)

char SIFoldOperands::ID = 0;

char &llvm::SIFoldOperandsID = SIFoldOperands::ID;

FunctionPass *llvm::createSIFoldOperandsPass() {
  return new SIFoldOperands();
}

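// Only plain moves and copies are treated as fold sources: their single
// source operand is the value propagated into the users of the result.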
static bool isSafeToFold(unsigned Opcode) {
  switch(Opcode) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::COPY:
    return true;
  default:
    return false;
  }
}

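// Rewrite the use operand recorded in Fold: immediate candidates turn the
// register use into an immediate operand, register candidates substitute the
// virtual register. Returns false if the fold cannot be applied (physical
// registers are not handled yet).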
static bool updateOperand(FoldCandidate &Fold,
                          const TargetRegisterInfo &TRI) {
  MachineInstr *MI = Fold.UseMI;
  MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
  assert(Old.isReg());

  if (Fold.isImm()) {
    Old.ChangeToImmediate(Fold.ImmToFold);
    return true;
  }

  MachineOperand *New = Fold.OpToFold;
  if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
      TargetRegisterInfo::isVirtualRegister(New->getReg())) {
    Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
    return true;
  }

  // FIXME: Handle physical registers.

  return false;
}

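// For each mov/copy in the function, collect the uses of its result that can
// legally take the source operand directly, then rewrite those uses in place.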
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
                                                  BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (!isSafeToFold(MI.getOpcode()))
        continue;

      MachineOperand &OpToFold = MI.getOperand(1);

      // FIXME: Fold operands with subregs.
      if (OpToFold.isReg() &&
          (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) ||
           OpToFold.getSubReg()))
        continue;

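      // Gather all legal folds for this def first; the uses are only
      // rewritten once the use list has been fully walked.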
      std::vector<FoldCandidate> FoldList;
      for (MachineRegisterInfo::use_iterator
           Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end();
           Use != E; ++Use) {

        MachineInstr *UseMI = Use->getParent();
        const MachineOperand &UseOp = UseMI->getOperand(Use.getOperandNo());

        // FIXME: Fold operands with subregs.
        if (UseOp.isReg() && UseOp.getSubReg()) {
          continue;
        }

        bool FoldingImm = OpToFold.isImm() || OpToFold.isFPImm();

        // In order to fold immediates into copies, we need to change the
        // copy to a MOV.
        if (FoldingImm && UseMI->getOpcode() == AMDGPU::COPY) {
          const TargetRegisterClass *TRC =
              MRI.getRegClass(UseMI->getOperand(0).getReg());

          if (TRC->getSize() == 4) {
            if (TRI.isSGPRClass(TRC))
              UseMI->setDesc(TII->get(AMDGPU::S_MOV_B32));
            else
              UseMI->setDesc(TII->get(AMDGPU::V_MOV_B32_e32));
          } else if (TRC->getSize() == 8 && TRI.isSGPRClass(TRC)) {
            UseMI->setDesc(TII->get(AMDGPU::S_MOV_B64));
          } else {
            continue;
          }
        }

        const MCInstrDesc &UseDesc = UseMI->getDesc();

        // Don't fold into target independent nodes.  Target independent opcodes
        // don't have defined register classes.
        if (UseDesc.isVariadic() ||
            UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1)
          continue;

        if (FoldingImm) {
          uint64_t Imm;
          if (OpToFold.isFPImm()) {
            Imm = OpToFold.getFPImm()->getValueAPF().bitcastToAPInt().getSExtValue();
          } else {
            Imm = OpToFold.getImm();
          }

          const MachineOperand ImmOp = MachineOperand::CreateImm(Imm);
          if (TII->isOperandLegal(UseMI, Use.getOperandNo(), &ImmOp)) {
            FoldList.push_back(FoldCandidate(UseMI, Use.getOperandNo(), Imm));
            continue;
          }
        }

        // Normal substitution with registers
        if (TII->isOperandLegal(UseMI, Use.getOperandNo(), &OpToFold)) {
          FoldList.push_back(FoldCandidate(UseMI, Use.getOperandNo(), &OpToFold));
          continue;
        }

        // FIXME: We could commute the instruction to create more opportunities
        // for folding.  This will only be useful if we have 32-bit instructions.

        // FIXME: We could try to change the instruction from 64-bit to 32-bit
        // to enable more folding opportunities.  The shrink operands pass
        // already does this.
      }

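      // Commit the collected folds now that iteration over the use list is
      // complete.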
      for (FoldCandidate &Fold : FoldList) {
        if (updateOperand(Fold, TRI)) {
          // Clear kill flags.
          if (!Fold.isImm()) {
            assert(Fold.OpToFold && Fold.OpToFold->isReg());
            Fold.OpToFold->setIsKill(false);
          }
          DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
                Fold.UseOpNo << " of " << *Fold.UseMI << '\n');
        }
      }
    }
  }
  return false;
}