X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FR600%2FSIShrinkInstructions.cpp;h=caf2572c11b230b5331cbfac3993324f7236c825;hb=12af22e8cc217827cf4f118b0f5e4ebbda9925ae;hp=745c4b65644df897027302cd55a77397dc734de9;hpb=e5fc4208eba3a3ca3489d6bdbdcfff9c45b1e5ab;p=oota-llvm.git

diff --git a/lib/Target/R600/SIShrinkInstructions.cpp b/lib/Target/R600/SIShrinkInstructions.cpp
index 745c4b65644..caf2572c11b 100644
--- a/lib/Target/R600/SIShrinkInstructions.cpp
+++ b/lib/Target/R600/SIShrinkInstructions.cpp
@@ -10,11 +10,13 @@
 //
 
 #include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
 #include "SIInstrInfo.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Function.h"
 #include "llvm/Support/Debug.h"
@@ -24,6 +26,8 @@
 STATISTIC(NumInstructionsShrunk,
           "Number of 64-bit instruction reduced to 32-bit.");
+STATISTIC(NumLiteralConstantsFolded,
+          "Number of literal constants folded into 32-bit instructions.");
 
 namespace llvm {
   void initializeSIShrinkInstructionsPass(PassRegistry&);
 }
@@ -41,13 +45,13 @@ public:
   SIShrinkInstructions() : MachineFunctionPass(ID) {
   }
 
-  virtual bool runOnMachineFunction(MachineFunction &MF) override;
+  bool runOnMachineFunction(MachineFunction &MF) override;
 
-  virtual const char *getPassName() const override {
+  const char *getPassName() const override {
     return "SI Shrink Instructions";
   }
 
-  virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
@@ -90,7 +94,7 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
   const MachineOperand *Src1Mod =
       TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
 
-  if (Src1 && (!isVGPR(Src1, TRI, MRI) || Src1Mod->getImm() != 0))
+  if (Src1 && (!isVGPR(Src1, TRI, MRI) || (Src1Mod && Src1Mod->getImm() != 0)))
     return false;
 
   // We don't need to check src0, all input types are legal, so just make
@@ -109,10 +113,72 @@
   return !Clamp || Clamp->getImm() == 0;
 }
 
+/// \brief This function checks \p MI for operands defined by a move immediate
+/// instruction and then folds the literal constant into the instruction if it
+/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction
+/// and will only fold literal constants if we are still in SSA.
+static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
+                           MachineRegisterInfo &MRI, bool TryToCommute = true) {
+
+  if (!MRI.isSSA())
+    return;
+
+  assert(TII->isVOP1(MI.getOpcode()) || TII->isVOP2(MI.getOpcode()) ||
+         TII->isVOPC(MI.getOpcode()));
+
+  const SIRegisterInfo &TRI = TII->getRegisterInfo();
+  MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+
+  // Only one literal constant is allowed per instruction, so if src0 is a
+  // literal constant then we can't do any folding.
+  if (Src0->isImm() && TII->isLiteralConstant(*Src0))
+    return;
+
+
+  // Literal constants and SGPRs can only be used in Src0, so if Src0 is an
+  // SGPR, we cannot commute the instruction, so we can't fold any literal
+  // constants.
+  if (Src0->isReg() && !isVGPR(Src0, TRI, MRI))
+    return;
+
+  // Try to fold Src0
+  if (Src0->isReg()) {
+    unsigned Reg = Src0->getReg();
+    MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
+    if (Def && Def->isMoveImmediate()) {
+      MachineOperand &MovSrc = Def->getOperand(1);
+      bool ConstantFolded = false;
+
+      if (MovSrc.isImm() && isUInt<32>(MovSrc.getImm())) {
+        Src0->ChangeToImmediate(MovSrc.getImm());
+        ConstantFolded = true;
+      } else if (MovSrc.isFPImm()) {
+        const APFloat &APF = MovSrc.getFPImm()->getValueAPF();
+        if (&APF.getSemantics() == &APFloat::IEEEsingle) {
+          MRI.removeRegOperandFromUseList(Src0);
+          Src0->ChangeToImmediate(APF.bitcastToAPInt().getZExtValue());
+          ConstantFolded = true;
+        }
+      }
+      if (ConstantFolded) {
+        if (MRI.use_empty(Reg))
+          Def->eraseFromParent();
+        ++NumLiteralConstantsFolded;
+        return;
+      }
+    }
+  }
+
+  // We have failed to fold src0, so commute the instruction and try again.
+  if (TryToCommute && MI.isCommutable() && TII->commuteInstruction(&MI))
+    foldImmediates(MI, TII, MRI, false);
+
+}
+
 bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
   MachineRegisterInfo &MRI = MF.getRegInfo();
-  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
-      MF.getTarget().getInstrInfo());
+  const SIInstrInfo *TII =
+      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
   const SIRegisterInfo &TRI = TII->getRegisterInfo();
   std::vector<unsigned> I1Defs;
 
@@ -167,27 +233,28 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
       }
 
       // We can shrink this instruction
-      DEBUG(dbgs() << "Shrinking "; MI.dump(); dbgs() << "\n";);
+      DEBUG(dbgs() << "Shrinking "; MI.dump(); dbgs() << '\n';);
 
-      MachineInstrBuilder MIB =
+      MachineInstrBuilder Inst32 =
           BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));
 
       // dst
-      MIB.addOperand(MI.getOperand(0));
+      Inst32.addOperand(MI.getOperand(0));
 
-      MIB.addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
+      Inst32.addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
 
       const MachineOperand *Src1 =
           TII->getNamedOperand(MI, AMDGPU::OpName::src1);
       if (Src1)
-        MIB.addOperand(*Src1);
+        Inst32.addOperand(*Src1);
 
-      for (const MachineOperand &MO : MI.implicit_operands())
-        MIB.addOperand(MO);
-
-      DEBUG(dbgs() << "e32 MI = "; MI.dump(); dbgs() << "\n";);
       ++NumInstructionsShrunk;
       MI.eraseFromParent();
+
+      foldImmediates(*Inst32, TII, MRI);
+      DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
+
+
     }
   }
   return false;
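
The src0 constraint that foldImmediates enforces is easy to misread from the diff alone, so here is a minimal standalone sketch of just that rule. This is not the LLVM API: Operand, RegBank, and tryFoldSrc0 are hypothetical toy types invented for illustration. They model the rule stated in the patch's comments: a 32-bit VOP1/VOP2/VOPC instruction can carry at most one literal constant, and literals and SGPRs are only legal in src0, so a literal or SGPR already sitting in src0 blocks folding. The real pass additionally tries commuting the instruction before giving up, which the sketch omits.

#include <cstdint>
#include <iostream>
#include <optional>

// Toy operand model (for illustration only): either an inline literal or a
// register known to live in a VGPR or an SGPR.
enum class RegBank { VGPR, SGPR };

struct Operand {
  bool IsImm = false;             // operand is already a literal constant
  RegBank Bank = RegBank::VGPR;   // valid when !IsImm
  std::optional<uint32_t> DefImm; // set when defined by a move-immediate
};

// Mirrors foldImmediates' bail-out logic: if src0 already holds a literal
// (only one is allowed per instruction) or holds an SGPR (which, like a
// literal, is only legal in src0, so it cannot be commuted away), nothing
// can be folded; otherwise fold a move-immediate def's value, if any.
std::optional<uint32_t> tryFoldSrc0(const Operand &Src0) {
  if (Src0.IsImm)
    return std::nullopt;
  if (Src0.Bank == RegBank::SGPR)
    return std::nullopt;
  return Src0.DefImm;
}

int main() {
  Operand FromMov{false, RegBank::VGPR, 0x3f800000u}; // vreg defined by mov 1.0f
  Operand FromSGPR{false, RegBank::SGPR, 0x42u};      // sgpr operand

  if (auto Imm = tryFoldSrc0(FromMov))
    std::cout << "folded literal 0x" << std::hex << *Imm << '\n';
  if (!tryFoldSrc0(FromSGPR))
    std::cout << "SGPR src0: cannot fold\n";
}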