X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FPrologEpilogInserter.cpp;h=365a84ab5242648a4939ad75e81233206671203a;hb=99be8ae3898d87373ef0c8f1159b287e28a8d81b;hp=6015d74abe7bc4b43a0177b0fd172cfc94aa1b84;hpb=b9cfbd94abb23ec8646b9b10aa4ac3d1cbf4461e;p=oota-llvm.git diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 6015d74abe7..365a84ab524 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -14,1075 +14,137 @@ // This pass must be run after register allocation. After this pass is // executed, it is illegal to construct MO_FrameIndex operands. // -// This pass implements a shrink wrapping variant of prolog/epilog insertion: -// - Places callee saved register (CSR) spills and restores in the CFG to -// tightly surround uses so that execution paths that do not use CSRs do not -// pay the spill/restore penalty. -// -// - Avoiding placment of spills/restores in loops: if a CSR is used inside a -// loop(nest), the spills are placed in the loop preheader, and restores are -// placed in the loop exit nodes (the successors of the loop _exiting_ nodes). -// -// - Covering paths without CSR uses: e.g. if a restore is placed in a join -// block, a matching spill is added to the end of all immediate predecessor -// blocks that are not reached by a spill. Similarly for saves placed in -// branch blocks. -// -// Shrink wrapping uses an analysis similar to the one in GVNPRE to determine -// which basic blocks require callee-saved register save/restore code. -// -// This pass uses MachineDominators and MachineLoopInfo. Loop information -// is used to prevent shrink wrapping of callee-saved register save/restore -// code into loops. +// This pass provides an optional shrink wrapping variant of prolog/epilog +// insertion, enabled via --shrink-wrap. See ShrinkWrapping.cpp. // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "shrink-wrap" - -#include "llvm/CodeGen/Passes.h" +#include "PrologEpilogInserter.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/ADT/SparseBitVector.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" +#include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/Statistic.h" #include -#include using namespace llvm; -STATISTIC(numSRReduced, "Number of CSR spills+restores reduced."); - -// Shrink Wrapping: -static cl::opt -ShrinkWrapping("shrink-wrap", - cl::desc("Shrink wrap callee-saved register spills/restores")); - -// Shrink wrap only the specified function, a debugging aid. -static cl::opt -ShrinkWrapFunc("shrink-wrap-func", cl::Hidden, - cl::desc("Shrink wrap the specified function"), - cl::value_desc("funcname"), - cl::init("")); - -// Debugging level for shrink wrapping. -enum ShrinkWrapDebugLevel { - None, BasicInfo, Iterations, Details -}; - -static cl::opt -ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden, - cl::desc("Print shrink wrapping debugging information"), - cl::values( - clEnumVal(None , "disable debug output"), - clEnumVal(BasicInfo , "print basic DF sets"), - clEnumVal(Iterations, "print SR sets for each iteration"), - clEnumVal(Details , "print all DF sets"), - clEnumValEnd)); - - -namespace { - struct VISIBILITY_HIDDEN PEI : public MachineFunctionPass { - static char ID; - PEI() : MachineFunctionPass(&ID) {} - - const char *getPassName() const { - return "Prolog/Epilog Insertion & Frame Finalization"; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - if (ShrinkWrapping || ShrinkWrapFunc != "") { - AU.addRequired(); - AU.addRequired(); - } - AU.addPreserved(); - AU.addPreserved(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - /// runOnMachineFunction - Insert prolog/epilog code and replace abstract - /// frame indexes with appropriate references. - /// - bool runOnMachineFunction(MachineFunction &Fn) { - const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); - RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL; - - DEBUG(MF = &Fn); - - // Get MachineModuleInfo so that we can track the construction of the - // frame. - if (MachineModuleInfo *MMI = getAnalysisIfAvailable()) - Fn.getFrameInfo()->setMachineModuleInfo(MMI); - - // Allow the target machine to make some adjustments to the function - // e.g. UsedPhysRegs before calculateCalleeSavedRegisters. - TRI->processFunctionBeforeCalleeSavedScan(Fn, RS); - - // Scan the function for modified callee saved registers and insert spill - // code for any callee saved registers that are modified. Also calculate - // the MaxCallFrameSize and HasCalls variables for the function's frame - // information and eliminates call frame pseudo instructions. - calculateCalleeSavedRegisters(Fn); - - // Determine placement of CSR spill/restore code: - // - with shrink wrapping, place spills and restores to tightly - // enclose regions in the Machine CFG of the function where - // they are used. Without shrink wrapping - // - default (no shrink wrapping), place all spills in the - // entry block, all restores in return blocks. - placeCSRSpillsAndRestores(Fn); - - // Add the code to save and restore the callee saved registers - insertCSRSpillsAndRestores(Fn); - - // Allow the target machine to make final modifications to the function - // before the frame layout is finalized. - TRI->processFunctionBeforeFrameFinalized(Fn); - - // Calculate actual frame offsets for all abstract stack objects... - calculateFrameObjectOffsets(Fn); - - // Add prolog and epilog code to the function. This function is required - // to align the stack frame as necessary for any stack variables or - // called functions. Because of this, calculateCalleeSavedRegisters - // must be called before this function in order to set the HasCalls - // and MaxCallFrameSize variables. - insertPrologEpilogCode(Fn); - - // Replace all MO_FrameIndex operands with physical register references - // and actual offsets. - // - replaceFrameIndices(Fn); - - delete RS; - clearAllSets(); - return true; - } - - private: - RegScavenger *RS; - - // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved - // stack frame indexes. - unsigned MinCSFrameIndex, MaxCSFrameIndex; - - // Analysis info for spill/restore placement. - // "CSR": "callee saved register". - - // CSRegSet contains indices into the Callee Saved Register Info - // vector built by calculateCalleeSavedRegisters() and accessed - // via MF.getFrameInfo()->getCalleeSavedInfo(). - typedef SparseBitVector<> CSRegSet; - - // CSRegBlockMap maps MachineBasicBlocks to sets of callee - // saved register indices. - typedef DenseMap CSRegBlockMap; - - // Set and maps for computing CSR spill/restore placement: - // used in function (UsedCSRegs) - // used in a basic block (CSRUsed) - // anticipatable in a basic block (Antic{In,Out}) - // available in a basic block (Avail{In,Out}) - // to be spilled at the entry to a basic block (CSRSave) - // to be restored at the end of a basic block (CSRRestore) - CSRegSet UsedCSRegs; - CSRegBlockMap CSRUsed; - CSRegBlockMap AnticIn, AnticOut; - CSRegBlockMap AvailIn, AvailOut; - CSRegBlockMap CSRSave; - CSRegBlockMap CSRRestore; - - // Entry and return blocks of the current function. - MachineBasicBlock* EntryBlock; - SmallVector ReturnBlocks; - - // Map of MBBs to top level MachineLoops. - DenseMap TLLoops; - - // Flag to control shrink wrapping per-function: - // may choose to skip shrink wrapping for certain - // functions. - bool ShrinkWrapThisFunction; - -#ifndef NDEBUG - // Machine function handle. - MachineFunction* MF; - - // Flag indicating that the current function - // has at least one "short" path in the machine - // CFG from the entry block to an exit block. - bool HasFastExitPath; -#endif - - bool calculateSets(MachineFunction &Fn); - bool calcAnticInOut(MachineBasicBlock* MBB); - bool calcAvailInOut(MachineBasicBlock* MBB); - void calculateAnticAvail(MachineFunction &Fn); - bool addUsesForMEMERegion(MachineBasicBlock* MBB, - SmallVector& blks); - bool addUsesForTopLevelLoops(SmallVector& blks); - bool calcSpillPlacements(MachineBasicBlock* MBB, - SmallVector &blks, - CSRegBlockMap &prevSpills); - bool calcRestorePlacements(MachineBasicBlock* MBB, - SmallVector &blks, - CSRegBlockMap &prevRestores); - void placeSpillsAndRestores(MachineFunction &Fn); - void placeCSRSpillsAndRestores(MachineFunction &Fn); - void calculateCalleeSavedRegisters(MachineFunction &Fn); - void insertCSRSpillsAndRestores(MachineFunction &Fn); - void calculateFrameObjectOffsets(MachineFunction &Fn); - void replaceFrameIndices(MachineFunction &Fn); - void insertPrologEpilogCode(MachineFunction &Fn); - - // Initialize DFA sets, called before iterations. - void clearAnticAvailSets(); - // Clear all sets constructed by shrink wrapping. - void clearAllSets(); - - // Initialize all shrink wrapping data. - void initShrinkWrappingInfo(); - - // Convienences for dealing with machine loops. - MachineBasicBlock* getTopLevelLoopPreheader(MachineLoop* LP) { - assert(LP && "Machine loop is NULL."); - MachineBasicBlock* PHDR = LP->getLoopPreheader(); - MachineLoop* PLP = LP->getParentLoop(); - while (PLP) { - PHDR = PLP->getLoopPreheader(); - PLP = PLP->getParentLoop(); - } - return PHDR; - } - - MachineLoop* getTopLevelLoopParent(MachineLoop *LP) { - if (LP == 0) - return 0; - MachineLoop* PLP = LP->getParentLoop(); - while (PLP) { - LP = PLP; - PLP = PLP->getParentLoop(); - } - return LP; - } - - // Propgate CSRs used in MBB to all MBBs of loop LP. - void propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP); - - // Convenience for recognizing return blocks. - bool isReturnBlock(MachineBasicBlock* MBB) { - return (MBB && !MBB->empty() && MBB->back().getDesc().isReturn()); - } - -#ifndef NDEBUG - // Debugging methods. - - // Mark this function as having fast exit paths. - void findFastExitPath(); - - // Verify placement of spills/restores. - void verifySpillRestorePlacement(); - - std::string getBasicBlockName(const MachineBasicBlock* MBB); - std::string stringifyCSRegSet(const CSRegSet& s); - void dumpSet(const CSRegSet& s); - void dumpUsed(MachineBasicBlock* MBB); - void dumpAllUsed(); - void dumpSets(MachineBasicBlock* MBB); - void dumpSets1(MachineBasicBlock* MBB); - void dumpAllSets(); - void dumpSRSets(); -#endif - - }; - char PEI::ID = 0; -} - -// Initialize shrink wrapping DFA sets, called before iterations. -void PEI::clearAnticAvailSets() { - AnticIn.clear(); - AnticOut.clear(); - AvailIn.clear(); - AvailOut.clear(); -} - -// Clear all sets constructed by shrink wrapping. -void PEI::clearAllSets() { - ReturnBlocks.clear(); - clearAnticAvailSets(); - UsedCSRegs.clear(); - CSRUsed.clear(); - TLLoops.clear(); - CSRSave.clear(); - CSRRestore.clear(); -} - -// Initialize all shrink wrapping data. -void PEI::initShrinkWrappingInfo() { - clearAllSets(); - EntryBlock = 0; - HasFastExitPath = false; - ShrinkWrapThisFunction = ShrinkWrapping; - // DEBUG: enable or disable shrink wrapping for the current function - // via --shrink-wrap-func=. -#ifndef NDEBUG - if (ShrinkWrapFunc != "") { - std::string MFName = MF->getFunction()->getName(); - ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc); - } -#endif -} +char PEI::ID = 0; +static RegisterPass +X("prologepilog", "Prologue/Epilogue Insertion"); /// createPrologEpilogCodeInserter - This function returns a pass that inserts /// prolog and epilog code, and eliminates abstract frame references. /// FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); } -/// placeCSRSpillsAndRestores - determine which MBBs of the function -/// need save, restore code for callee-saved registers by doing a DF analysis -/// similar to the one used in code motion (GVNPRE). This produces maps of MBBs -/// to sets of registers (CSRs) for saves and restores. MachineLoopInfo -/// is used to ensure that CSR save/restore code is not placed inside loops. -/// This function computes the maps of MBBs -> CSRs to spill and restore -/// in CSRSave, CSRRestore. -/// -/// If shrink wrapping is not being performed, place all spills in -/// the entry block, all restores in return blocks. In this case, -/// CSRSave has a single mapping, CSRRestore has mappings for each -/// return block. -/// -void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) { - - initShrinkWrappingInfo(); - - DEBUG(if (ShrinkWrapThisFunction) { - DOUT << "Place CSR spills/restores for " - << MF->getFunction()->getName() << "\n"; - }); - - if (calculateSets(Fn)) - placeSpillsAndRestores(Fn); -} - -/// calcAnticInOut - calculate the anticipated in/out reg sets -/// for the given MBB by looking forward in the MCFG at MBB's -/// successors. -/// -bool PEI::calcAnticInOut(MachineBasicBlock* MBB) { - bool changed = false; - - // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB)) - SmallVector successors; - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock* SUCC = *SI; - if (SUCC != MBB) - successors.push_back(SUCC); - } - - unsigned i = 0, e = successors.size(); - if (i != e) { - CSRegSet prevAnticOut = AnticOut[MBB]; - MachineBasicBlock* SUCC = successors[i]; - - AnticOut[MBB] = AnticIn[SUCC]; - for (++i; i != e; ++i) { - SUCC = successors[i]; - AnticOut[MBB] &= AnticIn[SUCC]; - } - if (prevAnticOut != AnticOut[MBB]) - changed = true; - } - - // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]); - CSRegSet prevAnticIn = AnticIn[MBB]; - AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB]; - if (prevAnticIn |= AnticIn[MBB]) - changed = true; - return changed; -} - -/// calcAvailInOut - calculate the available in/out reg sets -/// for the given MBB by looking backward in the MCFG at MBB's -/// predecessors. -/// -bool PEI::calcAvailInOut(MachineBasicBlock* MBB) { - bool changed = false; - - // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB)) - SmallVector predecessors; - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - MachineBasicBlock* PRED = *PI; - if (PRED != MBB) - predecessors.push_back(PRED); - } - - unsigned i = 0, e = predecessors.size(); - if (i != e) { - CSRegSet prevAvailIn = AvailIn[MBB]; - MachineBasicBlock* PRED = predecessors[i]; - - AvailIn[MBB] = AvailOut[PRED]; - for (++i; i != e; ++i) { - PRED = predecessors[i]; - AvailIn[MBB] &= AvailOut[PRED]; - } - if (prevAvailIn != AvailIn[MBB]) - changed = true; - } - - // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]); - CSRegSet prevAvailOut = AvailOut[MBB]; - AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB]; - if (prevAvailOut |= AvailOut[MBB]) - changed = true; - return changed; -} - -/// calculateAnticAvail - build the sets anticipated and available -/// registers in the MCFG of the current function iteratively, -/// doing a combined forward and backward analysis. -/// -void PEI::calculateAnticAvail(MachineFunction &Fn) { - // Initialize data flow sets. - clearAnticAvailSets(); - - // Calulate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG. - bool changed = true; - unsigned iterations = 0; - while (changed) { - changed = false; - ++iterations; - for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - - // Calculate anticipated in, out regs at MBB from - // anticipated at successors of MBB. - changed |= calcAnticInOut(MBB); - - // Calculate available in, out regs at MBB from - // available at predecessors of MBB. - changed |= calcAvailInOut(MBB); - } - } - - DEBUG(if (ShrinkWrapDebugging >= Details) { - DOUT << "-----------------------------------------------------------\n"; - DOUT << " Antic/Avail Sets:\n"; - DOUT << "-----------------------------------------------------------\n"; - DOUT << "iterations = " << iterations << "\n"; - DOUT << "-----------------------------------------------------------\n"; - DOUT << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n"; - DOUT << "-----------------------------------------------------------\n"; - for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - dumpSets(MBB); - } - DOUT << "-----------------------------------------------------------\n"; - }); -} - -/// propagateUsesAroundLoop - copy used register info from MBB to all blocks -/// of the loop given by LP and its parent loops. This prevents spills/restores -/// from being placed in the bodies of loops. -/// -void PEI::propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP) { - if (! MBB || !LP) - return; - - std::vector loopBlocks = LP->getBlocks(); - for (unsigned i = 0, e = loopBlocks.size(); i != e; ++i) { - MachineBasicBlock* LBB = loopBlocks[i]; - if (LBB == MBB) - continue; - if (CSRUsed[LBB].contains(CSRUsed[MBB])) - continue; - CSRUsed[LBB] |= CSRUsed[MBB]; - } -} - -/// calculateSets - collect the CSRs used in this function, compute -/// the DF sets that describe the initial minimal regions in the -/// Machine CFG around which CSR spills and restores must be placed. +/// runOnMachineFunction - Insert prolog/epilog code and replace abstract +/// frame indexes with appropriate references. /// -/// Additionally, this function decides if shrink wrapping should -/// be disabled for the current function, checking the following: -/// 1. the current function has more than 500 MBBs: heuristic limit -/// on function size to reduce compile time impact of the current -/// iterative algorithm. -/// 2. all CSRs are used in the entry block. -/// 3. all CSRs are used in all immediate successors of the entry block. -/// 4. all CSRs are used in a subset of blocks, each of which dominates -/// all return blocks. These blocks, taken as a subgraph of the MCFG, -/// are equivalent to the entry block since all execution paths pass -/// through them. -/// -bool PEI::calculateSets(MachineFunction &Fn) { - // Sets used to compute spill, restore placement sets. - const std::vector CSI = - Fn.getFrameInfo()->getCalleeSavedInfo(); - - // If no CSRs used, we are done. - if (CSI.empty()) { - DEBUG(if (ShrinkWrapThisFunction) - DOUT << "DISABLED: " << Fn.getFunction()->getName() - << ": uses no callee-saved registers\n"); - return false; - } - - // Save refs to entry and return blocks. - EntryBlock = Fn.begin(); - for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end(); - MBB != E; ++MBB) - if (isReturnBlock(MBB)) - ReturnBlocks.push_back(MBB); - - // Determine if this function has fast exit paths. - DEBUG(if (ShrinkWrapThisFunction) - findFastExitPath()); - - // Limit shrink wrapping via the current iterative bit vector - // implementation to functions with <= 500 MBBs. - if (Fn.size() > 500) { - DEBUG(if (ShrinkWrapThisFunction) - DOUT << "DISABLED: " << Fn.getFunction()->getName() - << ": too large (" << Fn.size() << " MBBs)\n"); - ShrinkWrapThisFunction = false; - } - - // Return now if not shrink wrapping. - if (! ShrinkWrapThisFunction) - return false; - - // Collect set of used CSRs. - for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { - UsedCSRegs.set(inx); - } - - // Walk instructions in all MBBs, create CSRUsed[] sets, choose - // whether or not to shrink wrap this function. - MachineLoopInfo &LI = getAnalysis(); - MachineDominatorTree &DT = getAnalysis(); +bool PEI::runOnMachineFunction(MachineFunction &Fn) { + const Function* F = Fn.getFunction(); const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); + RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL; + FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); + FrameConstantRegMap.clear(); + + // Calculate the MaxCallFrameSize and HasCalls variables for the function's + // frame information. Also eliminates call frame pseudo instructions. + calculateCallsInformation(Fn); + + // Allow the target machine to make some adjustments to the function + // e.g. UsedPhysRegs before calculateCalleeSavedRegisters. + TRI->processFunctionBeforeCalleeSavedScan(Fn, RS); + + // Scan the function for modified callee saved registers and insert spill code + // for any callee saved registers that are modified. + calculateCalleeSavedRegisters(Fn); + + // Determine placement of CSR spill/restore code: + // - with shrink wrapping, place spills and restores to tightly + // enclose regions in the Machine CFG of the function where + // they are used. Without shrink wrapping + // - default (no shrink wrapping), place all spills in the + // entry block, all restores in return blocks. + placeCSRSpillsAndRestores(Fn); + + // Add the code to save and restore the callee saved registers + if (!F->hasFnAttr(Attribute::Naked)) + insertCSRSpillsAndRestores(Fn); + + // Allow the target machine to make final modifications to the function + // before the frame layout is finalized. + TRI->processFunctionBeforeFrameFinalized(Fn); + + // Calculate actual frame offsets for all abstract stack objects... + calculateFrameObjectOffsets(Fn); + + // Add prolog and epilog code to the function. This function is required + // to align the stack frame as necessary for any stack variables or + // called functions. Because of this, calculateCalleeSavedRegisters + // must be called before this function in order to set the HasCalls + // and MaxCallFrameSize variables. + if (!F->hasFnAttr(Attribute::Naked)) + insertPrologEpilogCode(Fn); + + // Replace all MO_FrameIndex operands with physical register references + // and actual offsets. + // + replaceFrameIndices(Fn); - bool allCSRUsesInEntryBlock = true; - for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) { - for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { - unsigned Reg = CSI[inx].getReg(); - // If instruction I reads or modifies Reg, add it to UsedCSRegs, - // CSRUsed map for the current block. - for (unsigned opInx = 0, opEnd = I->getNumOperands(); - opInx != opEnd; ++opInx) { - const MachineOperand &MO = I->getOperand(opInx); - if (! (MO.isReg() && (MO.isUse() || MO.isDef()))) - continue; - unsigned MOReg = MO.getReg(); - if (!MOReg) - continue; - if (MOReg == Reg || - (TargetRegisterInfo::isPhysicalRegister(MOReg) && - TargetRegisterInfo::isPhysicalRegister(Reg) && - TRI->isSubRegister(Reg, MOReg))) { - // CSR Reg is defined/used in block MBB. - CSRUsed[MBB].set(inx); - // Check for uses in EntryBlock. - if (MBB != EntryBlock) - allCSRUsesInEntryBlock = false; - } - } - } - } - - if (CSRUsed[MBB].empty()) - continue; - - // Propagate CSRUsed[MBB] in loops - if (MachineLoop* LP = LI.getLoopFor(MBB)) { - // Add top level loop to work list. - MachineBasicBlock* HDR = getTopLevelLoopPreheader(LP); - MachineLoop* PLP = getTopLevelLoopParent(LP); - - if (! HDR) { - HDR = PLP->getHeader(); - assert(HDR->pred_size() > 0 && "Loop header has no predecessors?"); - MachineBasicBlock::pred_iterator PI = HDR->pred_begin(); - HDR = *PI; - } - TLLoops[HDR] = PLP; - - // Push uses from inside loop to its parent loops, - // or to all other MBBs in its loop. - if (LP->getLoopDepth() > 1) { - for (MachineLoop* PLP = LP->getParentLoop(); PLP; - PLP = PLP->getParentLoop()) { - propagateUsesAroundLoop(MBB, PLP); - } - } else { - propagateUsesAroundLoop(MBB, LP); - } - } - } - - if (allCSRUsesInEntryBlock) { - DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName() - << ": all CSRs used in EntryBlock\n"); - ShrinkWrapThisFunction = false; - } else { - bool allCSRsUsedInEntryFanout = true; - for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(), - SE = EntryBlock->succ_end(); SI != SE; ++SI) { - MachineBasicBlock* SUCC = *SI; - if (CSRUsed[SUCC] != UsedCSRegs) - allCSRsUsedInEntryFanout = false; - } - if (allCSRsUsedInEntryFanout) { - DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName() - << ": all CSRs used in imm successors of EntryBlock\n"); - ShrinkWrapThisFunction = false; - } - } - - if (ShrinkWrapThisFunction) { - // Check if MBB uses CSRs and dominates all exit nodes. - // Such nodes are equiv. to the entry node w.r.t. - // CSR uses: every path through the function must - // pass through this node. If each CSR is used at least - // once by these nodes, shrink wrapping is disabled. - CSRegSet CSRUsedInChokePoints; - for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - if (MBB == EntryBlock || CSRUsed[MBB].empty() || MBB->succ_size() < 1) - continue; - bool dominatesExitNodes = true; - for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) - if (! DT.dominates(MBB, ReturnBlocks[ri])) { - dominatesExitNodes = false; - break; - } - if (dominatesExitNodes) { - CSRUsedInChokePoints |= CSRUsed[MBB]; - if (CSRUsedInChokePoints == UsedCSRegs) { - DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName() - << ": all CSRs used in choke point(s) at " - << getBasicBlockName(MBB) << "\n"); - ShrinkWrapThisFunction = false; - break; - } - } - } - } - - // Return now if we have decided not to apply shrink wrapping - // to the current function. - if (! ShrinkWrapThisFunction) - return false; - - DEBUG({ - DOUT << "ENABLED: " << Fn.getFunction()->getName(); - if (HasFastExitPath) - DOUT << " (fast exit path)"; - DOUT << "\n"; - if (ShrinkWrapDebugging >= BasicInfo) { - DOUT << "------------------------------" - << "-----------------------------\n"; - DOUT << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n"; - if (ShrinkWrapDebugging >= Details) { - DOUT << "------------------------------" - << "-----------------------------\n"; - dumpAllUsed(); - } - } - }); - - // Build initial DF sets to determine minimal regions in the - // Machine CFG around which CSRs must be spilled and restored. - calculateAnticAvail(Fn); + // If register scavenging is needed, as we've enabled doing it as a + // post-pass, scavenge the virtual registers that frame index elimiation + // inserted. + if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) + scavengeFrameVirtualRegs(Fn); + delete RS; + clearAllSets(); return true; } -/// addUsesForMEMERegion - add uses of CSRs spilled or restored in -/// multi-entry, multi-exit (MEME) regions so spill and restore -/// placement will not break code that enters or leaves a -/// shrink-wrapped region by inducing spills with no matching -/// restores or restores with no matching spills. A MEME region -/// is a subgraph of the MCFG with multiple entry edges, multiple -/// exit edges, or both. This code propagates use information -/// through the MCFG until all paths requiring spills and restores -/// _outside_ the computed minimal placement regions have been covered. -/// -bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB, - SmallVector& blks) { - if (MBB->succ_size() < 2 && MBB->pred_size() < 2) { - bool processThisBlock = false; - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock* SUCC = *SI; - if (SUCC->pred_size() > 1) { - processThisBlock = true; - break; - } - } - if (!CSRRestore[MBB].empty() && MBB->succ_size() > 0) { - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - MachineBasicBlock* PRED = *PI; - if (PRED->succ_size() > 1) { - processThisBlock = true; - break; - } - } - } - if (! processThisBlock) - return false; - } - - CSRegSet prop; - if (!CSRSave[MBB].empty()) - prop = CSRSave[MBB]; - else if (!CSRRestore[MBB].empty()) - prop = CSRRestore[MBB]; - else - prop = CSRUsed[MBB]; - if (prop.empty()) - return false; - - // Propagate selected bits to successors, predecessors of MBB. - bool addedUses = false; - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock* SUCC = *SI; - // Self-loop - if (SUCC == MBB) - continue; - if (! CSRUsed[SUCC].contains(prop)) { - CSRUsed[SUCC] |= prop; - addedUses = true; - blks.push_back(SUCC); - DEBUG(if (ShrinkWrapDebugging >= Iterations) - DOUT << getBasicBlockName(MBB) - << "(" << stringifyCSRegSet(prop) << ")->" - << "successor " << getBasicBlockName(SUCC) << "\n"); - } - } - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - MachineBasicBlock* PRED = *PI; - // Self-loop - if (PRED == MBB) - continue; - if (! CSRUsed[PRED].contains(prop)) { - CSRUsed[PRED] |= prop; - addedUses = true; - blks.push_back(PRED); - DEBUG(if (ShrinkWrapDebugging >= Iterations) - DOUT << getBasicBlockName(MBB) - << "(" << stringifyCSRegSet(prop) << ")->" - << "predecessor " << getBasicBlockName(PRED) << "\n"); - } - } - return addedUses; -} - -/// addUsesForTopLevelLoops - add uses for CSRs used inside top -/// level loops to the exit blocks of those loops. -/// -bool PEI::addUsesForTopLevelLoops(SmallVector& blks) { - bool addedUses = false; - - // Place restores for top level loops where needed. - for (DenseMap::iterator - I = TLLoops.begin(), E = TLLoops.end(); I != E; ++I) { - MachineBasicBlock* MBB = I->first; - MachineLoop* LP = I->second; - MachineBasicBlock* HDR = LP->getHeader(); - SmallVector exitBlocks; - CSRegSet loopSpills; - - loopSpills = CSRSave[MBB]; - if (CSRSave[MBB].empty()) { - loopSpills = CSRUsed[HDR]; - assert(!loopSpills.empty() && "No CSRs used in loop?"); - } else if (CSRRestore[MBB].contains(CSRSave[MBB])) - continue; - - LP->getExitBlocks(exitBlocks); - assert(exitBlocks.size() > 0 && "Loop has no top level exit blocks?"); - for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) { - MachineBasicBlock* EXB = exitBlocks[i]; - if (! CSRUsed[EXB].contains(loopSpills)) { - CSRUsed[EXB] |= loopSpills; - addedUses = true; - DEBUG(if (ShrinkWrapDebugging >= Iterations) - DOUT << "LOOP " << getBasicBlockName(MBB) - << "(" << stringifyCSRegSet(loopSpills) << ")->" - << getBasicBlockName(EXB) << "\n"); - if (EXB->succ_size() > 1 || EXB->pred_size() > 1) - blks.push_back(EXB); - } - } - } - return addedUses; -} - -/// calcSpillPlacements - determine which CSRs should be spilled -/// in MBB using AnticIn sets of MBB's predecessors, keeping track -/// of changes to spilled reg sets. Add MBB to the set of blocks -/// that need to be processed for propagating use info to cover -/// multi-entry/exit regions. -/// -bool PEI::calcSpillPlacements(MachineBasicBlock* MBB, - SmallVector &blks, - CSRegBlockMap &prevSpills) { - bool placedSpills = false; - // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB) - CSRegSet anticInPreds; - SmallVector predecessors; - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - MachineBasicBlock* PRED = *PI; - if (PRED != MBB) - predecessors.push_back(PRED); - } - unsigned i = 0, e = predecessors.size(); - if (i != e) { - MachineBasicBlock* PRED = predecessors[i]; - anticInPreds = UsedCSRegs - AnticIn[PRED]; - for (++i; i != e; ++i) { - PRED = predecessors[i]; - anticInPreds &= (UsedCSRegs - AnticIn[PRED]); - } - } else { - // Handle uses in entry blocks (which have no predecessors). - // This is necessary because the DFA formulation assumes the - // entry and (multiple) exit nodes cannot have CSR uses, which - // is not the case in the real world. - anticInPreds = UsedCSRegs; - } - // Compute spills required at MBB: - CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds; - - if (! CSRSave[MBB].empty()) { - if (MBB == EntryBlock) { - for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) - CSRRestore[ReturnBlocks[ri]] |= CSRSave[MBB]; - } else { - // Reset all regs spilled in MBB that are also spilled in EntryBlock. - if (CSRSave[EntryBlock].intersects(CSRSave[MBB])) { - CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock]; - } - } - } - placedSpills = (CSRSave[MBB] != prevSpills[MBB]); - prevSpills[MBB] = CSRSave[MBB]; - // Remember this block for adding restores to successor - // blocks for multi-entry region. - if (placedSpills) - blks.push_back(MBB); - - DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations) - DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRSave[MBB]) << "\n"); - - return placedSpills; -} - -/// calcRestorePlacements - determine which CSRs should be restored -/// in MBB using AvailOut sets of MBB's succcessors, keeping track -/// of changes to restored reg sets. Add MBB to the set of blocks -/// that need to be processed for propagating use info to cover -/// multi-entry/exit regions. -/// -bool PEI::calcRestorePlacements(MachineBasicBlock* MBB, - SmallVector &blks, - CSRegBlockMap &prevRestores) { - bool placedRestores = false; - // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB) - CSRegSet availOutSucc; - SmallVector successors; - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock* SUCC = *SI; - if (SUCC != MBB) - successors.push_back(SUCC); - } - unsigned i = 0, e = successors.size(); - if (i != e) { - MachineBasicBlock* SUCC = successors[i]; - availOutSucc = UsedCSRegs - AvailOut[SUCC]; - for (++i; i != e; ++i) { - SUCC = successors[i]; - availOutSucc &= (UsedCSRegs - AvailOut[SUCC]); - } - } else { - if (! CSRUsed[MBB].empty() || ! AvailOut[MBB].empty()) { - // Handle uses in return blocks (which have no successors). - // This is necessary because the DFA formulation assumes the - // entry and (multiple) exit nodes cannot have CSR uses, which - // is not the case in the real world. - availOutSucc = UsedCSRegs; - } +#if 0 +void PEI::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + if (ShrinkWrapping || ShrinkWrapFunc != "") { + AU.addRequired(); + AU.addRequired(); } - // Compute restores required at MBB: - CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc; - - // Postprocess restore placements at MBB. - // Remove the CSRs that are restored in the return blocks. - // Lest this be confusing, note that: - // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks. - if (MBB->succ_size() && ! CSRRestore[MBB].empty()) { - if (! CSRSave[EntryBlock].empty()) - CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock]; - } - placedRestores = (CSRRestore[MBB] != prevRestores[MBB]); - prevRestores[MBB] = CSRRestore[MBB]; - // Remember this block for adding saves to predecessor - // blocks for multi-entry region. - if (placedRestores) - blks.push_back(MBB); - - DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations) - DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); - - return placedRestores; -} - -/// placeSpillsAndRestores - place spills and restores of CSRs -/// used in MBBs in minimal regions that contain the uses. -/// -void PEI::placeSpillsAndRestores(MachineFunction &Fn) { - CSRegBlockMap prevCSRSave; - CSRegBlockMap prevCSRRestore; - SmallVector cvBlocks, ncvBlocks; - bool changed = true; - unsigned iterations = 0; - - // Iterate computation of spill and restore placements in the MCFG until: - // 1. CSR use info has been fully propagated around the MCFG, and - // 2. computation of CSRSave[], CSRRestore[] reach fixed points. - while (changed) { - changed = false; - ++iterations; - - DEBUG(if (ShrinkWrapDebugging >= Iterations) - DOUT << "iter " << iterations - << " --------------------------------------------------\n"); - - // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG, - // which determines the placements of spills and restores. - // Keep track of changes to spills, restores in each iteration to - // minimize the total iterations. - bool SRChanged = false; - for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - - // Place spills for CSRs in MBB. - SRChanged |= calcSpillPlacements(MBB, cvBlocks, prevCSRSave); - - // Place restores for CSRs in MBB. - SRChanged |= calcRestorePlacements(MBB, cvBlocks, prevCSRRestore); - } - - // Add uses of CSRs used inside loops where needed. - changed |= addUsesForTopLevelLoops(cvBlocks); - - // Add uses for CSRs spilled or restored at branch, join points. - if (changed || SRChanged) { - while (! cvBlocks.empty()) { - MachineBasicBlock* MBB = cvBlocks.pop_back_val(); - changed |= addUsesForMEMERegion(MBB, ncvBlocks); - } - if (! ncvBlocks.empty()) { - cvBlocks = ncvBlocks; - ncvBlocks.clear(); - } - } - - if (changed) { - calculateAnticAvail(Fn); - CSRSave.clear(); - CSRRestore.clear(); - } - } - - // Check for effectiveness: - // SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks} - // numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock] - // Gives a measure of how many CSR spills have been moved from EntryBlock - // to minimal regions enclosing their uses. - CSRegSet notSpilledInEntryBlock = (UsedCSRegs - CSRSave[EntryBlock]); - unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count(); - numSRReduced += numSRReducedThisFunc; - DEBUG(if (ShrinkWrapDebugging >= BasicInfo) { - DOUT << "-----------------------------------------------------------\n"; - DOUT << "total iterations = " << iterations << " ( " - << Fn.getFunction()->getName() - << " " << numSRReducedThisFunc - << " " << Fn.size() - << " )\n"; - DOUT << "-----------------------------------------------------------\n"; - dumpSRSets(); - DOUT << "-----------------------------------------------------------\n"; - if (numSRReducedThisFunc) - verifySpillRestorePlacement(); - }); + AU.addPreserved(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); } +#endif -/// calculateCalleeSavedRegisters - Scan the function for modified callee saved -/// registers. Also calculate the MaxCallFrameSize and HasCalls variables for -/// the function's frame information and eliminates call frame pseudo -/// instructions. -/// -void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { +/// calculateCallsInformation - Calculate the MaxCallFrameSize and HasCalls +/// variables for the function's frame information and eliminate call frame +/// pseudo instructions. +void PEI::calculateCallsInformation(MachineFunction &Fn) { const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); - const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo(); + MachineFrameInfo *MFI = Fn.getFrameInfo(); - // Get the callee saved register list... - const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn); + unsigned MaxCallFrameSize = 0; + bool HasCalls = MFI->hasCalls(); // Get the function call frame set-up and tear-down instruction opcode int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode(); int FrameDestroyOpcode = RegInfo->getCallFrameDestroyOpcode(); - // These are used to keep track the callee-save area. Initialize them. - MinCSFrameIndex = INT_MAX; - MaxCSFrameIndex = 0; - - // Early exit for targets which have no callee saved registers and no call - // frame setup/destroy pseudo instructions. - if ((CSRegs == 0 || CSRegs[0] == 0) && - FrameSetupOpcode == -1 && FrameDestroyOpcode == -1) + // Early exit for targets which have no call frame setup/destroy pseudo + // instructions. + if (FrameSetupOpcode == -1 && FrameDestroyOpcode == -1) return; - unsigned MaxCallFrameSize = 0; - bool HasCalls = false; - std::vector FrameSDOps; for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) @@ -1094,31 +156,57 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { if (Size > MaxCallFrameSize) MaxCallFrameSize = Size; HasCalls = true; FrameSDOps.push_back(I); + } else if (I->isInlineAsm()) { + // An InlineAsm might be a call; assume it is to get the stack frame + // aligned correctly for calls. + HasCalls = true; } - MachineFrameInfo *FFI = Fn.getFrameInfo(); - FFI->setHasCalls(HasCalls); - FFI->setMaxCallFrameSize(MaxCallFrameSize); + MFI->setHasCalls(HasCalls); + MFI->setMaxCallFrameSize(MaxCallFrameSize); - for (unsigned i = 0, e = FrameSDOps.size(); i != e; ++i) { - MachineBasicBlock::iterator I = FrameSDOps[i]; - // If call frames are not being included as part of the stack frame, - // and there is no dynamic allocation (therefore referencing frame slots - // off sp), leave the pseudo ops alone. We'll eliminate them later. - if (RegInfo->hasReservedCallFrame(Fn) || RegInfo->hasFP(Fn)) + for (std::vector::iterator + i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) { + MachineBasicBlock::iterator I = *i; + + // If call frames are not being included as part of the stack frame, and + // the target doesn't indicate otherwise, remove the call frame pseudos + // here. The sub/add sp instruction pairs are still inserted, but we don't + // need to track the SP adjustment for frame index elimination. + if (RegInfo->canSimplifyCallFramePseudos(Fn)) RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I); } +} - // Now figure out which *callee saved* registers are modified by the current + +/// calculateCalleeSavedRegisters - Scan the function for modified callee saved +/// registers. +void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { + const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); + const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo(); + MachineFrameInfo *MFI = Fn.getFrameInfo(); + + // Get the callee saved register list... + const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn); + + // These are used to keep track the callee-save area. Initialize them. + MinCSFrameIndex = INT_MAX; + MaxCSFrameIndex = 0; + + // Early exit for targets which have no callee saved registers. + if (CSRegs == 0 || CSRegs[0] == 0) + return; + + // Figure out which *callee saved* registers are modified by the current // function, thus needing to be saved and restored in the prolog/epilog. - // - const TargetRegisterClass* const *CSRegClasses = + const TargetRegisterClass * const *CSRegClasses = RegInfo->getCalleeSavedRegClasses(&Fn); + std::vector CSI; for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; if (Fn.getRegInfo().isPhysRegUsed(Reg)) { - // If the reg is modified, save it! + // If the reg is modified, save it! CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i])); } else { for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg); @@ -1135,42 +223,51 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { return; // Early exit if no callee saved registers are modified! unsigned NumFixedSpillSlots; - const std::pair *FixedSpillSlots = + const TargetFrameInfo::SpillSlot *FixedSpillSlots = TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots); // Now that we know which registers need to be saved and restored, allocate // stack slots for them. - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - const TargetRegisterClass *RC = CSI[i].getRegClass(); + for (std::vector::iterator + I = CSI.begin(), E = CSI.end(); I != E; ++I) { + unsigned Reg = I->getReg(); + const TargetRegisterClass *RC = I->getRegClass(); + + int FrameIdx; + if (RegInfo->hasReservedSpillSlot(Fn, Reg, FrameIdx)) { + I->setFrameIdx(FrameIdx); + continue; + } // Check to see if this physreg must be spilled to a particular stack slot // on this target. - const std::pair *FixedSlot = FixedSpillSlots; + const TargetFrameInfo::SpillSlot *FixedSlot = FixedSpillSlots; while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots && - FixedSlot->first != Reg) + FixedSlot->Reg != Reg) ++FixedSlot; - int FrameIdx; - if (FixedSlot == FixedSpillSlots+NumFixedSpillSlots) { + if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) { // Nope, just spill it anywhere convenient. unsigned Align = RC->getAlignment(); unsigned StackAlign = TFI->getStackAlignment(); - // We may not be able to sastify the desired alignment specification of - // the TargetRegisterClass if the stack alignment is smaller. - // Use the min. + + // We may not be able to satisfy the desired alignment specification of + // the TargetRegisterClass if the stack alignment is smaller. Use the + // min. Align = std::min(Align, StackAlign); - FrameIdx = FFI->CreateStackObject(RC->getSize(), Align); + FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true); if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; } else { // Spill it to the stack where we must. - FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->second); + FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, + true, false); } - CSI[i].setFrameIdx(FrameIdx); + + I->setFrameIdx(FrameIdx); } - FFI->setCalleeSavedInfo(CSI); + MFI->setCalleeSavedInfo(CSI); } /// insertCSRSpillsAndRestores - Insert spill and restore code for @@ -1178,8 +275,10 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { /// void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Get callee saved register information. - MachineFrameInfo *FFI = Fn.getFrameInfo(); - const std::vector &CSI = FFI->getCalleeSavedInfo(); + MachineFrameInfo *MFI = Fn.getFrameInfo(); + const std::vector &CSI = MFI->getCalleeSavedInfo(); + + MFI->setCalleeSavedInfoValid(true); // Early exit if no callee saved registers are modified! if (CSI.empty()) @@ -1188,10 +287,6 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); MachineBasicBlock::iterator I; - DEBUG(if (ShrinkWrapThisFunction && ShrinkWrapDebugging >= Details) - DOUT << "Inserting CSR spills/restores in function " - << Fn.getFunction()->getName() << "\n"); - if (! ShrinkWrapThisFunction) { // Spill using target interface. I = EntryBlock->begin(); @@ -1256,10 +351,6 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { if (save.empty()) continue; - DEBUG(if (ShrinkWrapDebugging >= Details) - DOUT << "Spilling " << stringifyCSRegSet(save) - << " in " << getBasicBlockName(MBB) << "\n"); - blockCSI.clear(); for (CSRegSet::iterator RI = save.begin(), RE = save.end(); RI != RE; ++RI) { @@ -1284,10 +375,6 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { } } - DEBUG(if (ShrinkWrapDebugging >= Details) - DOUT << "------------------------------" - << "-----------------------------\n"); - for (CSRegBlockMap::iterator BI = CSRRestore.begin(), BE = CSRRestore.end(); BI != BE; ++BI) { MachineBasicBlock* MBB = BI->first; @@ -1296,10 +383,6 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { if (restore.empty()) continue; - DEBUG(if (ShrinkWrapDebugging >= Details) - DOUT << "Restoring " << stringifyCSRegSet(restore) - << " in " << getBasicBlockName(MBB) << "\n"); - blockCSI.clear(); for (CSRegSet::iterator RI = restore.begin(), RE = restore.end(); RI != RE; ++RI) { @@ -1349,23 +432,18 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { } } } - - DEBUG(if (ShrinkWrapDebugging >= Details) - DOUT << "------------------------------" - << "-----------------------------\n"); } /// AdjustStackOffset - Helper function used to adjust the stack frame offset. static inline void -AdjustStackOffset(MachineFrameInfo *FFI, int FrameIdx, +AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, bool StackGrowsDown, int64_t &Offset, unsigned &MaxAlign) { - // If stack grows down, we need to add size of find the lowest address of the - // object. + // If the stack grows down, add the object size to find the lowest address. if (StackGrowsDown) - Offset += FFI->getObjectSize(FrameIdx); + Offset += MFI->getObjectSize(FrameIdx); - unsigned Align = FFI->getObjectAlignment(FrameIdx); + unsigned Align = MFI->getObjectAlignment(FrameIdx); // If the alignment of this object is greater than that of the stack, then // increase the stack alignment to match. @@ -1375,10 +453,10 @@ AdjustStackOffset(MachineFrameInfo *FFI, int FrameIdx, Offset = (Offset + Align - 1) / Align * Align; if (StackGrowsDown) { - FFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset + MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset } else { - FFI->setObjectOffset(FrameIdx, Offset); - Offset += FFI->getObjectSize(FrameIdx); + MFI->setObjectOffset(FrameIdx, Offset); + Offset += MFI->getObjectSize(FrameIdx); } } @@ -1392,35 +470,34 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown; // Loop over all of the stack objects, assigning sequential addresses... - MachineFrameInfo *FFI = Fn.getFrameInfo(); - - unsigned MaxAlign = FFI->getMaxAlignment(); + MachineFrameInfo *MFI = Fn.getFrameInfo(); // Start at the beginning of the local area. // The Offset is the distance from the stack top in the direction // of stack growth -- so it's always nonnegative. - int64_t Offset = TFI.getOffsetOfLocalArea(); + int LocalAreaOffset = TFI.getOffsetOfLocalArea(); if (StackGrowsDown) - Offset = -Offset; - assert(Offset >= 0 + LocalAreaOffset = -LocalAreaOffset; + assert(LocalAreaOffset >= 0 && "Local area offset should be in direction of stack growth"); + int64_t Offset = LocalAreaOffset; // If there are fixed sized objects that are preallocated in the local area, // non-fixed objects can't be allocated right at the start of local area. // We currently don't support filling in holes in between fixed sized // objects, so we adjust 'Offset' to point to the end of last fixed sized // preallocated object. - for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { + for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { int64_t FixedOff; if (StackGrowsDown) { // The maximum distance from the stack pointer is at lower address of // the object -- which is given by offset. For down growing stack // the offset is negative, so we negate the offset to get the distance. - FixedOff = -FFI->getObjectOffset(i); + FixedOff = -MFI->getObjectOffset(i); } else { // The maximum distance from the start pointer is at the upper // address of the object. - FixedOff = FFI->getObjectOffset(i) + FFI->getObjectSize(i); + FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i); } if (FixedOff > Offset) Offset = FixedOff; } @@ -1431,96 +508,93 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) { // If stack grows down, we need to add size of find the lowest // address of the object. - Offset += FFI->getObjectSize(i); + Offset += MFI->getObjectSize(i); - unsigned Align = FFI->getObjectAlignment(i); - // If the alignment of this object is greater than that of the stack, - // then increase the stack alignment to match. - MaxAlign = std::max(MaxAlign, Align); + unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary Offset = (Offset+Align-1)/Align*Align; - FFI->setObjectOffset(i, -Offset); // Set the computed offset + MFI->setObjectOffset(i, -Offset); // Set the computed offset } } else { int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex; for (int i = MaxCSFI; i >= MinCSFI ; --i) { - unsigned Align = FFI->getObjectAlignment(i); - // If the alignment of this object is greater than that of the stack, - // then increase the stack alignment to match. - MaxAlign = std::max(MaxAlign, Align); + unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary Offset = (Offset+Align-1)/Align*Align; - FFI->setObjectOffset(i, Offset); - Offset += FFI->getObjectSize(i); + MFI->setObjectOffset(i, Offset); + Offset += MFI->getObjectSize(i); } } + unsigned MaxAlign = MFI->getMaxAlignment(); + // Make sure the special register scavenging spill slot is closest to the // frame pointer if a frame pointer is required. const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); - if (RS && RegInfo->hasFP(Fn)) { + if (RS && RegInfo->hasFP(Fn) && !RegInfo->needsStackRealignment(Fn)) { int SFI = RS->getScavengingFrameIndex(); if (SFI >= 0) - AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign); + AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); } // Make sure that the stack protector comes before the local variables on the // stack. - if (FFI->getStackProtectorIndex() >= 0) - AdjustStackOffset(FFI, FFI->getStackProtectorIndex(), StackGrowsDown, + if (MFI->getStackProtectorIndex() >= 0) + AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown, Offset, MaxAlign); // Then assign frame offsets to stack objects that are not used to spill // callee saved registers. - for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) { + for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) continue; if (RS && (int)i == RS->getScavengingFrameIndex()) continue; - if (FFI->isDeadObjectIndex(i)) + if (MFI->isDeadObjectIndex(i)) continue; - if (FFI->getStackProtectorIndex() == (int)i) + if (MFI->getStackProtectorIndex() == (int)i) continue; - AdjustStackOffset(FFI, i, StackGrowsDown, Offset, MaxAlign); + AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); } // Make sure the special register scavenging spill slot is closest to the // stack pointer. - if (RS && !RegInfo->hasFP(Fn)) { + if (RS && (!RegInfo->hasFP(Fn) || RegInfo->needsStackRealignment(Fn))) { int SFI = RS->getScavengingFrameIndex(); if (SFI >= 0) - AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign); + AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); } - // Round up the size to a multiple of the alignment, but only if there are - // calls or alloca's in the function. This ensures that any calls to - // subroutines have their stack frames suitable aligned. - // Also do this if we need runtime alignment of the stack. In this case - // offsets will be relative to SP not FP; round up the stack size so this - // works. - if (!RegInfo->targetHandlesStackFrameRounding() && - (FFI->hasCalls() || FFI->hasVarSizedObjects() || - (RegInfo->needsStackRealignment(Fn) && - FFI->getObjectIndexEnd() != 0))) { + if (!RegInfo->targetHandlesStackFrameRounding()) { // If we have reserved argument space for call sites in the function // immediately on entry to the current function, count it as part of the // overall stack size. - if (RegInfo->hasReservedCallFrame(Fn)) - Offset += FFI->getMaxCallFrameSize(); - - unsigned AlignMask = std::max(TFI.getStackAlignment(),MaxAlign) - 1; + if (MFI->hasCalls() && RegInfo->hasReservedCallFrame(Fn)) + Offset += MFI->getMaxCallFrameSize(); + + // Round up the size to a multiple of the alignment. If the function has + // any calls or alloca's, align to the target's StackAlignment value to + // ensure that the callee's frame or the alloca data is suitably aligned; + // otherwise, for leaf functions, align to the TransientStackAlignment + // value. + unsigned StackAlign; + if (MFI->hasCalls() || MFI->hasVarSizedObjects() || + (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0)) + StackAlign = TFI.getStackAlignment(); + else + StackAlign = TFI.getTransientStackAlignment(); + // If the frame pointer is eliminated, all frame offsets will be relative + // to SP not FP; align to MaxAlign so this works. + StackAlign = std::max(StackAlign, MaxAlign); + unsigned AlignMask = StackAlign - 1; Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); } // Update frame info to pretend that this is part of the stack... - FFI->setStackSize(Offset+TFI.getOffsetOfLocalArea()); - - // Remember the required stack alignment in case targets need it to perform - // dynamic stack alignment. - FFI->setMaxAlignment(MaxAlign); + MFI->setStackSize(Offset - LocalAreaOffset); } @@ -1561,14 +635,9 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { int SPAdj = 0; // SP offset due to call frame setup / destroy. - if (RS) RS->enterBasicBlock(BB); + if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { - if (I->getOpcode() == TargetInstrInfo::DECLARE) { - // Ignore it. - ++I; - continue; - } if (I->getOpcode() == FrameSetupOpcode || I->getOpcode() == FrameDestroyOpcode) { @@ -1590,7 +659,7 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { if (PrevI == BB->end()) I = BB->begin(); // The replaced instr was the first in the block. else - I = next(PrevI); + I = llvm::next(PrevI); continue; } @@ -1611,8 +680,16 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { // If this instruction has a FrameIndex operand, we need to // use that target machine register info object to eliminate // it. - - TRI.eliminateFrameIndex(MI, SPAdj, RS); + TargetRegisterInfo::FrameIndexValue Value; + unsigned VReg = + TRI.eliminateFrameIndex(MI, SPAdj, &Value, + FrameIndexVirtualScavenging ? NULL : RS); + if (VReg) { + assert (FrameIndexVirtualScavenging && + "Not scavenging, but virtual returned from " + "eliminateFrameIndex()!"); + FrameConstantRegMap[VReg] = FrameConstantEntry(Value, SPAdj); + } // Reset the iterator if we were at the beginning of the BB. if (AtBeginning) { @@ -1627,283 +704,205 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { if (DoIncr && I != BB->end()) ++I; // Update register states. - if (RS && MI) RS->forward(MI); + if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI); } assert(SPAdj == 0 && "Unbalanced call frame setup / destroy pairs?"); } } -// Debugging methods for shrink wrapping. -#ifndef NDEBUG -/// findFastExitPath - debugging method used to detect functions -/// with at least one path from the entry block to a return block -/// directly or which has a very small number of edges. -/// -void PEI::findFastExitPath() { - if (! EntryBlock) - return; - // Fina a path from EntryBlock to any return block that does not branch: - // Entry - // | ... - // v | - // B1<-----+ - // | - // v - // Return - for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(), - SE = EntryBlock->succ_end(); SI != SE; ++SI) { - MachineBasicBlock* SUCC = *SI; - - // Assume positive, disprove existence of fast path. - HasFastExitPath = true; - - // Check the immediate successors. - if (isReturnBlock(SUCC)) { - if (ShrinkWrapDebugging >= BasicInfo) - DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock) - << "->" << getBasicBlockName(SUCC) << "\n"; - break; - } - // Traverse df from SUCC, look for a branch block. - std::string exitPath = getBasicBlockName(SUCC); - for (df_iterator BI = df_begin(SUCC), - BE = df_end(SUCC); BI != BE; ++BI) { - MachineBasicBlock* SBB = *BI; - // Reject paths with branch nodes. - if (SBB->succ_size() > 1) { - HasFastExitPath = false; - break; +/// findLastUseReg - find the killing use of the specified register within +/// the instruciton range. Return the operand number of the kill in Operand. +static MachineBasicBlock::iterator +findLastUseReg(MachineBasicBlock::iterator I, MachineBasicBlock::iterator ME, + unsigned Reg) { + // Scan forward to find the last use of this virtual register + for (++I; I != ME; ++I) { + MachineInstr *MI = I; + bool isDefInsn = false; + bool isKillInsn = false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) + if (MI->getOperand(i).isReg()) { + unsigned OpReg = MI->getOperand(i).getReg(); + if (OpReg == 0 || !TargetRegisterInfo::isVirtualRegister(OpReg)) + continue; + assert (OpReg == Reg + && "overlapping use of scavenged index register!"); + // If this is the killing use, we have a candidate. + if (MI->getOperand(i).isKill()) + isKillInsn = true; + else if (MI->getOperand(i).isDef()) + isDefInsn = true; } - exitPath += "->" + getBasicBlockName(SBB); - } - if (HasFastExitPath) { - if (ShrinkWrapDebugging >= BasicInfo) - DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock) - << "->" << exitPath << "\n"; - break; - } + if (isKillInsn && !isDefInsn) + return I; } + // If we hit the end of the basic block, there was no kill of + // the virtual register, which is wrong. + assert (0 && "scavenged index register never killed!"); + return ME; } -/// verifySpillRestorePlacement - check the current spill/restore -/// sets for safety. Attempt to find spills without restores or -/// restores without spills. -/// Spills: walk df from each MBB in spill set ensuring that -/// all CSRs spilled at MMBB are restored on all paths -/// from MBB to all exit blocks. -/// Restores: walk idf from each MBB in restore set ensuring that -/// all CSRs restored at MBB are spilled on all paths -/// reaching MBB. -/// -void PEI::verifySpillRestorePlacement() { - unsigned numReturnBlocks = 0; - for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - if (isReturnBlock(MBB) || MBB->succ_size() == 0) - ++numReturnBlocks; - } - for (CSRegBlockMap::iterator BI = CSRSave.begin(), - BE = CSRSave.end(); BI != BE; ++BI) { - MachineBasicBlock* MBB = BI->first; - CSRegSet spilled = BI->second; - CSRegSet restored; - - if (spilled.empty()) - continue; - - DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(spilled) - << " RESTORE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; - - if (CSRRestore[MBB].intersects(spilled)) { - restored |= (CSRRestore[MBB] & spilled); - } - - // Walk depth first from MBB to find restores of all CSRs spilled at MBB: - // we must find restores for all spills w/no intervening spills on all - // paths from MBB to all return blocks. - for (df_iterator BI = df_begin(MBB), - BE = df_end(MBB); BI != BE; ++BI) { - MachineBasicBlock* SBB = *BI; - if (SBB == MBB) - continue; - // Stop when we encounter spills of any CSRs spilled at MBB that - // have not yet been seen to be restored. - if (CSRSave[SBB].intersects(spilled) && - !restored.contains(CSRSave[SBB] & spilled)) - break; - // Collect the CSRs spilled at MBB that are restored - // at this DF successor of MBB. - if (CSRRestore[SBB].intersects(spilled)) - restored |= (CSRRestore[SBB] & spilled); - // If we are at a retun block, check that the restores - // we have seen so far exhaust the spills at MBB, then - // reset the restores. - if (isReturnBlock(SBB) || SBB->succ_size() == 0) { - if (restored != spilled) { - CSRegSet notRestored = (spilled - restored); - DOUT << MF->getFunction()->getName() << ": " - << stringifyCSRegSet(notRestored) - << " spilled at " << getBasicBlockName(MBB) - << " are never restored on path to return " - << getBasicBlockName(SBB) << "\n"; +/// scavengeFrameVirtualRegs - Replace all frame index virtual registers +/// with physical registers. Use the register scavenger to find an +/// appropriate register to use. +void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { + // Run through the instructions and find any virtual registers. + for (MachineFunction::iterator BB = Fn.begin(), + E = Fn.end(); BB != E; ++BB) { + RS->enterBasicBlock(BB); + + // FIXME: The logic flow in this function is still too convoluted. + // It needs a cleanup refactoring. Do that in preparation for tracking + // more than one scratch register value and using ranges to find + // available scratch registers. + unsigned CurrentVirtReg = 0; + unsigned CurrentScratchReg = 0; + bool havePrevValue = false; + TargetRegisterInfo::FrameIndexValue PrevValue(0,0); + TargetRegisterInfo::FrameIndexValue Value(0,0); + MachineInstr *PrevLastUseMI = NULL; + unsigned PrevLastUseOp = 0; + bool trackingCurrentValue = false; + int SPAdj = 0; + + // The instruction stream may change in the loop, so check BB->end() + // directly. + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { + MachineInstr *MI = I; + bool isDefInsn = false; + bool isKillInsn = false; + bool clobbersScratchReg = false; + bool DoIncr = true; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + if (MI->getOperand(i).isReg()) { + MachineOperand &MO = MI->getOperand(i); + unsigned Reg = MO.getReg(); + if (Reg == 0) + continue; + if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + // If we have a previous scratch reg, check and see if anything + // here kills whatever value is in there. + if (Reg == CurrentScratchReg) { + if (MO.isUse()) { + // Two-address operands implicitly kill + if (MO.isKill() || MI->isRegTiedToDefOperand(i)) + clobbersScratchReg = true; + } else { + assert (MO.isDef()); + clobbersScratchReg = true; + } + } + continue; + } + // If this is a def, remember that this insn defines the value. + // This lets us properly consider insns which re-use the scratch + // register, such as r2 = sub r2, #imm, in the middle of the + // scratch range. + if (MO.isDef()) + isDefInsn = true; + + // Have we already allocated a scratch register for this virtual? + if (Reg != CurrentVirtReg) { + // When we first encounter a new virtual register, it + // must be a definition. + assert(MI->getOperand(i).isDef() && + "frame index virtual missing def!"); + // We can't have nested virtual register live ranges because + // there's only a guarantee of one scavenged register at a time. + assert (CurrentVirtReg == 0 && + "overlapping frame index virtual registers!"); + + // If the target gave us information about what's in the register, + // we can use that to re-use scratch regs. + DenseMap::iterator Entry = + FrameConstantRegMap.find(Reg); + trackingCurrentValue = Entry != FrameConstantRegMap.end(); + if (trackingCurrentValue) { + SPAdj = (*Entry).second.second; + Value = (*Entry).second.first; + } else { + SPAdj = 0; + Value.first = 0; + Value.second = 0; + } + + // If the scratch register from the last allocation is still + // available, see if the value matches. If it does, just re-use it. + if (trackingCurrentValue && havePrevValue && PrevValue == Value) { + // FIXME: This assumes that the instructions in the live range + // for the virtual register are exclusively for the purpose + // of populating the value in the register. That's reasonable + // for these frame index registers, but it's still a very, very + // strong assumption. rdar://7322732. Better would be to + // explicitly check each instruction in the range for references + // to the virtual register. Only delete those insns that + // touch the virtual register. + + // Find the last use of the new virtual register. Remove all + // instruction between here and there, and update the current + // instruction to reference the last use insn instead. + MachineBasicBlock::iterator LastUseMI = + findLastUseReg(I, BB->end(), Reg); + + // Remove all instructions up 'til the last use, since they're + // just calculating the value we already have. + BB->erase(I, LastUseMI); + I = LastUseMI; + + // Extend the live range of the scratch register + PrevLastUseMI->getOperand(PrevLastUseOp).setIsKill(false); + RS->setUsed(CurrentScratchReg); + CurrentVirtReg = Reg; + + // We deleted the instruction we were scanning the operands of. + // Jump back to the instruction iterator loop. Don't increment + // past this instruction since we updated the iterator already. + DoIncr = false; + break; + } + + // Scavenge a new scratch register + CurrentVirtReg = Reg; + const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg); + CurrentScratchReg = RS->FindUnusedReg(RC); + if (CurrentScratchReg == 0) + // No register is "free". Scavenge a register. + CurrentScratchReg = RS->scavengeRegister(RC, I, SPAdj); + + PrevValue = Value; + } + // replace this reference to the virtual register with the + // scratch register. + assert (CurrentScratchReg && "Missing scratch register!"); + MI->getOperand(i).setReg(CurrentScratchReg); + + if (MI->getOperand(i).isKill()) { + isKillInsn = true; + PrevLastUseOp = i; + PrevLastUseMI = MI; + } } - restored.clear(); } - } - } - - // Check restore placements. - for (CSRegBlockMap::iterator BI = CSRRestore.begin(), - BE = CSRRestore.end(); BI != BE; ++BI) { - MachineBasicBlock* MBB = BI->first; - CSRegSet restored = BI->second; - CSRegSet spilled; - - if (restored.empty()) - continue; - - DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRSave[MBB]) - << " RESTORE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(restored) << "\n"; - - if (CSRSave[MBB].intersects(restored)) { - spilled |= (CSRSave[MBB] & restored); - } - // Walk inverse depth first from MBB to find spills of all - // CSRs restored at MBB: - for (idf_iterator BI = idf_begin(MBB), - BE = idf_end(MBB); BI != BE; ++BI) { - MachineBasicBlock* PBB = *BI; - if (PBB == MBB) - continue; - // Stop when we encounter restores of any CSRs restored at MBB that - // have not yet been seen to be spilled. - if (CSRRestore[PBB].intersects(restored) && - !spilled.contains(CSRRestore[PBB] & restored)) - break; - // Collect the CSRs restored at MBB that are spilled - // at this DF predecessor of MBB. - if (CSRSave[PBB].intersects(restored)) - spilled |= (CSRSave[PBB] & restored); - } - if (spilled != restored) { - CSRegSet notSpilled = (restored - spilled); - DOUT << MF->getFunction()->getName() << ": " - << stringifyCSRegSet(notSpilled) - << " restored at " << getBasicBlockName(MBB) - << " are never spilled\n"; - } - } -} - -// Debugging print methods. -std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) { - std::ostringstream name; - if (MBB) { - if (MBB->getBasicBlock()) - name << MBB->getBasicBlock()->getName(); - else - name << "_MBB_" << MBB->getNumber(); - } - return name.str(); -} - -std::string PEI::stringifyCSRegSet(const CSRegSet& s) { - const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo(); - const std::vector CSI = - MF->getFrameInfo()->getCalleeSavedInfo(); - - std::ostringstream srep; - if (CSI.size() == 0) { - srep << "[]"; - return srep.str(); - } - srep << "["; - CSRegSet::iterator I = s.begin(), E = s.end(); - if (I != E) { - unsigned reg = CSI[*I].getReg(); - srep << TRI->getName(reg); - for (++I; I != E; ++I) { - reg = CSI[*I].getReg(); - srep << ","; - srep << TRI->getName(reg); - } - } - srep << "]"; - return srep.str(); -} - -void PEI::dumpSet(const CSRegSet& s) { - DOUT << stringifyCSRegSet(s) << "\n"; -} - -void PEI::dumpUsed(MachineBasicBlock* MBB) { - if (MBB) { - DOUT << "CSRUsed[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRUsed[MBB]) << "\n"; - } -} - -void PEI::dumpAllUsed() { - for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - dumpUsed(MBB); - } -} - -void PEI::dumpSets(MachineBasicBlock* MBB) { - if (MBB) { - DOUT << getBasicBlockName(MBB) << " | " - << stringifyCSRegSet(CSRUsed[MBB]) << " | " - << stringifyCSRegSet(AnticIn[MBB]) << " | " - << stringifyCSRegSet(AnticOut[MBB]) << " | " - << stringifyCSRegSet(AvailIn[MBB]) << " | " - << stringifyCSRegSet(AvailOut[MBB]) << "\n"; - } -} - -void PEI::dumpSets1(MachineBasicBlock* MBB) { - if (MBB) { - DOUT << getBasicBlockName(MBB) << " | " - << stringifyCSRegSet(CSRUsed[MBB]) << " | " - << stringifyCSRegSet(AnticIn[MBB]) << " | " - << stringifyCSRegSet(AnticOut[MBB]) << " | " - << stringifyCSRegSet(AvailIn[MBB]) << " | " - << stringifyCSRegSet(AvailOut[MBB]) << " | " - << stringifyCSRegSet(CSRSave[MBB]) << " | " - << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; - } -} - -void PEI::dumpAllSets() { - for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - dumpSets1(MBB); - } -} - -void PEI::dumpSRSets() { - for (MachineFunction::iterator MBB = MF->begin(), E = MF->end(); - MBB != E; ++MBB) { - if (! CSRSave[MBB].empty()) { - DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRSave[MBB]); - if (CSRRestore[MBB].empty()) - DOUT << "\n"; - } - if (! CSRRestore[MBB].empty()) { - if (! CSRSave[MBB].empty()) - DOUT << " "; - DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; + // If this is the last use of the scratch, stop tracking it. The + // last use will be a kill operand in an instruction that does + // not also define the scratch register. + if (isKillInsn && !isDefInsn) { + CurrentVirtReg = 0; + havePrevValue = trackingCurrentValue; + } + // Similarly, notice if instruction clobbered the value in the + // register we're tracking for possible later reuse. This is noted + // above, but enforced here since the value is still live while we + // process the rest of the operands of the instruction. + if (clobbersScratchReg) { + havePrevValue = false; + CurrentScratchReg = 0; + } + if (DoIncr) { + RS->forward(I); + ++I; + } } } } -#endif