X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FPrologEpilogInserter.cpp;h=136b1ed4db5d76e7b71a118752e2f4ae6a733931;hb=3bfc4d8e13edd83534b733f0f1de5b1d5f6bf828;hp=2d54cd4274183a099e987487bf2da2d3b7e4307f;hpb=7c617b5e53987d786451dd668b5113f2e2b983f8;p=oota-llvm.git diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 2d54cd42741..136b1ed4db5 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -14,40 +14,95 @@ // This pass must be run after register allocation. After this pass is // executed, it is illegal to construct MO_FrameIndex operands. // -// This pass provides an optional shrink wrapping variant of prolog/epilog -// insertion, enabled via --shrink-wrap. See ShrinkWrapping.cpp. -// //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "pei" #include "PrologEpilogInserter.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetFrameInfo.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/StackProtector.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" -#include "llvm/ADT/IndexedMap.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include using namespace llvm; char PEI::ID = 0; +char &llvm::PrologEpilogCodeInserterID = PEI::ID; + +static cl::opt +WarnStackSize("warn-stack-size", cl::Hidden, cl::init((unsigned)-1), + cl::desc("Warn for stack size bigger than the given" + " number")); + +INITIALIZE_PASS_BEGIN(PEI, "prologepilog", + "Prologue/Epilogue Insertion", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(StackProtector) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_END(PEI, "prologepilog", + "Prologue/Epilogue Insertion & Frame Finalization", + false, false) + +STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged"); +STATISTIC(NumBytesStackSpace, + "Number of bytes used for stack in all functions"); -static RegisterPass -X("prologepilog", "Prologue/Epilogue Insertion"); +void PEI::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addPreserved(); + AU.addPreserved(); + AU.addRequired(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); +} -/// createPrologEpilogCodeInserter - This function returns a pass that inserts -/// prolog and epilog code, and eliminates abstract frame references. -/// -FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); } +bool PEI::isReturnBlock(MachineBasicBlock* MBB) { + return (MBB && !MBB->empty() && MBB->back().isReturn()); +} + +/// Compute the set of return blocks +void PEI::calculateSets(MachineFunction &Fn) { + // Sets used to compute spill, restore placement sets. + const std::vector &CSI = + Fn.getFrameInfo()->getCalleeSavedInfo(); + + // If no CSRs used, we are done. + if (CSI.empty()) + return; + + // Save refs to entry and return blocks. + EntryBlock = Fn.begin(); + for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end(); + MBB != E; ++MBB) + if (isReturnBlock(MBB)) + ReturnBlocks.push_back(MBB); + + return; +} + +/// StackObjSet - A set of stack object indexes +typedef SmallSetVector StackObjSet; /// runOnMachineFunction - Insert prolog/epilog code and replace abstract /// frame indexes with appropriate references. @@ -55,52 +110,47 @@ FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); } bool PEI::runOnMachineFunction(MachineFunction &Fn) { const Function* F = Fn.getFunction(); const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); + const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); + + assert(!Fn.getRegInfo().getNumVirtRegs() && "Regalloc must assign all vregs"); + RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL; FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); - FrameConstantRegMap.clear(); - // Get MachineModuleInfo so that we can track the construction of the - // frame. - if (MachineModuleInfo *MMI = getAnalysisIfAvailable()) - Fn.getFrameInfo()->setMachineModuleInfo(MMI); - - // Calculate the MaxCallFrameSize and HasCalls variables for the function's - // frame information. Also eliminates call frame pseudo instructions. + // Calculate the MaxCallFrameSize and AdjustsStack variables for the + // function's frame information. Also eliminates call frame pseudo + // instructions. calculateCallsInformation(Fn); // Allow the target machine to make some adjustments to the function // e.g. UsedPhysRegs before calculateCalleeSavedRegisters. - TRI->processFunctionBeforeCalleeSavedScan(Fn, RS); + TFI->processFunctionBeforeCalleeSavedScan(Fn, RS); // Scan the function for modified callee saved registers and insert spill code // for any callee saved registers that are modified. calculateCalleeSavedRegisters(Fn); // Determine placement of CSR spill/restore code: - // - with shrink wrapping, place spills and restores to tightly - // enclose regions in the Machine CFG of the function where - // they are used. Without shrink wrapping - // - default (no shrink wrapping), place all spills in the - // entry block, all restores in return blocks. - placeCSRSpillsAndRestores(Fn); + // place all spills in the entry block, all restores in return blocks. + calculateSets(Fn); // Add the code to save and restore the callee saved registers - if (!F->hasFnAttr(Attribute::Naked)) + if (!F->hasFnAttribute(Attribute::Naked)) insertCSRSpillsAndRestores(Fn); // Allow the target machine to make final modifications to the function // before the frame layout is finalized. - TRI->processFunctionBeforeFrameFinalized(Fn); + TFI->processFunctionBeforeFrameFinalized(Fn, RS); // Calculate actual frame offsets for all abstract stack objects... calculateFrameObjectOffsets(Fn); // Add prolog and epilog code to the function. This function is required // to align the stack frame as necessary for any stack variables or - // called functions. Because of this, calculateCalleeSavedRegisters - // must be called before this function in order to set the HasCalls + // called functions. Because of this, calculateCalleeSavedRegisters() + // must be called before this function in order to set the AdjustsStack // and MaxCallFrameSize variables. - if (!F->hasFnAttr(Attribute::Naked)) + if (!F->hasFnAttribute(Attribute::Naked)) insertPrologEpilogCode(Fn); // Replace all MO_FrameIndex operands with physical register references @@ -114,37 +164,36 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) scavengeFrameVirtualRegs(Fn); - delete RS; - clearAllSets(); - return true; -} + // Clear any vregs created by virtual scavenging. + Fn.getRegInfo().clearVirtRegs(); -#if 0 -void PEI::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - if (ShrinkWrapping || ShrinkWrapFunc != "") { - AU.addRequired(); - AU.addRequired(); + // Warn on stack size when we exceeds the given limit. + MachineFrameInfo *MFI = Fn.getFrameInfo(); + uint64_t StackSize = MFI->getStackSize(); + if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) { + DiagnosticInfoStackSize DiagStackSize(*F, StackSize); + F->getContext().diagnose(DiagStackSize); } - AU.addPreserved(); - AU.addPreserved(); - MachineFunctionPass::getAnalysisUsage(AU); + + delete RS; + ReturnBlocks.clear(); + return true; } -#endif -/// calculateCallsInformation - Calculate the MaxCallFrameSize and HasCalls +/// calculateCallsInformation - Calculate the MaxCallFrameSize and AdjustsStack /// variables for the function's frame information and eliminate call frame /// pseudo instructions. void PEI::calculateCallsInformation(MachineFunction &Fn) { - const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); - MachineFrameInfo *FFI = Fn.getFrameInfo(); + const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); + const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); + MachineFrameInfo *MFI = Fn.getFrameInfo(); unsigned MaxCallFrameSize = 0; - bool HasCalls = FFI->hasCalls(); + bool AdjustsStack = MFI->adjustsStack(); // Get the function call frame set-up and tear-down instruction opcode - int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode(); - int FrameDestroyOpcode = RegInfo->getCallFrameDestroyOpcode(); + int FrameSetupOpcode = TII.getCallFrameSetupOpcode(); + int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); // Early exit for targets which have no call frame setup/destroy pseudo // instructions. @@ -160,16 +209,17 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { " instructions should have a single immediate argument!"); unsigned Size = I->getOperand(0).getImm(); if (Size > MaxCallFrameSize) MaxCallFrameSize = Size; - HasCalls = true; + AdjustsStack = true; FrameSDOps.push_back(I); } else if (I->isInlineAsm()) { - // An InlineAsm might be a call; assume it is to get the stack frame - // aligned correctly for calls. - HasCalls = true; + // Some inline asm's need a stack frame, as indicated by operand 1. + unsigned ExtraInfo = I->getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); + if (ExtraInfo & InlineAsm::Extra_IsAlignStack) + AdjustsStack = true; } - FFI->setHasCalls(HasCalls); - FFI->setMaxCallFrameSize(MaxCallFrameSize); + MFI->setAdjustsStack(AdjustsStack); + MFI->setMaxCallFrameSize(MaxCallFrameSize); for (std::vector::iterator i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) { @@ -179,21 +229,21 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { // the target doesn't indicate otherwise, remove the call frame pseudos // here. The sub/add sp instruction pairs are still inserted, but we don't // need to track the SP adjustment for frame index elimination. - if (RegInfo->canSimplifyCallFramePseudos(Fn)) - RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I); + if (TFI->canSimplifyCallFramePseudos(Fn)) + TFI->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I); } } /// calculateCalleeSavedRegisters - Scan the function for modified callee saved /// registers. -void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { - const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); - const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo(); - MachineFrameInfo *FFI = Fn.getFrameInfo(); +void PEI::calculateCalleeSavedRegisters(MachineFunction &F) { + const TargetRegisterInfo *RegInfo = F.getTarget().getRegisterInfo(); + const TargetFrameLowering *TFI = F.getTarget().getFrameLowering(); + MachineFrameInfo *MFI = F.getFrameInfo(); // Get the callee saved register list... - const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn); + const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&F); // These are used to keep track the callee-save area. Initialize them. MinCSFrameIndex = INT_MAX; @@ -203,25 +253,17 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { if (CSRegs == 0 || CSRegs[0] == 0) return; - // Figure out which *callee saved* registers are modified by the current - // function, thus needing to be saved and restored in the prolog/epilog. - const TargetRegisterClass * const *CSRegClasses = - RegInfo->getCalleeSavedRegClasses(&Fn); + // In Naked functions we aren't going to save any registers. + if (F.getFunction()->hasFnAttribute(Attribute::Naked)) + return; std::vector CSI; for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; - if (Fn.getRegInfo().isPhysRegUsed(Reg)) { + // Functions which call __builtin_unwind_init get all their registers saved. + if (F.getRegInfo().isPhysRegUsed(Reg) || F.getMMI().callsUnwindInit()) { // If the reg is modified, save it! - CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i])); - } else { - for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg); - *AliasSet; ++AliasSet) { // Check alias registers too. - if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) { - CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i])); - break; - } - } + CSI.push_back(CalleeSavedInfo(Reg)); } } @@ -229,7 +271,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { return; // Early exit if no callee saved registers are modified! unsigned NumFixedSpillSlots; - const TargetFrameInfo::SpillSlot *FixedSpillSlots = + const TargetFrameLowering::SpillSlot *FixedSpillSlots = TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots); // Now that we know which registers need to be saved and restored, allocate @@ -237,17 +279,17 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { for (std::vector::iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { unsigned Reg = I->getReg(); - const TargetRegisterClass *RC = I->getRegClass(); + const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); int FrameIdx; - if (RegInfo->hasReservedSpillSlot(Fn, Reg, FrameIdx)) { + if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) { I->setFrameIdx(FrameIdx); continue; } // Check to see if this physreg must be spilled to a particular stack slot // on this target. - const TargetFrameInfo::SpillSlot *FixedSlot = FixedSpillSlots; + const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots; while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots && FixedSlot->Reg != Reg) ++FixedSlot; @@ -261,159 +303,66 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { // the TargetRegisterClass if the stack alignment is smaller. Use the // min. Align = std::min(Align, StackAlign); - FrameIdx = FFI->CreateStackObject(RC->getSize(), Align, true); + FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true); if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; } else { // Spill it to the stack where we must. - FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, - true, false); + FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true); } I->setFrameIdx(FrameIdx); } - FFI->setCalleeSavedInfo(CSI); + MFI->setCalleeSavedInfo(CSI); } /// insertCSRSpillsAndRestores - Insert spill and restore code for -/// callee saved registers used in the function, handling shrink wrapping. +/// callee saved registers used in the function. /// void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Get callee saved register information. - MachineFrameInfo *FFI = Fn.getFrameInfo(); - const std::vector &CSI = FFI->getCalleeSavedInfo(); + MachineFrameInfo *MFI = Fn.getFrameInfo(); + const std::vector &CSI = MFI->getCalleeSavedInfo(); - FFI->setCalleeSavedInfoValid(true); + MFI->setCalleeSavedInfoValid(true); // Early exit if no callee saved registers are modified! if (CSI.empty()) return; const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); + const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); + const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); MachineBasicBlock::iterator I; - if (! ShrinkWrapThisFunction) { - // Spill using target interface. - I = EntryBlock->begin(); - if (!TII.spillCalleeSavedRegisters(*EntryBlock, I, CSI)) { - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - // Add the callee-saved register as live-in. - // It's killed at the spill. - EntryBlock->addLiveIn(CSI[i].getReg()); - - // Insert the spill to the stack frame. - TII.storeRegToStackSlot(*EntryBlock, I, CSI[i].getReg(), true, - CSI[i].getFrameIdx(), CSI[i].getRegClass()); - } - } - - // Restore using target interface. - for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) { - MachineBasicBlock* MBB = ReturnBlocks[ri]; - I = MBB->end(); --I; - - // Skip over all terminator instructions, which are part of the return - // sequence. - MachineBasicBlock::iterator I2 = I; - while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator()) - I = I2; - - bool AtStart = I == MBB->begin(); - MachineBasicBlock::iterator BeforeI = I; - if (!AtStart) - --BeforeI; - - // Restore all registers immediately before the return and any - // terminators that preceed it. - if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI)) { - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - TII.loadRegFromStackSlot(*MBB, I, CSI[i].getReg(), - CSI[i].getFrameIdx(), - CSI[i].getRegClass()); - assert(I != MBB->begin() && - "loadRegFromStackSlot didn't insert any code!"); - // Insert in reverse order. loadRegFromStackSlot can insert - // multiple instructions. - if (AtStart) - I = MBB->begin(); - else { - I = BeforeI; - ++I; - } - } - } - } - return; - } - - // Insert spills. - std::vector blockCSI; - for (CSRegBlockMap::iterator BI = CSRSave.begin(), - BE = CSRSave.end(); BI != BE; ++BI) { - MachineBasicBlock* MBB = BI->first; - CSRegSet save = BI->second; - - if (save.empty()) - continue; - - blockCSI.clear(); - for (CSRegSet::iterator RI = save.begin(), - RE = save.end(); RI != RE; ++RI) { - blockCSI.push_back(CSI[*RI]); - } - assert(blockCSI.size() > 0 && - "Could not collect callee saved register info"); - - I = MBB->begin(); - - // When shrink wrapping, use stack slot stores/loads. - for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { + // Spill using target interface. + I = EntryBlock->begin(); + if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) { + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { // Add the callee-saved register as live-in. // It's killed at the spill. - MBB->addLiveIn(blockCSI[i].getReg()); + EntryBlock->addLiveIn(CSI[i].getReg()); // Insert the spill to the stack frame. - TII.storeRegToStackSlot(*MBB, I, blockCSI[i].getReg(), - true, - blockCSI[i].getFrameIdx(), - blockCSI[i].getRegClass()); + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(*EntryBlock, I, Reg, true, CSI[i].getFrameIdx(), + RC, TRI); } } - for (CSRegBlockMap::iterator BI = CSRRestore.begin(), - BE = CSRRestore.end(); BI != BE; ++BI) { - MachineBasicBlock* MBB = BI->first; - CSRegSet restore = BI->second; - - if (restore.empty()) - continue; - - blockCSI.clear(); - for (CSRegSet::iterator RI = restore.begin(), - RE = restore.end(); RI != RE; ++RI) { - blockCSI.push_back(CSI[*RI]); - } - assert(blockCSI.size() > 0 && - "Could not find callee saved register info"); - - // If MBB is empty and needs restores, insert at the _beginning_. - if (MBB->empty()) { - I = MBB->begin(); - } else { - I = MBB->end(); - --I; + // Restore using target interface. + for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) { + MachineBasicBlock *MBB = ReturnBlocks[ri]; + I = MBB->end(); + --I; - // Skip over all terminator instructions, which are part of the - // return sequence. - if (! I->getDesc().isTerminator()) { - ++I; - } else { - MachineBasicBlock::iterator I2 = I; - while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator()) - I = I2; - } - } + // Skip over all terminator instructions, which are part of the return + // sequence. + MachineBasicBlock::iterator I2 = I; + while (I2 != MBB->begin() && (--I2)->isTerminator()) + I = I2; bool AtStart = I == MBB->begin(); MachineBasicBlock::iterator BeforeI = I; @@ -421,20 +370,22 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { --BeforeI; // Restore all registers immediately before the return and any - // terminators that preceed it. - for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { - TII.loadRegFromStackSlot(*MBB, I, blockCSI[i].getReg(), - blockCSI[i].getFrameIdx(), - blockCSI[i].getRegClass()); - assert(I != MBB->begin() && - "loadRegFromStackSlot didn't insert any code!"); - // Insert in reverse order. loadRegFromStackSlot can insert - // multiple instructions. - if (AtStart) - I = MBB->begin(); - else { - I = BeforeI; - ++I; + // terminators that precede it. + if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) { + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(*MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI); + assert(I != MBB->begin() && + "loadRegFromStackSlot didn't insert any code!"); + // Insert in reverse order. loadRegFromStackSlot can insert + // multiple instructions. + if (AtStart) + I = MBB->begin(); + else { + I = BeforeI; + ++I; + } } } } @@ -442,14 +393,14 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { /// AdjustStackOffset - Helper function used to adjust the stack frame offset. static inline void -AdjustStackOffset(MachineFrameInfo *FFI, int FrameIdx, +AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, bool StackGrowsDown, int64_t &Offset, unsigned &MaxAlign) { // If the stack grows down, add the object size to find the lowest address. if (StackGrowsDown) - Offset += FFI->getObjectSize(FrameIdx); + Offset += MFI->getObjectSize(FrameIdx); - unsigned Align = FFI->getObjectAlignment(FrameIdx); + unsigned Align = MFI->getObjectAlignment(FrameIdx); // If the alignment of this object is greater than that of the stack, then // increase the stack alignment to match. @@ -459,10 +410,28 @@ AdjustStackOffset(MachineFrameInfo *FFI, int FrameIdx, Offset = (Offset + Align - 1) / Align * Align; if (StackGrowsDown) { - FFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset + DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n"); + MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset } else { - FFI->setObjectOffset(FrameIdx, Offset); - Offset += FFI->getObjectSize(FrameIdx); + DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n"); + MFI->setObjectOffset(FrameIdx, Offset); + Offset += MFI->getObjectSize(FrameIdx); + } +} + +/// AssignProtectedObjSet - Helper function to assign large stack objects (i.e., +/// those required to be close to the Stack Protector) to stack offsets. +static void +AssignProtectedObjSet(const StackObjSet &UnassignedObjs, + SmallSet &ProtectedObjs, + MachineFrameInfo *MFI, bool StackGrowsDown, + int64_t &Offset, unsigned &MaxAlign) { + + for (StackObjSet::const_iterator I = UnassignedObjs.begin(), + E = UnassignedObjs.end(); I != E; ++I) { + int i = *I; + AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); + ProtectedObjs.insert(i); } } @@ -470,13 +439,14 @@ AdjustStackOffset(MachineFrameInfo *FFI, int FrameIdx, /// abstract stack objects. /// void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { - const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo(); + const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering(); + StackProtector *SP = &getAnalysis(); bool StackGrowsDown = - TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown; + TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; // Loop over all of the stack objects, assigning sequential addresses... - MachineFrameInfo *FFI = Fn.getFrameInfo(); + MachineFrameInfo *MFI = Fn.getFrameInfo(); // Start at the beginning of the local area. // The Offset is the distance from the stack top in the direction @@ -493,17 +463,17 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // We currently don't support filling in holes in between fixed sized // objects, so we adjust 'Offset' to point to the end of last fixed sized // preallocated object. - for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { + for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { int64_t FixedOff; if (StackGrowsDown) { // The maximum distance from the stack pointer is at lower address of // the object -- which is given by offset. For down growing stack // the offset is negative, so we negate the offset to get the distance. - FixedOff = -FFI->getObjectOffset(i); + FixedOff = -MFI->getObjectOffset(i); } else { // The maximum distance from the start pointer is at the upper // address of the object. - FixedOff = FFI->getObjectOffset(i) + FFI->getObjectSize(i); + FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i); } if (FixedOff > Offset) Offset = FixedOff; } @@ -512,74 +482,157 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // callee saved registers. if (StackGrowsDown) { for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) { - // If stack grows down, we need to add size of find the lowest + // If the stack grows down, we need to add the size to find the lowest // address of the object. - Offset += FFI->getObjectSize(i); + Offset += MFI->getObjectSize(i); - unsigned Align = FFI->getObjectAlignment(i); + unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary Offset = (Offset+Align-1)/Align*Align; - FFI->setObjectOffset(i, -Offset); // Set the computed offset + MFI->setObjectOffset(i, -Offset); // Set the computed offset } } else { int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex; for (int i = MaxCSFI; i >= MinCSFI ; --i) { - unsigned Align = FFI->getObjectAlignment(i); + unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary Offset = (Offset+Align-1)/Align*Align; - FFI->setObjectOffset(i, Offset); - Offset += FFI->getObjectSize(i); + MFI->setObjectOffset(i, Offset); + Offset += MFI->getObjectSize(i); } } - unsigned MaxAlign = FFI->getMaxAlignment(); + unsigned MaxAlign = MFI->getMaxAlignment(); // Make sure the special register scavenging spill slot is closest to the - // frame pointer if a frame pointer is required. + // incoming stack pointer if a frame pointer is required and is closer + // to the incoming rather than the final stack pointer. const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); - if (RS && RegInfo->hasFP(Fn) && !RegInfo->needsStackRealignment(Fn)) { - int SFI = RS->getScavengingFrameIndex(); - if (SFI >= 0) - AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign); + bool EarlyScavengingSlots = (TFI.hasFP(Fn) && + TFI.isFPCloseToIncomingSP() && + RegInfo->useFPForScavengingIndex(Fn) && + !RegInfo->needsStackRealignment(Fn)); + if (RS && EarlyScavengingSlots) { + SmallVector SFIs; + RS->getScavengingFrameIndices(SFIs); + for (SmallVectorImpl::iterator I = SFIs.begin(), + IE = SFIs.end(); I != IE; ++I) + AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign); + } + + // FIXME: Once this is working, then enable flag will change to a target + // check for whether the frame is large enough to want to use virtual + // frame index registers. Functions which don't want/need this optimization + // will continue to use the existing code path. + if (MFI->getUseLocalStackAllocationBlock()) { + unsigned Align = MFI->getLocalFrameMaxAlign(); + + // Adjust to alignment boundary. + Offset = (Offset + Align - 1) / Align * Align; + + DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n"); + + // Resolve offsets for objects in the local block. + for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) { + std::pair Entry = MFI->getLocalFrameObjectMap(i); + int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second; + DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" << + FIOffset << "]\n"); + MFI->setObjectOffset(Entry.first, FIOffset); + } + // Allocate the local block + Offset += MFI->getLocalFrameSize(); + + MaxAlign = std::max(Align, MaxAlign); } // Make sure that the stack protector comes before the local variables on the // stack. - if (FFI->getStackProtectorIndex() >= 0) - AdjustStackOffset(FFI, FFI->getStackProtectorIndex(), StackGrowsDown, + SmallSet ProtectedObjs; + if (MFI->getStackProtectorIndex() >= 0) { + StackObjSet LargeArrayObjs; + StackObjSet SmallArrayObjs; + StackObjSet AddrOfObjs; + + AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown, Offset, MaxAlign); + // Assign large stack objects first. + for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (MFI->isObjectPreAllocated(i) && + MFI->getUseLocalStackAllocationBlock()) + continue; + if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) + continue; + if (RS && RS->isScavengingFrameIndex((int)i)) + continue; + if (MFI->isDeadObjectIndex(i)) + continue; + if (MFI->getStackProtectorIndex() == (int)i) + continue; + + switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) { + case StackProtector::SSPLK_None: + continue; + case StackProtector::SSPLK_SmallArray: + SmallArrayObjs.insert(i); + continue; + case StackProtector::SSPLK_AddrOf: + AddrOfObjs.insert(i); + continue; + case StackProtector::SSPLK_LargeArray: + LargeArrayObjs.insert(i); + continue; + } + llvm_unreachable("Unexpected SSPLayoutKind."); + } + + AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown, + Offset, MaxAlign); + AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown, + Offset, MaxAlign); + AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown, + Offset, MaxAlign); + } + // Then assign frame offsets to stack objects that are not used to spill // callee saved registers. - for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) { + for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (MFI->isObjectPreAllocated(i) && + MFI->getUseLocalStackAllocationBlock()) + continue; if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) continue; - if (RS && (int)i == RS->getScavengingFrameIndex()) + if (RS && RS->isScavengingFrameIndex((int)i)) continue; - if (FFI->isDeadObjectIndex(i)) + if (MFI->isDeadObjectIndex(i)) continue; - if (FFI->getStackProtectorIndex() == (int)i) + if (MFI->getStackProtectorIndex() == (int)i) + continue; + if (ProtectedObjs.count(i)) continue; - AdjustStackOffset(FFI, i, StackGrowsDown, Offset, MaxAlign); + AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); } // Make sure the special register scavenging spill slot is closest to the // stack pointer. - if (RS && (!RegInfo->hasFP(Fn) || RegInfo->needsStackRealignment(Fn))) { - int SFI = RS->getScavengingFrameIndex(); - if (SFI >= 0) - AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign); + if (RS && !EarlyScavengingSlots) { + SmallVector SFIs; + RS->getScavengingFrameIndices(SFIs); + for (SmallVectorImpl::iterator I = SFIs.begin(), + IE = SFIs.end(); I != IE; ++I) + AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign); } - if (!RegInfo->targetHandlesStackFrameRounding()) { + if (!TFI.targetHandlesStackFrameRounding()) { // If we have reserved argument space for call sites in the function // immediately on entry to the current function, count it as part of the // overall stack size. - if (FFI->hasCalls() && RegInfo->hasReservedCallFrame(Fn)) - Offset += FFI->getMaxCallFrameSize(); + if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn)) + Offset += MFI->getMaxCallFrameSize(); // Round up the size to a multiple of the alignment. If the function has // any calls or alloca's, align to the target's StackAlignment value to @@ -587,41 +640,57 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // otherwise, for leaf functions, align to the TransientStackAlignment // value. unsigned StackAlign; - if (FFI->hasCalls() || FFI->hasVarSizedObjects() || - (RegInfo->needsStackRealignment(Fn) && FFI->getObjectIndexEnd() != 0)) + if (MFI->adjustsStack() || MFI->hasVarSizedObjects() || + (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0)) StackAlign = TFI.getStackAlignment(); else StackAlign = TFI.getTransientStackAlignment(); - // If the frame pointer is eliminated, all frame offsets will be relative - // to SP not FP; align to MaxAlign so this works. + + // If the frame pointer is eliminated, all frame offsets will be relative to + // SP not FP. Align to MaxAlign so this works. StackAlign = std::max(StackAlign, MaxAlign); unsigned AlignMask = StackAlign - 1; Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); } // Update frame info to pretend that this is part of the stack... - FFI->setStackSize(Offset - LocalAreaOffset); + int64_t StackSize = Offset - LocalAreaOffset; + MFI->setStackSize(StackSize); + NumBytesStackSpace += StackSize; } - /// insertPrologEpilogCode - Scan the function for modified callee saved /// registers, insert spill code for these callee saved registers, then add /// prolog and epilog code to the function. /// void PEI::insertPrologEpilogCode(MachineFunction &Fn) { - const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); + const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering(); // Add prologue to the function... - TRI->emitPrologue(Fn); + TFI.emitPrologue(Fn); // Add epilogue to restore the callee-save registers in each exiting block for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { // If last instruction is a return instruction, add an epilogue - if (!I->empty() && I->back().getDesc().isReturn()) - TRI->emitEpilogue(Fn, *I); + if (!I->empty() && I->back().isReturn()) + TFI.emitEpilogue(Fn, *I); } -} + // Emit additional code that is required to support segmented stacks, if + // we've been asked for it. This, when linked with a runtime with support + // for segmented stacks (libgcc is one), will result in allocating stack + // space in small chunks instead of one large contiguous block. + if (Fn.getTarget().Options.EnableSegmentedStacks) + TFI.adjustForSegmentedStacks(Fn); + + // Emit additional code that is required to explicitly handle the stack in + // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The + // approach is rather similar to that of Segmented Stacks, but it uses a + // different conditional check and another BIF for allocating more stack + // space. + if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE) + TFI.adjustForHiPEPrologue(Fn); +} /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical /// register references and actual offsets. @@ -629,286 +698,217 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { void PEI::replaceFrameIndices(MachineFunction &Fn) { if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do? + // Store SPAdj at exit of a basic block. + SmallVector SPState; + SPState.resize(Fn.getNumBlockIDs()); + SmallPtrSet Reachable; + + // Iterate over the reachable blocks in DFS order. + for (df_ext_iterator > + DFI = df_ext_begin(&Fn, Reachable), DFE = df_ext_end(&Fn, Reachable); + DFI != DFE; ++DFI) { + int SPAdj = 0; + // Check the exit state of the DFS stack predecessor. + if (DFI.getPathLength() >= 2) { + MachineBasicBlock *StackPred = DFI.getPath(DFI.getPathLength() - 2); + assert(Reachable.count(StackPred) && + "DFS stack predecessor is already visited.\n"); + SPAdj = SPState[StackPred->getNumber()]; + } + MachineBasicBlock *BB = *DFI; + replaceFrameIndices(BB, Fn, SPAdj); + SPState[BB->getNumber()] = SPAdj; + } + + // Handle the unreachable blocks. + for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { + if (Reachable.count(BB)) + // Already handled in DFS traversal. + continue; + int SPAdj = 0; + replaceFrameIndices(BB, Fn, SPAdj); + } +} + +void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, + int &SPAdj) { const TargetMachine &TM = Fn.getTarget(); assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!"); + const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); - const TargetFrameInfo *TFI = TM.getFrameInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); bool StackGrowsDown = - TFI->getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown; - int FrameSetupOpcode = TRI.getCallFrameSetupOpcode(); - int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode(); + TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; + int FrameSetupOpcode = TII.getCallFrameSetupOpcode(); + int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); - for (MachineFunction::iterator BB = Fn.begin(), - E = Fn.end(); BB != E; ++BB) { - int SPAdj = 0; // SP offset due to call frame setup / destroy. - if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); + if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); - for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { - if (I->getOpcode() == FrameSetupOpcode || - I->getOpcode() == FrameDestroyOpcode) { - // Remember how much SP has been adjusted to create the call - // frame. - int Size = I->getOperand(0).getImm(); + if (I->getOpcode() == FrameSetupOpcode || + I->getOpcode() == FrameDestroyOpcode) { + // Remember how much SP has been adjusted to create the call + // frame. + int Size = I->getOperand(0).getImm(); + + if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) || + (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode)) + Size = -Size; - if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) || - (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode)) - Size = -Size; + SPAdj += Size; - SPAdj += Size; + MachineBasicBlock::iterator PrevI = BB->end(); + if (I != BB->begin()) PrevI = std::prev(I); + TFI->eliminateCallFramePseudoInstr(Fn, *BB, I); - MachineBasicBlock::iterator PrevI = BB->end(); - if (I != BB->begin()) PrevI = prior(I); - TRI.eliminateCallFramePseudoInstr(Fn, *BB, I); + // Visit the instructions created by eliminateCallFramePseudoInstr(). + if (PrevI == BB->end()) + I = BB->begin(); // The replaced instr was the first in the block. + else + I = std::next(PrevI); + continue; + } - // Visit the instructions created by eliminateCallFramePseudoInstr(). - if (PrevI == BB->end()) - I = BB->begin(); // The replaced instr was the first in the block. - else - I = llvm::next(PrevI); + MachineInstr *MI = I; + bool DoIncr = true; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + if (!MI->getOperand(i).isFI()) continue; - } - MachineInstr *MI = I; - bool DoIncr = true; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) - if (MI->getOperand(i).isFI()) { - // Some instructions (e.g. inline asm instructions) can have - // multiple frame indices and/or cause eliminateFrameIndex - // to insert more than one instruction. We need the register - // scavenger to go through all of these instructions so that - // it can update its register information. We keep the - // iterator at the point before insertion so that we can - // revisit them in full. - bool AtBeginning = (I == BB->begin()); - if (!AtBeginning) --I; - - // If this instruction has a FrameIndex operand, we need to - // use that target machine register info object to eliminate - // it. - TargetRegisterInfo::FrameIndexValue Value; - unsigned VReg = - TRI.eliminateFrameIndex(MI, SPAdj, &Value, - FrameIndexVirtualScavenging ? NULL : RS); - if (VReg) { - assert (FrameIndexVirtualScavenging && - "Not scavenging, but virtual returned from " - "eliminateFrameIndex()!"); - FrameConstantRegMap[VReg] = FrameConstantEntry(Value, SPAdj); - } - - // Reset the iterator if we were at the beginning of the BB. - if (AtBeginning) { - I = BB->begin(); - DoIncr = false; - } - - MI = 0; - break; - } + // Frame indicies in debug values are encoded in a target independent + // way with simply the frame index and offset rather than any + // target-specific addressing mode. + if (MI->isDebugValue()) { + assert(i == 0 && "Frame indicies can only appear as the first " + "operand of a DBG_VALUE machine instruction"); + unsigned Reg; + MachineOperand &Offset = MI->getOperand(1); + Offset.setImm(Offset.getImm() + + TFI->getFrameIndexReference( + Fn, MI->getOperand(0).getIndex(), Reg)); + MI->getOperand(0).ChangeToRegister(Reg, false /*isDef*/); + continue; + } - if (DoIncr && I != BB->end()) ++I; + // Some instructions (e.g. inline asm instructions) can have + // multiple frame indices and/or cause eliminateFrameIndex + // to insert more than one instruction. We need the register + // scavenger to go through all of these instructions so that + // it can update its register information. We keep the + // iterator at the point before insertion so that we can + // revisit them in full. + bool AtBeginning = (I == BB->begin()); + if (!AtBeginning) --I; + + // If this instruction has a FrameIndex operand, we need to + // use that target machine register info object to eliminate + // it. + TRI.eliminateFrameIndex(MI, SPAdj, i, + FrameIndexVirtualScavenging ? NULL : RS); + + // Reset the iterator if we were at the beginning of the BB. + if (AtBeginning) { + I = BB->begin(); + DoIncr = false; + } - // Update register states. - if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI); + MI = 0; + break; } - assert(SPAdj == 0 && "Unbalanced call frame setup / destroy pairs?"); - } -} + if (DoIncr && I != BB->end()) ++I; -/// findLastUseReg - find the killing use of the specified register within -/// the instruciton range. Return the operand number of the kill in Operand. -static MachineBasicBlock::iterator -findLastUseReg(MachineBasicBlock::iterator I, MachineBasicBlock::iterator ME, - unsigned Reg) { - // Scan forward to find the last use of this virtual register - for (++I; I != ME; ++I) { - MachineInstr *MI = I; - bool isDefInsn = false; - bool isKillInsn = false; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) - if (MI->getOperand(i).isReg()) { - unsigned OpReg = MI->getOperand(i).getReg(); - if (OpReg == 0 || !TargetRegisterInfo::isVirtualRegister(OpReg)) - continue; - assert (OpReg == Reg - && "overlapping use of scavenged index register!"); - // If this is the killing use, we have a candidate. - if (MI->getOperand(i).isKill()) - isKillInsn = true; - else if (MI->getOperand(i).isDef()) - isDefInsn = true; - } - if (isKillInsn && !isDefInsn) - return I; + // Update register states. + if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI); } - // If we hit the end of the basic block, there was no kill of - // the virtual register, which is wrong. - assert (0 && "scavenged index register never killed!"); - return ME; } /// scavengeFrameVirtualRegs - Replace all frame index virtual registers /// with physical registers. Use the register scavenger to find an /// appropriate register to use. +/// +/// FIXME: Iterating over the instruction stream is unnecessary. We can simply +/// iterate over the vreg use list, which at this point only contains machine +/// operands for which eliminateFrameIndex need a new scratch reg. void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // Run through the instructions and find any virtual registers. for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { RS->enterBasicBlock(BB); - // FIXME: The logic flow in this function is still too convoluted. - // It needs a cleanup refactoring. Do that in preparation for tracking - // more than one scratch register value and using ranges to find - // available scratch registers. - unsigned CurrentVirtReg = 0; - unsigned CurrentScratchReg = 0; - bool havePrevValue = false; - TargetRegisterInfo::FrameIndexValue PrevValue(0,0); - TargetRegisterInfo::FrameIndexValue Value(0,0); - MachineInstr *PrevLastUseMI = NULL; - unsigned PrevLastUseOp = 0; - bool trackingCurrentValue = false; int SPAdj = 0; // The instruction stream may change in the loop, so check BB->end() // directly. for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { + // We might end up here again with a NULL iterator if we scavenged a + // register for which we inserted spill code for definition by what was + // originally the first instruction in BB. + if (I == MachineBasicBlock::iterator(NULL)) + I = BB->begin(); + MachineInstr *MI = I; - bool isDefInsn = false; - bool isKillInsn = false; - bool clobbersScratchReg = false; - bool DoIncr = true; + MachineBasicBlock::iterator J = std::next(I); + MachineBasicBlock::iterator P = + I == BB->begin() ? MachineBasicBlock::iterator(NULL) : std::prev(I); + + // RS should process this instruction before we might scavenge at this + // location. This is because we might be replacing a virtual register + // defined by this instruction, and if so, registers killed by this + // instruction are available, and defined registers are not. + RS->forward(I); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { if (MI->getOperand(i).isReg()) { MachineOperand &MO = MI->getOperand(i); unsigned Reg = MO.getReg(); if (Reg == 0) continue; - if (!TargetRegisterInfo::isVirtualRegister(Reg)) { - // If we have a previous scratch reg, check and see if anything - // here kills whatever value is in there. - if (Reg == CurrentScratchReg) { - if (MO.isUse()) { - // Two-address operands implicitly kill - if (MO.isKill() || MI->isRegTiedToDefOperand(i)) - clobbersScratchReg = true; - } else { - assert (MO.isDef()); - clobbersScratchReg = true; - } - } + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - } - // If this is a def, remember that this insn defines the value. - // This lets us properly consider insns which re-use the scratch - // register, such as r2 = sub r2, #imm, in the middle of the - // scratch range. - if (MO.isDef()) - isDefInsn = true; - - // Have we already allocated a scratch register for this virtual? - if (Reg != CurrentVirtReg) { - // When we first encounter a new virtual register, it - // must be a definition. - assert(MI->getOperand(i).isDef() && - "frame index virtual missing def!"); - // We can't have nested virtual register live ranges because - // there's only a guarantee of one scavenged register at a time. - assert (CurrentVirtReg == 0 && - "overlapping frame index virtual registers!"); - - // If the target gave us information about what's in the register, - // we can use that to re-use scratch regs. - DenseMap::iterator Entry = - FrameConstantRegMap.find(Reg); - trackingCurrentValue = Entry != FrameConstantRegMap.end(); - if (trackingCurrentValue) { - SPAdj = (*Entry).second.second; - Value = (*Entry).second.first; - } else { - SPAdj = 0; - Value.first = 0; - Value.second = 0; - } - - // If the scratch register from the last allocation is still - // available, see if the value matches. If it does, just re-use it. - if (trackingCurrentValue && havePrevValue && PrevValue == Value) { - // FIXME: This assumes that the instructions in the live range - // for the virtual register are exclusively for the purpose - // of populating the value in the register. That's reasonable - // for these frame index registers, but it's still a very, very - // strong assumption. rdar://7322732. Better would be to - // explicitly check each instruction in the range for references - // to the virtual register. Only delete those insns that - // touch the virtual register. - - // Find the last use of the new virtual register. Remove all - // instruction between here and there, and update the current - // instruction to reference the last use insn instead. - MachineBasicBlock::iterator LastUseMI = - findLastUseReg(I, BB->end(), Reg); - - // Remove all instructions up 'til the last use, since they're - // just calculating the value we already have. - BB->erase(I, LastUseMI); - I = LastUseMI; - - // Extend the live range of the scratch register - PrevLastUseMI->getOperand(PrevLastUseOp).setIsKill(false); - RS->setUsed(CurrentScratchReg); - CurrentVirtReg = Reg; - - // We deleted the instruction we were scanning the operands of. - // Jump back to the instruction iterator loop. Don't increment - // past this instruction since we updated the iterator already. - DoIncr = false; - break; - } - - // Scavenge a new scratch register - CurrentVirtReg = Reg; - const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg); - CurrentScratchReg = RS->FindUnusedReg(RC); - if (CurrentScratchReg == 0) - // No register is "free". Scavenge a register. - CurrentScratchReg = RS->scavengeRegister(RC, I, SPAdj); - - PrevValue = Value; - } - // replace this reference to the virtual register with the + + // When we first encounter a new virtual register, it + // must be a definition. + assert(MI->getOperand(i).isDef() && + "frame index virtual missing def!"); + // Scavenge a new scratch register + const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg); + unsigned ScratchReg = RS->scavengeRegister(RC, J, SPAdj); + + ++NumScavengedRegs; + + // Replace this reference to the virtual register with the // scratch register. - assert (CurrentScratchReg && "Missing scratch register!"); - MI->getOperand(i).setReg(CurrentScratchReg); - - if (MI->getOperand(i).isKill()) { - isKillInsn = true; - PrevLastUseOp = i; - PrevLastUseMI = MI; - } + assert (ScratchReg && "Missing scratch register!"); + Fn.getRegInfo().replaceRegWith(Reg, ScratchReg); + + // Because this instruction was processed by the RS before this + // register was allocated, make sure that the RS now records the + // register as being used. + RS->setUsed(ScratchReg); } } - // If this is the last use of the scratch, stop tracking it. The - // last use will be a kill operand in an instruction that does - // not also define the scratch register. - if (isKillInsn && !isDefInsn) { - CurrentVirtReg = 0; - havePrevValue = trackingCurrentValue; - } - // Similarly, notice if instruction clobbered the value in the - // register we're tracking for possible later reuse. This is noted - // above, but enforced here since the value is still live while we - // process the rest of the operands of the instruction. - if (clobbersScratchReg) { - havePrevValue = false; - CurrentScratchReg = 0; - } - if (DoIncr) { - RS->forward(I); + + // If the scavenger needed to use one of its spill slots, the + // spill code will have been inserted in between I and J. This is a + // problem because we need the spill code before I: Move I to just + // prior to J. + if (I != std::prev(J)) { + BB->splice(J, BB, I); + + // Before we move I, we need to prepare the RS to visit I again. + // Specifically, RS will assert if it sees uses of registers that + // it believes are undefined. Because we have already processed + // register kills in I, when it visits I again, it will believe that + // those registers are undefined. To avoid this situation, unprocess + // the instruction I. + assert(RS->getCurrentPosition() == I && + "The register scavenger has an unexpected position"); + I = P; + RS->unprocess(P); + } else ++I; - } } } }