X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86FrameLowering.cpp;h=edbe7d0894a0805e8cbb2ebaaea2b1888b15a3c0;hb=00552e3875ee5f382db6c98286a241a7d0efe1b8;hp=95bca7d936f69dc98cf2543a584fe23d6ca53d85;hpb=d8ae3e1feec0fa1c06e96e43a80ad2c5e5208461;p=oota-llvm.git diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 95bca7d936f..edbe7d0894a 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -37,8 +37,47 @@ using namespace llvm; // FIXME: completely move here. extern cl::opt ForceStackAlign; +X86FrameLowering::X86FrameLowering(const X86Subtarget &STI, + unsigned StackAlignOverride) + : TargetFrameLowering(StackGrowsDown, StackAlignOverride, + STI.is64Bit() ? -8 : -4), + STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) { + // Cache a bunch of frame-related predicates for this subtarget. + SlotSize = TRI->getSlotSize(); + Is64Bit = STI.is64Bit(); + IsLP64 = STI.isTarget64BitLP64(); + // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit. + Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64(); + StackPtr = TRI->getStackRegister(); +} + bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { - return !MF.getFrameInfo()->hasVarSizedObjects(); + return !MF.getFrameInfo()->hasVarSizedObjects() && + !MF.getInfo()->getHasPushSequences(); +} + +/// canSimplifyCallFramePseudos - If there is a reserved call frame, the +/// call frame pseudos can be simplified. Having a FP, as in the default +/// implementation, is not sufficient here since we can't always use it. +/// Use a more nuanced condition. +bool +X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const { + return hasReservedCallFrame(MF) || + (hasFP(MF) && !TRI->needsStackRealignment(MF)) || + TRI->hasBasePointer(MF); +} + +// needsFrameIndexResolution - Do we need to perform FI resolution for +// this function. Normally, this is required only when the function +// has any stack objects. However, FI resolution actually has another job, +// not apparent from the title - it resolves callframesetup/destroy +// that were not simplified earlier. +// So, this is required for x86 functions that have push sequences even +// when there are no stack objects. +bool +X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const { + return MF.getFrameInfo()->hasStackObjects() || + MF.getInfo()->getHasPushSequences(); } /// hasFP - Return true if the specified function should have a dedicated frame @@ -47,12 +86,11 @@ bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { bool X86FrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const MachineModuleInfo &MMI = MF.getMMI(); - const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); return (MF.getTarget().Options.DisableFramePointerElim(MF) || - RegInfo->needsStackRealignment(MF) || + TRI->needsStackRealignment(MF) || MFI->hasVarSizedObjects() || - MFI->isFrameAddressTaken() || MFI->hasInlineAsmWithSPAdjust() || + MFI->isFrameAddressTaken() || MFI->hasOpaqueSPAdjustment() || MF.getInfo()->getForceFramePointer() || MMI.callsUnwindInit() || MMI.callsEHReturn() || MFI->hasStackMap() || MFI->hasPatchPoint()); @@ -82,6 +120,14 @@ static unsigned getADDriOpcode(unsigned IsLP64, int64_t Imm) { } } +static unsigned getSUBrrOpcode(unsigned isLP64) { + return isLP64 ? X86::SUB64rr : X86::SUB32rr; +} + +static unsigned getADDrrOpcode(unsigned isLP64) { + return isLP64 ? X86::ADD64rr : X86::ADD32rr; +} + static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) { if (IsLP64) { if (isInt<8>(Imm)) @@ -93,16 +139,6 @@ static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) { return X86::AND32ri; } -static unsigned getPUSHiOpcode(bool IsLP64, MachineOperand MO) { - // We don't support LP64 for now. - assert(!IsLP64); - - if (MO.isImm() && isInt<8>(MO.getImm())) - return X86::PUSH32i8; - - return X86::PUSHi32;; -} - static unsigned getLEArOpcode(unsigned IsLP64) { return IsLP64 ? X86::LEA64r : X86::LEA32r; } @@ -112,7 +148,7 @@ static unsigned getLEArOpcode(unsigned IsLP64) { /// to this register without worry about clobbering it. static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - const TargetRegisterInfo &TRI, + const TargetRegisterInfo *TRI, bool Is64Bit) { const MachineFunction *MF = MBB.getParent(); const Function *F = MF->getFunction(); @@ -151,7 +187,7 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, unsigned Reg = MO.getReg(); if (!Reg) continue; - for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI) + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) Uses.insert(*AI); } @@ -165,38 +201,90 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, return 0; } +static bool isEAXLiveIn(MachineFunction &MF) { + for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(), + EE = MF.getRegInfo().livein_end(); II != EE; ++II) { + unsigned Reg = II->first; + + if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX || + Reg == X86::AH || Reg == X86::AL) + return true; + } + + return false; +} + +/// Check whether or not the terminators of \p MBB needs to read EFLAGS. +static bool terminatorsNeedFlagsAsInput(const MachineBasicBlock &MBB) { + for (const MachineInstr &MI : MBB.terminators()) { + bool BreakNext = false; + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (Reg != X86::EFLAGS) + continue; + + // This terminator needs an eflag that is not defined + // by a previous terminator. + if (!MO.isDef()) + return true; + BreakNext = true; + } + if (BreakNext) + break; + } + return false; +} /// emitSPUpdate - Emit a series of instructions to increment / decrement the /// stack pointer by a constant value. -static -void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - unsigned StackPtr, int64_t NumBytes, - bool Is64BitTarget, bool Is64BitStackPtr, bool UseLEA, - const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) { +void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + int64_t NumBytes, bool InEpilogue) const { bool isSub = NumBytes < 0; uint64_t Offset = isSub ? -NumBytes : NumBytes; - unsigned Opc; - if (UseLEA) - Opc = getLEArOpcode(Is64BitStackPtr); - else - Opc = isSub - ? getSUBriOpcode(Is64BitStackPtr, Offset) - : getADDriOpcode(Is64BitStackPtr, Offset); uint64_t Chunk = (1LL << 31) - 1; DebugLoc DL = MBB.findDebugLoc(MBBI); while (Offset) { - uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset; - if (ThisVal == (Is64BitTarget ? 8 : 4)) { + if (Offset > Chunk) { + // Rather than emit a long series of instructions for large offsets, + // load the offset into a register and do one sub/add + unsigned Reg = 0; + + if (isSub && !isEAXLiveIn(*MBB.getParent())) + Reg = (unsigned)(Is64Bit ? X86::RAX : X86::EAX); + else + Reg = findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit); + + if (Reg) { + unsigned Opc = Is64Bit ? X86::MOV64ri : X86::MOV32ri; + BuildMI(MBB, MBBI, DL, TII.get(Opc), Reg) + .addImm(Offset); + Opc = isSub + ? getSUBrrOpcode(Is64Bit) + : getADDrrOpcode(Is64Bit); + MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) + .addReg(StackPtr) + .addReg(Reg); + MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. + Offset = 0; + continue; + } + } + + uint64_t ThisVal = std::min(Offset, Chunk); + if (ThisVal == (Is64Bit ? 8 : 4)) { // Use push / pop instead. unsigned Reg = isSub - ? (unsigned)(Is64BitTarget ? X86::RAX : X86::EAX) - : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64BitTarget); + ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX) + : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit); if (Reg) { - Opc = isSub - ? (Is64BitTarget ? X86::PUSH64r : X86::PUSH32r) - : (Is64BitTarget ? X86::POP64r : X86::POP32r); + unsigned Opc = isSub + ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r) + : (Is64Bit ? X86::POP64r : X86::POP32r); MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc)) .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub)); if (isSub) @@ -206,25 +294,59 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, } } - MachineInstr *MI = nullptr; - - if (UseLEA) { - MI = addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), - StackPtr, false, isSub ? -ThisVal : ThisVal); - } else { - MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) - .addReg(StackPtr) - .addImm(ThisVal); - MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. - } - + MachineInstrBuilder MI = BuildStackAdjustment( + MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue); if (isSub) - MI->setFlag(MachineInstr::FrameSetup); + MI.setMIFlag(MachineInstr::FrameSetup); Offset -= ThisVal; } } +MachineInstrBuilder X86FrameLowering::BuildStackAdjustment( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, + int64_t Offset, bool InEpilogue) const { + assert(Offset != 0 && "zero offset stack adjustment requested"); + + // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue + // is tricky. + bool UseLEA; + if (!InEpilogue) { + UseLEA = STI.useLeaForSP(); + } else { + // If we can use LEA for SP but we shouldn't, check that none + // of the terminators uses the eflags. Otherwise we will insert + // a ADD that will redefine the eflags and break the condition. + // Alternatively, we could move the ADD, but this may not be possible + // and is an optimization anyway. + UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent()); + if (UseLEA && !STI.useLeaForSP()) + UseLEA = terminatorsNeedFlagsAsInput(MBB); + // If that assert breaks, that means we do not do the right thing + // in canUseAsEpilogue. + assert((UseLEA || !terminatorsNeedFlagsAsInput(MBB)) && + "We shouldn't have allowed this insertion point"); + } + + MachineInstrBuilder MI; + if (UseLEA) { + MI = addRegOffset(BuildMI(MBB, MBBI, DL, + TII.get(getLEArOpcode(Uses64BitFramePtr)), + StackPtr), + StackPtr, false, Offset); + } else { + bool IsSub = Offset < 0; + uint64_t AbsOffset = IsSub ? -Offset : Offset; + unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset) + : getADDriOpcode(Uses64BitFramePtr, AbsOffset); + MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) + .addReg(StackPtr) + .addImm(AbsOffset); + MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. + } + return MI; +} + /// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator. static void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, @@ -249,45 +371,9 @@ void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, } } -/// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower -/// iterator. -static -void mergeSPUpdatesDown(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - unsigned StackPtr, uint64_t *NumBytes = nullptr) { - // FIXME: THIS ISN'T RUN!!! - return; - - if (MBBI == MBB.end()) return; - - MachineBasicBlock::iterator NI = std::next(MBBI); - if (NI == MBB.end()) return; - - unsigned Opc = NI->getOpcode(); - if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || - Opc == X86::ADD32ri || Opc == X86::ADD32ri8) && - NI->getOperand(0).getReg() == StackPtr) { - if (NumBytes) - *NumBytes -= NI->getOperand(2).getImm(); - MBB.erase(NI); - MBBI = NI; - } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || - Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && - NI->getOperand(0).getReg() == StackPtr) { - if (NumBytes) - *NumBytes += NI->getOperand(2).getImm(); - MBB.erase(NI); - MBBI = NI; - } -} - -/// mergeSPUpdates - Checks the instruction before/after the passed -/// instruction. If it is an ADD/SUB/LEA instruction it is deleted argument and -/// the stack adjustment is returned as a positive value for ADD/LEA and a -/// negative for SUB. -static int mergeSPUpdates(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, unsigned StackPtr, - bool doMergeWithPrevious) { +int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + bool doMergeWithPrevious) const { if ((doMergeWithPrevious && MBBI == MBB.begin()) || (!doMergeWithPrevious && MBBI == MBB.end())) return 0; @@ -316,17 +402,13 @@ static int mergeSPUpdates(MachineBasicBlock &MBB, return Offset; } -static bool isEAXLiveIn(MachineFunction &MF) { - for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(), - EE = MF.getRegInfo().livein_end(); II != EE; ++II) { - unsigned Reg = II->first; - - if (Reg == X86::EAX || Reg == X86::AX || - Reg == X86::AH || Reg == X86::AL) - return true; - } - - return false; +void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc DL, + MCCFIInstruction CFIInst) const { + MachineFunction &MF = *MBB.getParent(); + unsigned CFIIndex = MF.getMMI().addFrameInst(CFIInst); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); } void @@ -337,7 +419,6 @@ X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineFrameInfo *MFI = MF.getFrameInfo(); MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); // Add callee saved registers to move list. const std::vector &CSI = MFI->getCalleeSavedInfo(); @@ -350,11 +431,8 @@ X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, unsigned Reg = I->getReg(); unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); - unsigned CFIIndex = - MMI.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, - Offset)); - BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + BuildCFI(MBB, MBBI, DL, + MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); } } @@ -375,12 +453,20 @@ static bool usesTheStack(const MachineFunction &MF) { return false; } -void X86FrameLowering::getStackProbeFunction(const X86Subtarget &STI, - unsigned &CallOp, - const char *&Symbol) { - CallOp = STI.is64Bit() ? X86::W64ALLOCA : X86::CALLpcrel32; +void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL) const { + bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large; - if (STI.is64Bit()) { + unsigned CallOp; + if (Is64Bit) + CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32; + else + CallOp = X86::CALLpcrel32; + + const char *Symbol; + if (Is64Bit) { if (STI.isTargetCygMing()) { Symbol = "___chkstk_ms"; } else { @@ -390,6 +476,79 @@ void X86FrameLowering::getStackProbeFunction(const X86Subtarget &STI, Symbol = "_alloca"; else Symbol = "_chkstk"; + + MachineInstrBuilder CI; + + // All current stack probes take AX and SP as input, clobber flags, and + // preserve all registers. x86_64 probes leave RSP unmodified. + if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) { + // For the large code model, we have to call through a register. Use R11, + // as it is scratch in all supported calling conventions. + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11) + .addExternalSymbol(Symbol); + CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11); + } else { + CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addExternalSymbol(Symbol); + } + + unsigned AX = Is64Bit ? X86::RAX : X86::EAX; + unsigned SP = Is64Bit ? X86::RSP : X86::ESP; + CI.addReg(AX, RegState::Implicit) + .addReg(SP, RegState::Implicit) + .addReg(AX, RegState::Define | RegState::Implicit) + .addReg(SP, RegState::Define | RegState::Implicit) + .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); + + if (Is64Bit) { + // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp + // themselves. It also does not clobber %rax so we can reuse it when + // adjusting %rsp. + BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), X86::RSP) + .addReg(X86::RSP) + .addReg(X86::RAX); + } +} + +static unsigned calculateSetFPREG(uint64_t SPAdjust) { + // Win64 ABI has a less restrictive limitation of 240; 128 works equally well + // and might require smaller successive adjustments. + const uint64_t Win64MaxSEHOffset = 128; + uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset); + // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode. + return SEHFrameOffset & -16; +} + +// If we're forcing a stack realignment we can't rely on just the frame +// info, we need to know the ABI stack alignment as well in case we +// have a call out. Otherwise just make sure we have some alignment - we'll +// go with the minimum SlotSize. +uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment. + unsigned StackAlign = getStackAlignment(); + if (ForceStackAlign) { + if (MFI->hasCalls()) + MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; + else if (MaxAlign < SlotSize) + MaxAlign = SlotSize; + } + return MaxAlign; +} + +void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL, + uint64_t MaxAlign) const { + uint64_t Val = -MaxAlign; + MachineInstr *MI = + BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)), + StackPtr) + .addReg(StackPtr) + .addImm(Val) + .setMIFlag(MachineInstr::FrameSetup); + + // The EFLAGS implicit def is dead. + MI->getOperand(3).setIsDead(); } /// emitPrologue - Push callee-saved registers onto the stack, which @@ -476,71 +635,62 @@ void X86FrameLowering::getStackProbeFunction(const X86Subtarget &STI, - for 32-bit code, substitute %e?? registers for %r?? */ -void X86FrameLowering::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB. +void X86FrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + assert(&STI == &MF.getSubtarget() && + "MF used frame lowering for wrong subtarget"); MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *Fn = MF.getFunction(); - const X86RegisterInfo *RegInfo = - static_cast(MF.getSubtarget().getRegisterInfo()); - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); MachineModuleInfo &MMI = MF.getMMI(); X86MachineFunctionInfo *X86FI = MF.getInfo(); - uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment. + uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment. uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate. bool HasFP = hasFP(MF); - const X86Subtarget &STI = MF.getTarget().getSubtarget(); - bool Is64Bit = STI.is64Bit(); - // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit. - const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64(); - bool IsWin64 = STI.isTargetWin64(); - // Not necessarily synonymous with IsWin64. - bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); - bool NeedsWinEH = IsWinEH && Fn->needsUnwindTableEntry(); + bool IsWin64CC = STI.isCallingConvWin64(Fn->getCallingConv()); + bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); + bool NeedsWinCFI = IsWin64Prologue && Fn->needsUnwindTableEntry(); bool NeedsDwarfCFI = - !IsWinEH && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry()); - bool UseLEA = STI.useLeaForSP(); - unsigned StackAlign = getStackAlignment(); - unsigned SlotSize = RegInfo->getSlotSize(); - unsigned FramePtr = RegInfo->getFrameRegister(MF); - const unsigned MachineFramePtr = STI.isTarget64BitILP32() ? - getX86SubSuperRegister(FramePtr, MVT::i64, false) : FramePtr; - unsigned StackPtr = RegInfo->getStackRegister(); - unsigned BasePtr = RegInfo->getBaseRegister(); + !IsWin64Prologue && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry()); + unsigned FramePtr = TRI->getFrameRegister(MF); + const unsigned MachineFramePtr = + STI.isTarget64BitILP32() + ? getX86SubSuperRegister(FramePtr, MVT::i64, false) + : FramePtr; + unsigned BasePtr = TRI->getBaseRegister(); DebugLoc DL; - // If we're forcing a stack realignment we can't rely on just the frame - // info, we need to know the ABI stack alignment as well in case we - // have a call out. Otherwise just make sure we have some alignment - we'll - // go with the minimum SlotSize. - if (ForceStackAlign) { - if (MFI->hasCalls()) - MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; - else if (MaxAlign < SlotSize) - MaxAlign = SlotSize; - } - // Add RETADDR move area to callee saved frame size. int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); + if (TailCallReturnAddrDelta && IsWin64Prologue) + report_fatal_error("Can't handle guaranteed tail call under win64 yet"); + if (TailCallReturnAddrDelta < 0) X86FI->setCalleeSavedFrameSize( X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta); bool UseStackProbe = (STI.isOSWindows() && !STI.isTargetMachO()); + // The default stack probe size is 4096 if the function has no stackprobesize + // attribute. + unsigned StackProbeSize = 4096; + if (Fn->hasFnAttribute("stack-probe-size")) + Fn->getFnAttribute("stack-probe-size") + .getValueAsString() + .getAsInteger(0, StackProbeSize); + // If this is x86-64 and the Red Zone is not disabled, if we are a leaf // function, and use up to 128 bytes of stack space, don't have a frame // pointer, calls, or dynamic alloca then we do not need to adjust the // stack pointer (we fit in the Red Zone). We also check that we don't // push and pop from the stack. - if (Is64Bit && !Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::NoRedZone) && - !RegInfo->needsStackRealignment(MF) && - !MFI->hasVarSizedObjects() && // No dynamic alloca. - !MFI->adjustsStack() && // No calls. - !IsWin64 && // Win64 has no Red Zone - !usesTheStack(MF) && // Don't push and pop. - !MF.shouldSplitStack()) { // Regular stack + if (Is64Bit && !Fn->hasFnAttribute(Attribute::NoRedZone) && + !TRI->needsStackRealignment(MF) && + !MFI->hasVarSizedObjects() && // No dynamic alloca. + !MFI->adjustsStack() && // No calls. + !IsWin64CC && // Win64 has no Red Zone + !usesTheStack(MF) && // Don't push and pop. + !MF.shouldSplitStack()) { // Regular stack uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); if (HasFP) MinSize += SlotSize; StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); @@ -551,14 +701,9 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // applies to tail call optimized functions where the callee argument stack // size is bigger than the callers. if (TailCallReturnAddrDelta < 0) { - MachineInstr *MI = - BuildMI(MBB, MBBI, DL, - TII.get(getSUBriOpcode(Uses64BitFramePtr, -TailCallReturnAddrDelta)), - StackPtr) - .addReg(StackPtr) - .addImm(-TailCallReturnAddrDelta) + BuildStackAdjustment(MBB, MBBI, DL, TailCallReturnAddrDelta, + /*InEpilogue=*/false) .setMIFlag(MachineInstr::FrameSetup); - MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. } // Mapping for machine moves: @@ -584,14 +729,12 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // If required, include space for extra hidden slot for stashing base pointer. if (X86FI->getRestoreBasePointer()) FrameSize += SlotSize; - if (RegInfo->needsStackRealignment(MF)) { - // Callee-saved registers are pushed on stack before the stack - // is realigned. - FrameSize -= X86FI->getCalleeSavedFrameSize(); - NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign; - } else { - NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize(); - } + + NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize(); + + // Callee-saved registers are pushed on stack before the stack is realigned. + if (TRI->needsStackRealignment(MF) && !IsWin64Prologue) + NumBytes = RoundUpToAlignment(NumBytes, MaxAlign); // Get the offset of the stack slot for the EBP register, which is // guaranteed to be the last slot by processFunctionBeforeFrameFinalized. @@ -607,40 +750,36 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // Mark the place where EBP/RBP was saved. // Define the current CFA rule to use the provided offset. assert(StackSize); - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, 2 * stackGrowth)); - BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + BuildCFI(MBB, MBBI, DL, + MCCFIInstruction::createDefCfaOffset(nullptr, 2 * stackGrowth)); // Change the rule for the FramePtr to be an "offset" rule. - unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true); - CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createOffset(nullptr, - DwarfFramePtr, 2 * stackGrowth)); - BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); + BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createOffset( + nullptr, DwarfFramePtr, 2 * stackGrowth)); } - if (NeedsWinEH) { + if (NeedsWinCFI) { BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)) .addImm(FramePtr) .setMIFlag(MachineInstr::FrameSetup); } - // Update EBP with the new base value. - BuildMI(MBB, MBBI, DL, - TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), FramePtr) - .addReg(StackPtr) - .setMIFlag(MachineInstr::FrameSetup); + if (!IsWin64Prologue) { + // Update EBP with the new base value. + BuildMI(MBB, MBBI, DL, + TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), + FramePtr) + .addReg(StackPtr) + .setMIFlag(MachineInstr::FrameSetup); + } if (NeedsDwarfCFI) { // Mark effective beginning of when frame pointer becomes valid. // Define the current CFA to use the EBP/RBP register. - unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true); - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr)); - BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); + BuildCFI(MBB, MBBI, DL, + MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr)); } // Mark the FramePtr as live-in in every block. @@ -655,6 +794,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { int StackOffset = 2 * stackGrowth; while (MBBI != MBB.end() && + MBBI->getFlag(MachineInstr::FrameSetup) && (MBBI->getOpcode() == X86::PUSH32r || MBBI->getOpcode() == X86::PUSH64r)) { PushedRegs = true; @@ -665,14 +805,12 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // Mark callee-saved push instruction. // Define the current CFA rule to use the provided offset. assert(StackSize); - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, StackOffset)); - BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + BuildCFI(MBB, MBBI, DL, + MCCFIInstruction::createDefCfaOffset(nullptr, StackOffset)); StackOffset += stackGrowth; } - if (NeedsWinEH) { + if (NeedsWinCFI) { BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)).addImm(Reg).setMIFlag( MachineInstr::FrameSetup); } @@ -680,33 +818,19 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // Realign stack after we pushed callee-saved registers (so that we'll be // able to calculate their offsets from the frame pointer). - if (RegInfo->needsStackRealignment(MF)) { + // Don't do this for Win64, it needs to realign the stack after the prologue. + if (!IsWin64Prologue && TRI->needsStackRealignment(MF)) { assert(HasFP && "There should be a frame pointer if stack is realigned."); - uint64_t Val = -MaxAlign; - MachineInstr *MI = - BuildMI(MBB, MBBI, DL, - TII.get(getANDriOpcode(Uses64BitFramePtr, Val)), StackPtr) - .addReg(StackPtr) - .addImm(Val) - .setMIFlag(MachineInstr::FrameSetup); - - // The EFLAGS implicit def is dead. - MI->getOperand(3).setIsDead(); + BuildStackAlignAND(MBB, MBBI, DL, MaxAlign); } // If there is an SUB32ri of ESP immediately before this instruction, merge // the two. This can be the case when tail call elimination is enabled and // the callee has more arguments then the caller. - NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true); - - // If there is an ADD32ri or SUB32ri of ESP immediately after this - // instruction, merge the two instructions. - mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes); + NumBytes -= mergeSPUpdates(MBB, MBBI, true); // Adjust stack pointer: ESP -= numbytes. - static const size_t PageSize = 4096; - // Windows and cygwin/mingw require a prologue helper routine when allocating // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the @@ -715,12 +839,10 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // responsible for adjusting the stack pointer. Touching the stack at 4K // increments is necessary to ensure that the guard pages used by the OS // virtual memory manager are allocated in correct sequence. - if (NumBytes >= PageSize && UseStackProbe) { - const char *StackProbeSymbol; - unsigned CallOp; - - getStackProbeFunction(STI, CallOp, StackProbeSymbol); - + uint64_t AlignedNumBytes = NumBytes; + if (IsWin64Prologue && TRI->needsStackRealignment(MF)) + AlignedNumBytes = RoundUpToAlignment(AlignedNumBytes, MaxAlign); + if (AlignedNumBytes >= StackProbeSize && UseStackProbe) { // Check whether EAX is livein for this function. bool isEAXAlive = isEAXLiveIn(MF); @@ -738,9 +860,19 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { if (Is64Bit) { // Handle the 64-bit Windows ABI case where we need to call __chkstk. // Function prologue is responsible for adjusting the stack pointer. - BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX) - .addImm(NumBytes) - .setMIFlag(MachineInstr::FrameSetup); + if (isUInt<32>(NumBytes)) { + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) + .addImm(NumBytes) + .setMIFlag(MachineInstr::FrameSetup); + } else if (isInt<32>(NumBytes)) { + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri32), X86::RAX) + .addImm(NumBytes) + .setMIFlag(MachineInstr::FrameSetup); + } else { + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX) + .addImm(NumBytes) + .setMIFlag(MachineInstr::FrameSetup); + } } else { // Allocate NumBytes-4 bytes on stack in case of isEAXAlive. // We'll also use 4 already allocated bytes for EAX. @@ -749,22 +881,17 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { .setMIFlag(MachineInstr::FrameSetup); } - BuildMI(MBB, MBBI, DL, - TII.get(CallOp)) - .addExternalSymbol(StackProbeSymbol) - .addReg(StackPtr, RegState::Define | RegState::Implicit) - .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit) - .setMIFlag(MachineInstr::FrameSetup); + // Save a pointer to the MI where we set AX. + MachineBasicBlock::iterator SetRAX = MBBI; + --SetRAX; + + // Call __chkstk, __chkstk_ms, or __alloca. + emitStackProbeCall(MF, MBB, MBBI, DL); + + // Apply the frame setup flag to all inserted instrs. + for (; SetRAX != MBBI; ++SetRAX) + SetRAX->setFlag(MachineInstr::FrameSetup); - if (Is64Bit) { - // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp - // themself. It also does not clobber %rax so we can reuse it when - // adjusting %rsp. - BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), StackPtr) - .addReg(StackPtr) - .addReg(X86::RAX) - .setMIFlag(MachineInstr::FrameSetup); - } if (isEAXAlive) { // Restore EAX MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), @@ -774,92 +901,93 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { MBB.insert(MBBI, MI); } } else if (NumBytes) { - emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, Uses64BitFramePtr, - UseLEA, TII, *RegInfo); + emitSPUpdate(MBB, MBBI, -(int64_t)NumBytes, /*InEpilogue=*/false); } + if (NeedsWinCFI && NumBytes) + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc)) + .addImm(NumBytes) + .setMIFlag(MachineInstr::FrameSetup); + int SEHFrameOffset = 0; - if (NeedsWinEH) { - if (HasFP) { - // We need to set frame base offset low enough such that all saved - // register offsets would be positive relative to it, but we can't - // just use NumBytes, because .seh_setframe offset must be <=240. - // So we pretend to have only allocated enough space to spill the - // non-volatile registers. - // We don't care about the rest of stack allocation, because unwinder - // will restore SP to (BP - SEHFrameOffset) - for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) { - int offset = MFI->getObjectOffset(Info.getFrameIdx()); - SEHFrameOffset = std::max(SEHFrameOffset, std::abs(offset)); - } - SEHFrameOffset += SEHFrameOffset % 16; // ensure alignmant - - // This only needs to account for XMM spill slots, GPR slots - // are covered by the .seh_pushreg's emitted above. - unsigned Size = SEHFrameOffset - X86FI->getCalleeSavedFrameSize(); - if (Size) { - BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc)) - .addImm(Size) - .setMIFlag(MachineInstr::FrameSetup); - } + if (IsWin64Prologue && HasFP) { + SEHFrameOffset = calculateSetFPREG(NumBytes); + if (SEHFrameOffset) + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr), + StackPtr, false, SEHFrameOffset); + else + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr).addReg(StackPtr); + if (NeedsWinCFI) BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame)) .addImm(FramePtr) .addImm(SEHFrameOffset) .setMIFlag(MachineInstr::FrameSetup); - } else { - // SP will be the base register for restoring XMMs - if (NumBytes) { - BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc)) - .addImm(NumBytes) - .setMIFlag(MachineInstr::FrameSetup); - } - } } - // Skip the rest of register spilling code - while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) + while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) { + const MachineInstr *FrameInstr = &*MBBI; ++MBBI; - // Emit SEH info for non-GPRs - if (NeedsWinEH) { - for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) { - unsigned Reg = Info.getReg(); - if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) - continue; - assert(X86::FR64RegClass.contains(Reg) && "Unexpected register class"); - - int Offset = getFrameIndexOffset(MF, Info.getFrameIdx()); - Offset += SEHFrameOffset; - - BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM)) - .addImm(Reg) - .addImm(Offset) - .setMIFlag(MachineInstr::FrameSetup); + if (NeedsWinCFI) { + int FI; + if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) { + if (X86::FR64RegClass.contains(Reg)) { + int Offset = getFrameIndexOffset(MF, FI); + Offset += SEHFrameOffset; + + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM)) + .addImm(Reg) + .addImm(Offset) + .setMIFlag(MachineInstr::FrameSetup); + } + } } + } + if (NeedsWinCFI) BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue)) .setMIFlag(MachineInstr::FrameSetup); + + // Realign stack after we spilled callee-saved registers (so that we'll be + // able to calculate their offsets from the frame pointer). + // Win64 requires aligning the stack after the prologue. + if (IsWin64Prologue && TRI->needsStackRealignment(MF)) { + assert(HasFP && "There should be a frame pointer if stack is realigned."); + BuildStackAlignAND(MBB, MBBI, DL, MaxAlign); } // If we need a base pointer, set it up here. It's whatever the value // of the stack pointer is at this point. Any variable size objects // will be allocated after this, so we can still use the base pointer // to reference locals. - if (RegInfo->hasBasePointer(MF)) { + if (TRI->hasBasePointer(MF)) { // Update the base pointer with the current stack pointer. unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr; BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr) .addReg(StackPtr) .setMIFlag(MachineInstr::FrameSetup); if (X86FI->getRestoreBasePointer()) { - // Stash value of base pointer. Saving RSP instead of EBP shortens dependence chain. + // Stash value of base pointer. Saving RSP instead of EBP shortens + // dependence chain. Used by SjLj EH. unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), FramePtr, true, X86FI->getRestoreBasePointerOffset()) .addReg(StackPtr) .setMIFlag(MachineInstr::FrameSetup); } + + if (X86FI->getHasSEHFramePtrSave()) { + // Stash the value of the frame pointer relative to the base pointer for + // Win32 EH. This supports Win32 EH, which does the inverse of the above: + // it recovers the frame pointer from the base pointer rather than the + // other way around. + unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), BasePtr, true, + getFrameIndexOffset(MF, X86FI->getSEHFramePtrSaveIndex())) + .addReg(FramePtr) + .setMIFlag(MachineInstr::FrameSetup); + } } if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) { @@ -867,12 +995,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { if (!HasFP && NumBytes) { // Define the current CFA rule to use the provided offset. assert(StackSize); - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, - -StackSize + stackGrowth)); - - BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaOffset( + nullptr, -StackSize + stackGrowth)); } // Emit DWARF info specifying the offsets of the callee-saved registers. @@ -881,79 +1005,51 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { } } +bool X86FrameLowering::canUseLEAForSPInEpilogue( + const MachineFunction &MF) const { + // We can't use LEA instructions for adjusting the stack pointer if this is a + // leaf function in the Win64 ABI. Only ADD instructions may be used to + // deallocate the stack. + // This means that we can use LEA for SP in two situations: + // 1. We *aren't* using the Win64 ABI which means we are free to use LEA. + // 2. We *have* a frame pointer which means we are permitted to use LEA. + return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF); +} + void X86FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); X86MachineFunctionInfo *X86FI = MF.getInfo(); - const X86RegisterInfo *RegInfo = - static_cast(MF.getSubtarget().getRegisterInfo()); - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); - MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - assert(MBBI != MBB.end() && "Returning block has no instructions"); - unsigned RetOpcode = MBBI->getOpcode(); - DebugLoc DL = MBBI->getDebugLoc(); - const X86Subtarget &STI = MF.getTarget().getSubtarget(); - bool Is64Bit = STI.is64Bit(); + MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); + DebugLoc DL; + if (MBBI != MBB.end()) + DL = MBBI->getDebugLoc(); // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit. - const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64(); const bool Is64BitILP32 = STI.isTarget64BitILP32(); - bool UseLEA = STI.useLeaForSP(); - unsigned StackAlign = getStackAlignment(); - unsigned SlotSize = RegInfo->getSlotSize(); - unsigned FramePtr = RegInfo->getFrameRegister(MF); - unsigned MachineFramePtr = Is64BitILP32 ? - getX86SubSuperRegister(FramePtr, MVT::i64, false) : FramePtr; - unsigned StackPtr = RegInfo->getStackRegister(); - - bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); - bool NeedsWinEH = IsWinEH && MF.getFunction()->needsUnwindTableEntry(); - - switch (RetOpcode) { - default: - llvm_unreachable("Can only insert epilog into returning blocks"); - case X86::RETQ: - case X86::RETL: - case X86::RETIL: - case X86::RETIQ: - case X86::TCRETURNdi: - case X86::TCRETURNri: - case X86::TCRETURNmi: - case X86::TCRETURNdi64: - case X86::TCRETURNri64: - case X86::TCRETURNmi64: - case X86::EH_RETURN: - case X86::EH_RETURN64: - break; // These are ok - } + unsigned FramePtr = TRI->getFrameRegister(MF); + unsigned MachineFramePtr = + Is64BitILP32 ? getX86SubSuperRegister(FramePtr, MVT::i64, false) + : FramePtr; + + bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); + bool NeedsWinCFI = + IsWin64Prologue && MF.getFunction()->needsUnwindTableEntry(); // Get the number of bytes to allocate from the FrameInfo. uint64_t StackSize = MFI->getStackSize(); - uint64_t MaxAlign = MFI->getMaxAlignment(); + uint64_t MaxAlign = calculateMaxStackAlign(MF); unsigned CSSize = X86FI->getCalleeSavedFrameSize(); uint64_t NumBytes = 0; - // If we're forcing a stack realignment we can't rely on just the frame - // info, we need to know the ABI stack alignment as well in case we - // have a call out. Otherwise just make sure we have some alignment - we'll - // go with the minimum. - if (ForceStackAlign) { - if (MFI->hasCalls()) - MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; - else - MaxAlign = MaxAlign ? MaxAlign : 4; - } - if (hasFP(MF)) { // Calculate required stack adjustment. uint64_t FrameSize = StackSize - SlotSize; - if (RegInfo->needsStackRealignment(MF)) { - // Callee-saved registers were pushed on stack before the stack - // was realigned. - FrameSize -= CSSize; - NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign; - } else { - NumBytes = FrameSize - CSSize; - } + NumBytes = FrameSize - CSSize; + + // Callee-saved registers were pushed on stack before the stack was + // realigned. + if (TRI->needsStackRealignment(MF) && !IsWin64Prologue) + NumBytes = RoundUpToAlignment(FrameSize, MaxAlign); // Pop EBP. BuildMI(MBB, MBBI, DL, @@ -961,6 +1057,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } else { NumBytes = StackSize - CSSize; } + uint64_t SEHStackAllocAmt = NumBytes; // Skip the callee-saved pop instructions. while (MBBI != MBB.begin()) { @@ -975,7 +1072,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } MachineBasicBlock::iterator FirstCSPop = MBBI; - DL = MBBI->getDebugLoc(); + if (MBBI != MBB.end()) + DL = MBBI->getDebugLoc(); // If there is an ADD32ri or SUB32ri of ESP immediately before this // instruction, merge the two instructions. @@ -985,13 +1083,24 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, // If dynamic alloca is used, then reset esp to point to the last callee-saved // slot before popping them off! Same applies for the case, when stack was // realigned. - if (RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) { - if (RegInfo->needsStackRealignment(MF)) + if (TRI->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) { + if (TRI->needsStackRealignment(MF)) MBBI = FirstCSPop; - if (CSSize != 0) { + unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt); + uint64_t LEAAmount = + IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize; + + // There are only two legal forms of epilogue: + // - add SEHAllocationSize, %rsp + // - lea SEHAllocationSize(%FramePtr), %rsp + // + // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence. + // However, we may use this sequence if we have a frame pointer because the + // effects of the prologue can safely be undone. + if (LEAAmount != 0) { unsigned Opc = getLEArOpcode(Uses64BitFramePtr); addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), - FramePtr, false, -CSSize); + FramePtr, false, LEAAmount); --MBBI; } else { unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr); @@ -1001,8 +1110,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } } else if (NumBytes) { // Adjust stack pointer back: ESP += numbytes. - emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, Uses64BitFramePtr, UseLEA, - TII, *RegInfo); + emitSPUpdate(MBB, MBBI, NumBytes, /*InEpilogue=*/true); --MBBI; } @@ -1012,147 +1120,105 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, // into the epilogue. To cope with that, we insert an epilogue marker here, // then replace it with a 'nop' if it ends up immediately after a CALL in the // final emitted code. - if (NeedsWinEH) + if (NeedsWinCFI) BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue)); - // We're returning from function via eh_return. - if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) { - MBBI = MBB.getLastNonDebugInstr(); - MachineOperand &DestAddr = MBBI->getOperand(0); - assert(DestAddr.isReg() && "Offset should be in register!"); - BuildMI(MBB, MBBI, DL, - TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), - StackPtr).addReg(DestAddr.getReg()); - } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi || - RetOpcode == X86::TCRETURNmi || - RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 || - RetOpcode == X86::TCRETURNmi64) { - bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64; - // Tail call return: adjust the stack pointer and jump to callee. - MBBI = MBB.getLastNonDebugInstr(); - MachineOperand &JumpTarget = MBBI->getOperand(0); - MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1); - assert(StackAdjust.isImm() && "Expecting immediate value."); - - // Adjust stack pointer. - int StackAdj = StackAdjust.getImm(); - int MaxTCDelta = X86FI->getTCReturnAddrDelta(); - int Offset = 0; - assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive"); - - // Incoporate the retaddr area. - Offset = StackAdj-MaxTCDelta; - assert(Offset >= 0 && "Offset should never be negative"); - - if (Offset) { - // Check for possible merge with preceding ADD instruction. - Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true); - emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, Uses64BitFramePtr, - UseLEA, TII, *RegInfo); - } - - // Jump to label or value in register. - if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) { - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi) - ? X86::TAILJMPd : X86::TAILJMPd64)); - if (JumpTarget.isGlobal()) - MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), - JumpTarget.getTargetFlags()); - else { - assert(JumpTarget.isSymbol()); - MIB.addExternalSymbol(JumpTarget.getSymbolName(), - JumpTarget.getTargetFlags()); - } - } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) { - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi) - ? X86::TAILJMPm : X86::TAILJMPm64)); - for (unsigned i = 0; i != 5; ++i) - MIB.addOperand(MBBI->getOperand(i)); - } else if (RetOpcode == X86::TCRETURNri64) { - BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)). - addReg(JumpTarget.getReg(), RegState::Kill); - } else { - BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)). - addReg(JumpTarget.getReg(), RegState::Kill); - } - - MachineInstr *NewMI = std::prev(MBBI); - NewMI->copyImplicitOps(MF, MBBI); - - // Delete the pseudo instruction TCRETURN. - MBB.erase(MBBI); - } else if ((RetOpcode == X86::RETQ || RetOpcode == X86::RETL || - RetOpcode == X86::RETIQ || RetOpcode == X86::RETIL) && - (X86FI->getTCReturnAddrDelta() < 0)) { - // Add the return addr area delta back since we are not tail calling. - int delta = -1*X86FI->getTCReturnAddrDelta(); - MBBI = MBB.getLastNonDebugInstr(); + // Add the return addr area delta back since we are not tail calling. + int Offset = -1 * X86FI->getTCReturnAddrDelta(); + assert(Offset >= 0 && "TCDelta should never be positive"); + if (Offset) { + MBBI = MBB.getFirstTerminator(); // Check for possible merge with preceding ADD instruction. - delta += mergeSPUpdates(MBB, MBBI, StackPtr, true); - emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, Uses64BitFramePtr, UseLEA, TII, - *RegInfo); + Offset += mergeSPUpdates(MBB, MBBI, true); + emitSPUpdate(MBB, MBBI, Offset, /*InEpilogue=*/true); } } int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) const { - const X86RegisterInfo *RegInfo = - static_cast(MF.getSubtarget().getRegisterInfo()); const MachineFrameInfo *MFI = MF.getFrameInfo(); + // Offset will hold the offset from the stack pointer at function entry to the + // object. + // We need to factor in additional offsets applied during the prologue to the + // frame, base, and stack pointer depending on which is used. int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea(); + const X86MachineFunctionInfo *X86FI = MF.getInfo(); + unsigned CSSize = X86FI->getCalleeSavedFrameSize(); uint64_t StackSize = MFI->getStackSize(); + bool HasFP = hasFP(MF); + bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); + int64_t FPDelta = 0; + + if (IsWin64Prologue) { + assert(!MFI->hasCalls() || (StackSize % 16) == 8); - if (RegInfo->hasBasePointer(MF)) { - assert (hasFP(MF) && "VLAs and dynamic stack realign, but no FP?!"); + // Calculate required stack adjustment. + uint64_t FrameSize = StackSize - SlotSize; + // If required, include space for extra hidden slot for stashing base pointer. + if (X86FI->getRestoreBasePointer()) + FrameSize += SlotSize; + uint64_t NumBytes = FrameSize - CSSize; + + uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes); + if (FI && FI == X86FI->getFAIndex()) + return -SEHFrameOffset; + + // FPDelta is the offset from the "traditional" FP location of the old base + // pointer followed by return address and the location required by the + // restricted Win64 prologue. + // Add FPDelta to all offsets below that go through the frame pointer. + FPDelta = FrameSize - SEHFrameOffset; + assert((!MFI->hasCalls() || (FPDelta % 16) == 0) && + "FPDelta isn't aligned per the Win64 ABI!"); + } + + + if (TRI->hasBasePointer(MF)) { + assert(HasFP && "VLAs and dynamic stack realign, but no FP?!"); if (FI < 0) { // Skip the saved EBP. - return Offset + RegInfo->getSlotSize(); + return Offset + SlotSize + FPDelta; } else { assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0); return Offset + StackSize; } - } else if (RegInfo->needsStackRealignment(MF)) { + } else if (TRI->needsStackRealignment(MF)) { if (FI < 0) { // Skip the saved EBP. - return Offset + RegInfo->getSlotSize(); + return Offset + SlotSize + FPDelta; } else { assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0); return Offset + StackSize; } // FIXME: Support tail calls } else { - if (!hasFP(MF)) + if (!HasFP) return Offset + StackSize; // Skip the saved EBP. - Offset += RegInfo->getSlotSize(); + Offset += SlotSize; // Skip the RETADDR move area - const X86MachineFunctionInfo *X86FI = MF.getInfo(); int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); if (TailCallReturnAddrDelta < 0) Offset -= TailCallReturnAddrDelta; } - return Offset; + return Offset + FPDelta; } int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const { - const X86RegisterInfo *RegInfo = - static_cast(MF.getSubtarget().getRegisterInfo()); // We can't calculate offset from frame pointer if the stack is realigned, // so enforce usage of stack/base pointer. The base pointer is used when we // have dynamic allocas in addition to dynamic realignment. - if (RegInfo->hasBasePointer(MF)) - FrameReg = RegInfo->getBaseRegister(); - else if (RegInfo->needsStackRealignment(MF)) - FrameReg = RegInfo->getStackRegister(); + if (TRI->hasBasePointer(MF)) + FrameReg = TRI->getBaseRegister(); + else if (TRI->needsStackRealignment(MF)) + FrameReg = TRI->getStackRegister(); else - FrameReg = RegInfo->getFrameRegister(MF); + FrameReg = TRI->getFrameRegister(MF); return getFrameIndexOffset(MF, FI); } @@ -1163,8 +1229,6 @@ int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF, int F const uint64_t StackSize = MFI->getStackSize(); { #ifndef NDEBUG - const X86RegisterInfo *RegInfo = - static_cast(MF.getSubtarget().getRegisterInfo()); // Note: LLVM arranges the stack as: // Args > Saved RetPC (<--FP) > CSRs > dynamic alignment (<--BP) // > "Stack Slots" (<--SP) @@ -1176,7 +1240,7 @@ int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF, int F // frame). As a result, THE RESULT OF THIS CALL IS MEANINGLESS FOR CSRs // AND FixedObjects IFF needsStackRealignment or hasVarSizedObject. - assert(!RegInfo->hasBasePointer(MF) && "we don't handle this case"); + assert(!TRI->hasBasePointer(MF) && "we don't handle this case"); // We don't handle tail calls, and shouldn't be seeing them // either. @@ -1218,14 +1282,12 @@ int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF, int F return Offset + StackSize; } // Simplified from getFrameIndexReference keeping only StackPointer cases -int X86FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF, int FI, - unsigned &FrameReg) const { - const X86RegisterInfo *RegInfo = - static_cast(MF.getSubtarget().getRegisterInfo()); - - assert(!RegInfo->hasBasePointer(MF) && "we don't handle this case"); +int X86FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF, + int FI, + unsigned &FrameReg) const { + assert(!TRI->hasBasePointer(MF) && "we don't handle this case"); - FrameReg = RegInfo->getStackRegister(); + FrameReg = TRI->getStackRegister(); return getFrameIndexOffsetFromSP(MF, FI); } @@ -1233,9 +1295,6 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots( MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector &CSI) const { MachineFrameInfo *MFI = MF.getFrameInfo(); - const X86RegisterInfo *RegInfo = - static_cast(MF.getSubtarget().getRegisterInfo()); - unsigned SlotSize = RegInfo->getSlotSize(); X86MachineFunctionInfo *X86FI = MF.getInfo(); unsigned CalleeSavedFrameSize = 0; @@ -1249,7 +1308,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots( // Since emitPrologue and emitEpilogue will handle spilling and restoring of // the frame register, we can delete it from CSI list and not have to worry // about avoiding it later. - unsigned FPReg = RegInfo->getFrameRegister(MF); + unsigned FPReg = TRI->getFrameRegister(MF); for (unsigned i = 0; i < CSI.size(); ++i) { if (TRI->regsOverlap(CSI[i].getReg(),FPReg)) { CSI.erase(CSI.begin() + i); @@ -1280,7 +1339,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots( if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) continue; - const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); // ensure alignment SpillSlotOffset -= std::abs(SpillSlotOffset) % RC->getAlignment(); // spill into slot @@ -1300,10 +1359,6 @@ bool X86FrameLowering::spillCalleeSavedRegisters( const TargetRegisterInfo *TRI) const { DebugLoc DL = MBB.findDebugLoc(MI); - MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); - const X86Subtarget &STI = MF.getTarget().getSubtarget(); - // Push GPRs. It increases frame size. unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r; for (unsigned i = CSI.size(); i != 0; --i) { @@ -1322,8 +1377,7 @@ bool X86FrameLowering::spillCalleeSavedRegisters( // It can be done by spilling XMMs to stack frame. for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - if (X86::GR64RegClass.contains(Reg) || - X86::GR32RegClass.contains(Reg)) + if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) continue; // Add the callee-saved register as live-in. It's killed at the spill. MBB.addLiveIn(Reg); @@ -1348,10 +1402,6 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, DebugLoc DL = MBB.findDebugLoc(MI); - MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); - const X86Subtarget &STI = MF.getTarget().getSubtarget(); - // Reload XMMs from stack frame. for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); @@ -1376,13 +1426,12 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } -void -X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { +void X86FrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + MachineFrameInfo *MFI = MF.getFrameInfo(); - const X86RegisterInfo *RegInfo = - static_cast(MF.getSubtarget().getRegisterInfo()); - unsigned SlotSize = RegInfo->getSlotSize(); X86MachineFunctionInfo *X86FI = MF.getInfo(); int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); @@ -1402,8 +1451,8 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } // Spill the BasePtr if it's used. - if (RegInfo->hasBasePointer(MF)) - MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister()); + if (TRI->hasBasePointer(MF)) + SavedRegs.set(TRI->getBaseRegister()); } static bool @@ -1458,15 +1507,10 @@ GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Pr // limit. static const uint64_t kSplitStackAvailable = 256; -void -X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { - MachineBasicBlock &prologueMBB = MF.front(); +void X86FrameLowering::adjustForSegmentedStacks( + MachineFunction &MF, MachineBasicBlock &PrologueMBB) const { MachineFrameInfo *MFI = MF.getFrameInfo(); - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); uint64_t StackSize; - const X86Subtarget &STI = MF.getTarget().getSubtarget(); - bool Is64Bit = STI.is64Bit(); - const bool IsLP64 = STI.isTarget64BitLP64(); unsigned TlsReg, TlsOffset; DebugLoc DL; @@ -1502,8 +1546,9 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { // The MOV R10, RAX needs to be in a different block, since the RET we emit in // allocMBB needs to be last (terminating) instruction. - for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(), - e = prologueMBB.livein_end(); i != e; i++) { + for (MachineBasicBlock::livein_iterator i = PrologueMBB.livein_begin(), + e = PrologueMBB.livein_end(); + i != e; i++) { allocMBB->addLiveIn(*i); checkMBB->addLiveIn(*i); } @@ -1617,7 +1662,7 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { // This jump is taken if SP >= (Stacklet Limit + Stack Space required). // It jumps to normal execution of the function body. - BuildMI(checkMBB, DL, TII.get(X86::JA_4)).addMBB(&prologueMBB); + BuildMI(checkMBB, DL, TII.get(X86::JA_1)).addMBB(&PrologueMBB); // On 32 bit we first push the arguments size and then the frame size. On 64 // bit, we pass the stack frame size in r10 and the argument size in r11. @@ -1638,8 +1683,6 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { .addImm(StackSize); BuildMI(allocMBB, DL, TII.get(MOVri), Reg11) .addImm(X86FI->getArgumentStackSize()); - MF.getRegInfo().setPhysRegUsed(Reg10); - MF.getRegInfo().setPhysRegUsed(Reg11); } else { BuildMI(allocMBB, DL, TII.get(X86::PUSHi32)) .addImm(X86FI->getArgumentStackSize()); @@ -1684,10 +1727,10 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { else BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET)); - allocMBB->addSuccessor(&prologueMBB); + allocMBB->addSuccessor(&PrologueMBB); checkMBB->addSuccessor(allocMBB); - checkMBB->addSuccessor(&prologueMBB); + checkMBB->addSuccessor(&PrologueMBB); #ifdef XDEBUG MF.verify(); @@ -1709,15 +1752,9 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { /// call inc_stack # doubles the stack space /// temp0 = sp - MaxStack /// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart -void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const { - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); +void X86FrameLowering::adjustForHiPEPrologue( + MachineFunction &MF, MachineBasicBlock &PrologueMBB) const { MachineFrameInfo *MFI = MF.getFrameInfo(); - const unsigned SlotSize = - static_cast(MF.getSubtarget().getRegisterInfo()) - ->getSlotSize(); - const X86Subtarget &STI = MF.getTarget().getSubtarget(); - const bool Is64Bit = STI.is64Bit(); - const bool IsLP64 = STI.isTarget64BitLP64(); DebugLoc DL; // HiPE-specific values const unsigned HipeLeafWords = 24; @@ -1780,12 +1817,12 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const { // If the stack frame needed is larger than the guaranteed then runtime checks // and calls to "inc_stack_0" BIF should be inserted in the assembly prologue. if (MaxStack > Guaranteed) { - MachineBasicBlock &prologueMBB = MF.front(); MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock(); MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock(); - for (MachineBasicBlock::livein_iterator I = prologueMBB.livein_begin(), - E = prologueMBB.livein_end(); I != E; I++) { + for (MachineBasicBlock::livein_iterator I = PrologueMBB.livein_begin(), + E = PrologueMBB.livein_end(); + I != E; I++) { stackCheckMBB->addLiveIn(*I); incStackMBB->addLiveIn(*I); } @@ -1821,7 +1858,7 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const { // SPLimitOffset is in a fixed heap location (pointed by BP). addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop)) .addReg(ScratchReg), PReg, false, SPLimitOffset); - BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_4)).addMBB(&prologueMBB); + BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_1)).addMBB(&PrologueMBB); // Create new MBB for IncStack: BuildMI(incStackMBB, DL, TII.get(CALLop)). @@ -1830,11 +1867,11 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const { SPReg, false, -MaxStack); addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop)) .addReg(ScratchReg), PReg, false, SPLimitOffset); - BuildMI(incStackMBB, DL, TII.get(X86::JLE_4)).addMBB(incStackMBB); + BuildMI(incStackMBB, DL, TII.get(X86::JLE_1)).addMBB(incStackMBB); - stackCheckMBB->addSuccessor(&prologueMBB, 99); + stackCheckMBB->addSuccessor(&PrologueMBB, 99); stackCheckMBB->addSuccessor(incStackMBB, 1); - incStackMBB->addSuccessor(&prologueMBB, 99); + incStackMBB->addSuccessor(&PrologueMBB, 99); incStackMBB->addSuccessor(incStackMBB, 1); } #ifdef XDEBUG @@ -1842,115 +1879,15 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const { #endif } -bool X86FrameLowering:: -convertArgMovsToPushes(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, uint64_t Amount) const { - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); - const X86RegisterInfo &RegInfo = *static_cast( - MF.getSubtarget().getRegisterInfo()); - unsigned StackPtr = RegInfo.getStackRegister(); - - // Scan the call setup sequence for the pattern we're looking for. - // We only handle a simple case now - a sequence of MOV32mi or MOV32mr - // instructions, that push a sequence of 32-bit values onto the stack, with - // no gaps. - std::map MovMap; - do { - int Opcode = I->getOpcode(); - if (Opcode != X86::MOV32mi && Opcode != X86::MOV32mr) - break; - - // We only want movs of the form: - // movl imm/r32, k(%ecx) - // If we run into something else, bail - // Note that AddrBaseReg may, counterintuitively, not be a register... - if (!I->getOperand(X86::AddrBaseReg).isReg() || - (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) || - !I->getOperand(X86::AddrScaleAmt).isImm() || - (I->getOperand(X86::AddrScaleAmt).getImm() != 1) || - (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) || - (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) || - !I->getOperand(X86::AddrDisp).isImm()) - return false; - - int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm(); - - // We don't want to consider the unaligned case. - if (StackDisp % 4) - return false; - - // If the same stack slot is being filled twice, something's fishy. - if (!MovMap.insert(std::pair(StackDisp, I)).second) - return false; - - ++I; - } while (I != MBB.end()); - - // We now expect the end of the sequence - a call and a stack adjust. - if (I == MBB.end()) - return false; - if (!I->isCall()) - return false; - MachineBasicBlock::iterator Call = I; - if ((++I)->getOpcode() != TII.getCallFrameDestroyOpcode()) - return false; - - // Now, go through the map, and see that we don't have any gaps, - // but only a series of 32-bit MOVs. - // Since std::map provides ordered iteration, the original order - // of the MOVs doesn't matter. - int64_t ExpectedDist = 0; - for (auto MMI = MovMap.begin(), MME = MovMap.end(); MMI != MME; - ++MMI, ExpectedDist += 4) - if (MMI->first != ExpectedDist) - return false; - - // Ok, everything looks fine. Do the transformation. - DebugLoc DL = I->getDebugLoc(); - - // It's possible the original stack adjustment amount was larger than - // that done by the pushes. If so, we still need a SUB. - Amount -= ExpectedDist; - if (Amount) { - MachineInstr* Sub = BuildMI(MBB, Call, DL, - TII.get(getSUBriOpcode(false, Amount)), StackPtr) - .addReg(StackPtr).addImm(Amount); - Sub->getOperand(3).setIsDead(); - } - - // Now, iterate through the map in reverse order, and replace the movs - // with pushes. MOVmi/MOVmr doesn't have any defs, so need to replace uses. - for (auto MMI = MovMap.rbegin(), MME = MovMap.rend(); MMI != MME; ++MMI) { - MachineBasicBlock::iterator MOV = MMI->second; - MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands); - - // Replace MOVmr with PUSH32r, and MOVmi with PUSHi of appropriate size - int PushOpcode = X86::PUSH32r; - if (MOV->getOpcode() == X86::MOV32mi) - PushOpcode = getPUSHiOpcode(false, PushOp); - - BuildMI(MBB, Call, DL, TII.get(PushOpcode)).addOperand(PushOp); - MBB.erase(MOV); - } - - return true; -} - void X86FrameLowering:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); - const X86RegisterInfo &RegInfo = *static_cast( - MF.getSubtarget().getRegisterInfo()); - unsigned StackPtr = RegInfo.getStackRegister(); bool reserveCallFrame = hasReservedCallFrame(MF); - int Opcode = I->getOpcode(); + unsigned Opcode = I->getOpcode(); bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode(); - const X86Subtarget &STI = MF.getTarget().getSubtarget(); - bool IsLP64 = STI.isTarget64BitLP64(); DebugLoc DL = I->getDebugLoc(); uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0; - uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0; + uint64_t InternalAmt = (isDestroy || Amount) ? I->getOperand(1).getImm() : 0; I = MBB.erase(I); if (!reserveCallFrame) { @@ -1963,66 +1900,44 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // We need to keep the stack aligned properly. To do this, we round the // amount of space needed for the outgoing arguments up to the next // alignment boundary. - unsigned StackAlign = MF.getTarget() - .getSubtargetImpl() - ->getFrameLowering() - ->getStackAlignment(); - Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign; - - MachineInstr *New = nullptr; - if (Opcode == TII.getCallFrameSetupOpcode()) { - // Try to convert movs to the stack into pushes. - // We currently only look for a pattern that appears in 32-bit - // calling conventions. - if (!IsLP64 && convertArgMovsToPushes(MF, MBB, I, Amount)) - return; - - New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)), - StackPtr) - .addReg(StackPtr) - .addImm(Amount); - } else { - assert(Opcode == TII.getCallFrameDestroyOpcode()); + unsigned StackAlign = getStackAlignment(); + Amount = RoundUpToAlignment(Amount, StackAlign); - // Factor out the amount the callee already popped. - Amount -= CalleeAmt; + // Factor out the amount that gets handled inside the sequence + // (Pushes of argument for frame setup, callee pops for frame destroy) + Amount -= InternalAmt; - if (Amount) { - unsigned Opc = getADDriOpcode(IsLP64, Amount); - New = BuildMI(MF, DL, TII.get(Opc), StackPtr) - .addReg(StackPtr).addImm(Amount); - } + if (Amount) { + // Add Amount to SP to destroy a frame, and subtract to setup. + int Offset = isDestroy ? Amount : -Amount; + BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false); } - - if (New) { - // The EFLAGS implicit def is dead. - New->getOperand(3).setIsDead(); - - // Replace the pseudo instruction with a new instruction. - MBB.insert(I, New); - } - return; } - if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) { + if (isDestroy && InternalAmt) { // If we are performing frame pointer elimination and if the callee pops // something off the stack pointer, add it back. We do this until we have // more advanced stack pointer tracking ability. - unsigned Opc = getSUBriOpcode(IsLP64, CalleeAmt); - MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr) - .addReg(StackPtr).addImm(CalleeAmt); - - // The EFLAGS implicit def is dead. - New->getOperand(3).setIsDead(); - // We are not tracking the stack pointer adjustment by the callee, so make // sure we restore the stack pointer immediately after the call, there may // be spill code inserted between the CALL and ADJCALLSTACKUP instructions. MachineBasicBlock::iterator B = MBB.begin(); while (I != B && !std::prev(I)->isCall()) --I; - MBB.insert(I, New); + BuildStackAdjustment(MBB, I, DL, -InternalAmt, /*InEpilogue=*/false); } } +bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { + assert(MBB.getParent() && "Block is not attached to a function!"); + + if (canUseLEAForSPInEpilogue(*MBB.getParent())) + return true; + + // If we cannot use LEA to adjust SP, we may need to use ADD, which + // clobbers the EFLAGS. Check that none of the terminators reads the + // EFLAGS, and if one uses it, conservatively assume this is not + // safe to insert the epilogue here. + return !terminatorsNeedFlagsAsInput(MBB); +}