1 //=======- X86FrameInfo.cpp - X86 Frame Information ------------*- C++ -*-====//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains the X86 implementation of TargetFrameInfo class.
12 //===----------------------------------------------------------------------===//
14 #include "X86FrameInfo.h"
15 #include "X86InstrBuilder.h"
16 #include "X86InstrInfo.h"
17 #include "X86MachineFunctionInfo.h"
18 #include "llvm/Function.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineFunction.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineModuleInfo.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/Target/TargetData.h"
25 #include "llvm/Target/TargetOptions.h"
26 #include "llvm/Support/CommandLine.h"
30 // FIXME: completely move here.
// Only declared in this file; emitPrologue and emitEpilogue read it when
// choosing the stack alignment (MaxAlign) to use.
31 extern cl::opt<bool> ForceStackAlign;
/// hasReservedCallFrame - Return true when the function's frame contains no
/// variable sized objects.
33 bool X86FrameInfo::hasReservedCallFrame(const MachineFunction &MF) const {
34 return !MF.getFrameInfo()->hasVarSizedObjects();
37 /// hasFP - Return true if the specified function should have a dedicated frame
38 /// pointer register. This is true if any of the following holds: frame pointer
39 /// elimination is disabled, the stack needs realignment, the function has
/// variable sized objects, its frame address is taken, the X86 function info
/// forces a frame pointer, or the function calls unwind init.
40 bool X86FrameInfo::hasFP(const MachineFunction &MF) const {
41 const MachineFrameInfo *MFI = MF.getFrameInfo();
42 const MachineModuleInfo &MMI = MF.getMMI();
43 const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo();
// Any single condition below is enough to require a frame pointer.
45 return (DisableFramePointerElim(MF) ||
46 RI->needsStackRealignment(MF) ||
47 MFI->hasVarSizedObjects() ||
48 MFI->isFrameAddressTaken() ||
49 MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
50 MMI.callsUnwindInit());
/// getSUBriOpcode - Pick the "SUB register, immediate" opcode that the stack
/// pointer adjustment code in this file emits.
/// NOTE(review): presumably is64Bit and the magnitude of Imm select between
/// several opcode variants; SUB64ri32 is one of the results -- confirm.
53 static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
57 return X86::SUB64ri32;
/// getADDriOpcode - Pick the "ADD register, immediate" opcode that the stack
/// pointer adjustment code in this file emits.
/// NOTE(review): presumably is64Bit and the magnitude of Imm select between
/// several opcode variants; ADD64ri32 is one of the results -- confirm.
65 static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
69 return X86::ADD64ri32;
77 /// emitSPUpdate - Emit a series of instructions to increment / decrement the
78 /// stack pointer by a constant value.
/// A negative NumBytes selects a SUB opcode, a non-negative one an ADD opcode;
/// the opcode is chosen once, from the full magnitude of NumBytes.
80 void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
81 unsigned StackPtr, int64_t NumBytes, bool Is64Bit,
82 const TargetInstrInfo &TII) {
// Work with the magnitude of the adjustment; the sign only picks the opcode.
83 bool isSub = NumBytes < 0;
84 uint64_t Offset = isSub ? -NumBytes : NumBytes;
85 unsigned Opc = isSub ?
86 getSUBriOpcode(Is64Bit, Offset) :
87 getADDriOpcode(Is64Bit, Offset);
// Upper bound for a single step: 2^31 - 1.
88 uint64_t Chunk = (1LL << 31) - 1;
89 DebugLoc DL = MBB.findDebugLoc(MBBI);
// Clamp the amount handled by this step to at most Chunk.
92 uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
94 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
97 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
102 /// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator.
/// Inspects the instruction immediately before MBBI. If it is an ADD or SUB of
/// an immediate whose operand 0 is StackPtr, that immediate is added to (ADD)
/// or subtracted from (SUB) *NumBytes.
104 void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
105 unsigned StackPtr, uint64_t *NumBytes = NULL) {
// Nothing precedes the first instruction of the block.
106 if (MBBI == MBB.begin()) return;
108 MachineBasicBlock::iterator PI = prior(MBBI);
109 unsigned Opc = PI->getOpcode();
110 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
111 Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
112 PI->getOperand(0).getReg() == StackPtr) {
114 *NumBytes += PI->getOperand(2).getImm();
116 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
117 Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
118 PI->getOperand(0).getReg() == StackPtr) {
120 *NumBytes -= PI->getOperand(2).getImm();
125 /// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower iterator.
/// Inspects the instruction immediately after MBBI. If it is an ADD or SUB of
/// an immediate whose operand 0 is StackPtr, that immediate is subtracted from
/// (ADD) or added to (SUB) *NumBytes -- the opposite sign convention to
/// mergeSPUpdatesUp.
127 void mergeSPUpdatesDown(MachineBasicBlock &MBB,
128 MachineBasicBlock::iterator &MBBI,
129 unsigned StackPtr, uint64_t *NumBytes = NULL) {
130 // FIXME: THIS ISN'T RUN!!!
133 if (MBBI == MBB.end()) return;
// There must be a following instruction to merge with.
135 MachineBasicBlock::iterator NI = llvm::next(MBBI);
136 if (NI == MBB.end()) return;
138 unsigned Opc = NI->getOpcode();
139 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
140 Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
141 NI->getOperand(0).getReg() == StackPtr) {
143 *NumBytes -= NI->getOperand(2).getImm();
146 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
147 Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
148 NI->getOperand(0).getReg() == StackPtr) {
150 *NumBytes += NI->getOperand(2).getImm();
156 /// mergeSPUpdates - Checks the instruction before/after the passed
157 /// instruction. If it is an ADD/SUB instruction it is deleted and the
158 /// stack adjustment is returned as a positive value for ADD and a negative value for SUB.
/// With doMergeWithPrevious set, the instruction before MBBI is examined.
/// Otherwise the instruction at MBBI itself is examined and, on a match, MBBI
/// is moved to the instruction that followed it.
160 static int mergeSPUpdates(MachineBasicBlock &MBB,
161 MachineBasicBlock::iterator &MBBI,
163 bool doMergeWithPrevious) {
// Bail out when there is no instruction in the requested direction.
164 if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
165 (!doMergeWithPrevious && MBBI == MBB.end()))
// PI is the candidate instruction; NI is only used when merging forward.
168 MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
169 MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : llvm::next(MBBI);
170 unsigned Opc = PI->getOpcode();
173 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
174 Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
175 PI->getOperand(0).getReg() == StackPtr){
176 Offset += PI->getOperand(2).getImm();
178 if (!doMergeWithPrevious) MBBI = NI;
179 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
180 Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
181 PI->getOperand(0).getReg() == StackPtr) {
182 Offset -= PI->getOperand(2).getImm();
184 if (!doMergeWithPrevious) MBBI = NI;
/// isEAXLiveIn - Check the function's live-in register list for EAX or one of
/// AX, AH, AL.
190 static bool isEAXLiveIn(MachineFunction &MF) {
191 for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
192 EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
193 unsigned Reg = II->first;
// Match EAX itself as well as AX, AH and AL.
195 if (Reg == X86::EAX || Reg == X86::AX ||
196 Reg == X86::AH || Reg == X86::AL)
/// emitCalleeSavedFrameMoves - Append MachineMove entries for the function's
/// callee-saved registers to the frame-move list obtained from
/// MachineModuleInfo. Each entry pairs a register with a VirtualFP-relative
/// offset derived from the register's frame index. Does nothing when the
/// function has no callee-saved info.
203 void X86FrameInfo::emitCalleeSavedFrameMoves(MachineFunction &MF,
205 unsigned FramePtr) const {
206 MachineFrameInfo *MFI = MF.getFrameInfo();
207 MachineModuleInfo &MMI = MF.getMMI();
209 // Add callee saved registers to move list.
210 const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
211 if (CSI.empty()) return;
213 std::vector<MachineMove> &Moves = MMI.getFrameMoves();
214 const TargetData *TD = MF.getTarget().getTargetData();
215 bool HasFP = hasFP(MF);
217 // Calculate amount of bytes used for return address storing.
219 (MF.getTarget().getFrameInfo()->getStackGrowthDirection() ==
220 TargetFrameInfo::StackGrowsUp ?
221 TD->getPointerSize() : -TD->getPointerSize());
223 // FIXME: This is a dirty hack. The code itself is a pretty big mess right now.
224 // It should be rewritten from scratch and generalized sometime.
226 // Determine maximum offset (minimum due to stack growth).
227 int64_t MaxOffset = 0;
228 for (std::vector<CalleeSavedInfo>::const_iterator
229 I = CSI.begin(), E = CSI.end(); I != E; ++I)
230 MaxOffset = std::min(MaxOffset,
231 MFI->getObjectOffset(I->getFrameIdx()));
233 // Calculate offsets.
// The save area starts three pointer-sized steps away when a frame pointer is
// in use, two otherwise.
234 int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth;
235 for (std::vector<CalleeSavedInfo>::const_iterator
236 I = CSI.begin(), E = CSI.end(); I != E; ++I) {
237 int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
238 unsigned Reg = I->getReg();
// Rebase the object offset against MaxOffset and the save area start.
239 Offset = MaxOffset - Offset + saveAreaOffset;
241 // Don't output a new machine move if we're re-saving the frame
242 // pointer. This happens when the PrologEpilogInserter has inserted an extra
243 // "PUSH" of the frame pointer -- the "emitPrologue" method automatically
244 // generates one when frame pointers are used. If we generate a "machine
245 // move" for this extra "PUSH", the linker will lose track of the fact that
246 // the frame pointer should have the value of the first "PUSH" when it's
249 // FIXME: This looks inelegant. It's possibly correct, but it's covering up
250 // another bug. I.e., one where we generate a prolog like this:
258 // The immediate re-push of EBP is unnecessary. At the least, it's an
259 // optimization bug. EBP can be used as a scratch register in certain
260 // cases, but probably not when we have a frame pointer.
261 if (HasFP && FramePtr == Reg)
264 MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
265 MachineLocation CSSrc(Reg);
266 Moves.push_back(MachineMove(Label, CSDst, CSSrc));
270 /// emitPrologue - Push callee-saved registers onto the stack, which
271 /// automatically adjust the stack pointer. Adjust the stack pointer to allocate
272 /// space for local variables. Also emit labels used by the exception handler to
273 /// generate the exception handling frames.
274 void X86FrameInfo::emitPrologue(MachineFunction &MF) const {
275 MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
276 MachineBasicBlock::iterator MBBI = MBB.begin();
277 MachineFrameInfo *MFI = MF.getFrameInfo();
278 const Function *Fn = MF.getFunction();
279 const X86Subtarget *Subtarget = &MF.getTarget().getSubtarget<X86Subtarget>();
280 const X86RegisterInfo *RegInfo =
281 static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
282 const X86InstrInfo &TII =
283 *static_cast<const X86InstrInfo*>(MF.getTarget().getInstrInfo());
284 MachineModuleInfo &MMI = MF.getMMI();
285 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
// Frame moves are recorded when there is debug info, when the function may
// throw, or when unwind tables are mandatory.
286 bool needsFrameMoves = MMI.hasDebugInfo() ||
287 !Fn->doesNotThrow() || UnwindTablesMandatory;
288 uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
289 uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
290 bool HasFP = hasFP(MF);
291 bool Is64Bit = STI.is64Bit();
292 bool IsWin64 = STI.isTargetWin64();
293 unsigned StackAlign = getStackAlignment();
294 unsigned SlotSize = RegInfo->getSlotSize();
295 unsigned FramePtr = RegInfo->getFrameRegister(MF);
296 unsigned StackPtr = RegInfo->getStackRegister();
300 // If we're forcing a stack realignment we can't rely on just the frame
301 // info, we need to know the ABI stack alignment as well in case we
302 // have a call out. Otherwise just make sure we have some alignment - we'll
303 // go with the minimum SlotSize.
304 if (ForceStackAlign) {
306 MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
307 else if (MaxAlign < SlotSize)
311 // Add RETADDR move area to callee saved frame size.
312 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
// A negative delta enlarges the callee-saved frame size by its magnitude.
313 if (TailCallReturnAddrDelta < 0)
314 X86FI->setCalleeSavedFrameSize(
315 X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
317 // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
318 // function, and use up to 128 bytes of stack space, don't have a frame
319 // pointer, calls, or dynamic alloca then we do not need to adjust the
320 // stack pointer (we fit in the Red Zone).
321 if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
322 !RegInfo->needsStackRealignment(MF) &&
323 !MFI->hasVarSizedObjects() && // No dynamic alloca.
324 !MFI->adjustsStack() && // No calls.
325 !IsWin64) { // Win64 has no Red Zone
326 uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
327 if (HasFP) MinSize += SlotSize;
// Only the part of the frame beyond the 128-byte Red Zone is allocated, but
// never less than the callee-saved area (plus the frame pointer slot).
328 StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
329 MFI->setStackSize(StackSize);
330 } else if (IsWin64) {
331 // We need to always allocate 32 bytes as register spill area.
332 // FIXME: We might reuse these 32 bytes for leaf functions.
334 MFI->setStackSize(StackSize);
337 // Insert stack pointer adjustment for later moving of return addr. Only
338 // applies to tail call optimized functions where the callee argument stack
339 // size is bigger than the callers.
340 if (TailCallReturnAddrDelta < 0) {
342 BuildMI(MBB, MBBI, DL,
343 TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)),
346 .addImm(-TailCallReturnAddrDelta);
347 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
350 // Mapping for machine moves:
352 // DST: VirtualFP AND
353 // SRC: VirtualFP => DW_CFA_def_cfa_offset
354 // ELSE => DW_CFA_def_cfa
356 // SRC: VirtualFP AND
357 // DST: Register => DW_CFA_def_cfa_register
360 // OFFSET < 0 => DW_CFA_offset_extended_sf
361 // REG < 64 => DW_CFA_offset + Reg
362 // ELSE => DW_CFA_offset_extended
364 std::vector<MachineMove> &Moves = MMI.getFrameMoves();
365 const TargetData *TD = MF.getTarget().getTargetData();
366 uint64_t NumBytes = 0;
// Negative pointer size: one stack slot in the direction the offsets below use.
367 int stackGrowth = -TD->getPointerSize();
370 // Calculate required stack adjustment.
371 uint64_t FrameSize = StackSize - SlotSize;
// Round the frame size up to a multiple of MaxAlign when realigning.
372 if (RegInfo->needsStackRealignment(MF))
373 FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
375 NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
377 // Get the offset of the stack slot for the EBP register, which is
378 // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
379 // Update the frame offset adjustment.
380 MFI->setOffsetAdjustment(-NumBytes);
382 // Save EBP/RBP into the appropriate stack slot.
383 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
384 .addReg(FramePtr, RegState::Kill);
386 if (needsFrameMoves) {
387 // Mark the place where EBP/RBP was saved.
388 MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
389 BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
391 // Define the current CFA rule to use the provided offset.
393 MachineLocation SPDst(MachineLocation::VirtualFP);
394 MachineLocation SPSrc(MachineLocation::VirtualFP, 2 * stackGrowth);
395 Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
397 // FIXME: Verify & implement for FP
398 MachineLocation SPDst(StackPtr);
399 MachineLocation SPSrc(StackPtr, stackGrowth);
400 Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
403 // Change the rule for the FramePtr to be an "offset" rule.
404 MachineLocation FPDst(MachineLocation::VirtualFP, 2 * stackGrowth);
405 MachineLocation FPSrc(FramePtr);
406 Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
409 // Update EBP with the new base value...
410 BuildMI(MBB, MBBI, DL,
411 TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
414 if (needsFrameMoves) {
415 // Mark effective beginning of when frame pointer becomes valid.
416 MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
417 BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
419 // Define the current CFA to use the EBP/RBP register.
420 MachineLocation FPDst(FramePtr);
421 MachineLocation FPSrc(MachineLocation::VirtualFP);
422 Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
425 // Mark the FramePtr as live-in in every block except the entry.
426 for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
428 I->addLiveIn(FramePtr);
431 if (RegInfo->needsStackRealignment(MF)) {
// Realign by masking the stack pointer with -MaxAlign.
433 BuildMI(MBB, MBBI, DL,
434 TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri),
435 StackPtr).addReg(StackPtr).addImm(-MaxAlign);
437 // The EFLAGS implicit def is dead.
438 MI->getOperand(3).setIsDead();
441 NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
444 // Skip the callee-saved push instructions.
445 bool PushedRegs = false;
446 int StackOffset = 2 * stackGrowth;
448 while (MBBI != MBB.end() &&
449 (MBBI->getOpcode() == X86::PUSH32r ||
450 MBBI->getOpcode() == X86::PUSH64r)) {
454 if (!HasFP && needsFrameMoves) {
455 // Mark callee-saved push instruction.
456 MCSymbol *Label = MMI.getContext().CreateTempSymbol();
457 BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
459 // Define the current CFA rule to use the provided offset.
460 unsigned Ptr = StackSize ?
461 MachineLocation::VirtualFP : StackPtr;
462 MachineLocation SPDst(Ptr);
463 MachineLocation SPSrc(Ptr, StackOffset);
464 Moves.push_back(MachineMove(Label, SPDst, SPSrc));
// Each recorded push moves the tracked offset by one more slot.
465 StackOffset += stackGrowth;
469 DL = MBB.findDebugLoc(MBBI);
471 // If there is an SUB32ri of ESP immediately before this instruction, merge
472 // the two. This can be the case when tail call elimination is enabled and
473 // the callee has more arguments than the caller.
474 NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
476 // If there is an ADD32ri or SUB32ri of ESP immediately after this
477 // instruction, merge the two instructions.
478 mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
480 // Adjust stack pointer: ESP -= numbytes.
482 // Windows and cygwin/mingw require a prologue helper routine when allocating
483 // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
484 // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
485 // stack and adjust the stack pointer in one go. The 64-bit version of
486 // __chkstk is only responsible for probing the stack. The 64-bit prologue is
487 // responsible for adjusting the stack pointer. Touching the stack at 4K
488 // increments is necessary to ensure that the guard pages used by the OS
489 // virtual memory manager are allocated in correct sequence.
490 if (NumBytes >= 4096 &&
491 (Subtarget->isTargetCygMing() || Subtarget->isTargetWin32())) {
492 // Check whether EAX is livein for this function.
493 bool isEAXAlive = isEAXLiveIn(MF);
495 const char *StackProbeSymbol =
496 Subtarget->isTargetWindows() ? "_chkstk" : "_alloca";
497 unsigned CallOp = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
499 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
501 BuildMI(MBB, MBBI, DL, TII.get(CallOp))
502 .addExternalSymbol(StackProbeSymbol)
503 .addReg(StackPtr, RegState::Define | RegState::Implicit)
504 .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
507 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
508 .addReg(X86::EAX, RegState::Kill);
510 // Allocate NumBytes-4 bytes on stack. We'll also use 4 already
511 // allocated bytes for EAX.
512 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
513 .addImm(NumBytes - 4);
514 BuildMI(MBB, MBBI, DL, TII.get(CallOp))
515 .addExternalSymbol(StackProbeSymbol)
516 .addReg(StackPtr, RegState::Define | RegState::Implicit)
517 .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
520 MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
522 StackPtr, false, NumBytes - 4);
523 MBB.insert(MBBI, MI);
525 } else if (NumBytes >= 4096 && Subtarget->isTargetWin64()) {
526 // Sanity check that EAX is not livein for this function. It
527 // should not be, so throw an assert.
528 assert(!isEAXLiveIn(MF) && "EAX is livein in the Win64 case!");
530 // Handle the 64-bit Windows ABI case where we need to call __chkstk.
531 // Function prologue is responsible for adjusting the stack pointer.
532 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
534 BuildMI(MBB, MBBI, DL, TII.get(X86::WINCALL64pcrel32))
535 .addExternalSymbol("__chkstk")
536 .addReg(StackPtr, RegState::Define | RegState::Implicit);
537 emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
539 emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
541 if ((NumBytes || PushedRegs) && needsFrameMoves) {
542 // Mark end of stack pointer adjustment.
543 MCSymbol *Label = MMI.getContext().CreateTempSymbol();
544 BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
546 if (!HasFP && NumBytes) {
547 // Define the current CFA rule to use the provided offset.
549 MachineLocation SPDst(MachineLocation::VirtualFP);
550 MachineLocation SPSrc(MachineLocation::VirtualFP,
551 -StackSize + stackGrowth);
552 Moves.push_back(MachineMove(Label, SPDst, SPSrc));
554 // FIXME: Verify & implement for FP
555 MachineLocation SPDst(StackPtr);
556 MachineLocation SPSrc(StackPtr, stackGrowth);
557 Moves.push_back(MachineMove(Label, SPDst, SPSrc));
561 // Emit DWARF info specifying the offsets of the callee-saved registers.
563 emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr);
/// emitEpilogue - Insert epilogue code in front of the last instruction of
/// MBB, which must be a return-like instruction. The visible code pops the
/// frame pointer, restores the stack pointer (by an SP update, a MOV from the
/// frame pointer, or an LEA off the frame pointer) and then handles the
/// eh_return, TCRETURN* and plain RET/RETI terminators.
567 void X86FrameInfo::emitEpilogue(MachineFunction &MF,
568 MachineBasicBlock &MBB) const {
569 const MachineFrameInfo *MFI = MF.getFrameInfo();
570 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
571 const X86RegisterInfo *RegInfo =
572 static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
573 const X86InstrInfo &TII =
574 *static_cast<const X86InstrInfo*>(MF.getTarget().getInstrInfo());
// Start at the last instruction of the block; its opcode selects the
// return-specific handling further down.
575 MachineBasicBlock::iterator MBBI = prior(MBB.end());
576 unsigned RetOpcode = MBBI->getOpcode();
577 DebugLoc DL = MBBI->getDebugLoc();
578 bool Is64Bit = STI.is64Bit();
579 unsigned StackAlign = getStackAlignment();
580 unsigned SlotSize = RegInfo->getSlotSize();
581 unsigned FramePtr = RegInfo->getFrameRegister(MF);
582 unsigned StackPtr = RegInfo->getStackRegister();
586 llvm_unreachable("Can only insert epilog into returning blocks");
589 case X86::TCRETURNdi:
590 case X86::TCRETURNri:
591 case X86::TCRETURNmi:
592 case X86::TCRETURNdi64:
593 case X86::TCRETURNri64:
594 case X86::TCRETURNmi64:
596 case X86::EH_RETURN64:
597 break; // These are ok
600 // Get the number of bytes to allocate from the FrameInfo.
601 uint64_t StackSize = MFI->getStackSize();
602 uint64_t MaxAlign = MFI->getMaxAlignment();
603 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
604 uint64_t NumBytes = 0;
606 // If we're forcing a stack realignment we can't rely on just the frame
607 // info, we need to know the ABI stack alignment as well in case we
608 // have a call out. Otherwise just make sure we have some alignment - we'll
609 // go with the minimum.
610 if (ForceStackAlign) {
612 MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
614 MaxAlign = MaxAlign ? MaxAlign : 4;
618 // Calculate required stack adjustment.
619 uint64_t FrameSize = StackSize - SlotSize;
// Round the frame size up to a multiple of MaxAlign when realigning.
620 if (RegInfo->needsStackRealignment(MF))
621 FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;
623 NumBytes = FrameSize - CSSize;
626 BuildMI(MBB, MBBI, DL,
627 TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr);
629 NumBytes = StackSize - CSSize;
632 // Skip the callee-saved pop instructions.
633 MachineBasicBlock::iterator LastCSPop = MBBI;
634 while (MBBI != MBB.begin()) {
635 MachineBasicBlock::iterator PI = prior(MBBI);
636 unsigned Opc = PI->getOpcode();
638 if (Opc != X86::POP32r && Opc != X86::POP64r &&
639 !PI->getDesc().isTerminator())
645 DL = MBBI->getDebugLoc();
647 // If there is an ADD32ri or SUB32ri of ESP immediately before this
648 // instruction, merge the two instructions.
649 if (NumBytes || MFI->hasVarSizedObjects())
650 mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
652 // If dynamic alloca is used, then reset esp to point to the last callee-saved
653 // slot before popping them off! Same applies for the case, when stack was
655 if (RegInfo->needsStackRealignment(MF)) {
656 // We cannot use LEA here, because stack pointer was realigned. We need to
657 // deallocate local frame back.
659 emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
660 MBBI = prior(LastCSPop);
663 BuildMI(MBB, MBBI, DL,
664 TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
665 StackPtr).addReg(FramePtr);
666 } else if (MFI->hasVarSizedObjects()) {
668 unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
670 addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
671 FramePtr, false, -CSSize);
672 MBB.insert(MBBI, MI);
674 BuildMI(MBB, MBBI, DL,
675 TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr)
678 } else if (NumBytes) {
679 // Adjust stack pointer back: ESP += numbytes.
680 emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
683 // We're returning from function via eh_return.
684 if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
685 MBBI = prior(MBB.end());
686 MachineOperand &DestAddr = MBBI->getOperand(0);
687 assert(DestAddr.isReg() && "Offset should be in register!");
// Load the stack pointer from the register operand of the eh_return.
688 BuildMI(MBB, MBBI, DL,
689 TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
690 StackPtr).addReg(DestAddr.getReg());
691 } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
692 RetOpcode == X86::TCRETURNmi ||
693 RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 ||
694 RetOpcode == X86::TCRETURNmi64) {
695 bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
696 // Tail call return: adjust the stack pointer and jump to callee.
697 MBBI = prior(MBB.end());
698 MachineOperand &JumpTarget = MBBI->getOperand(0);
// The stack adjustment immediate is operand 5 for the memory forms and
// operand 1 otherwise.
699 MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
700 assert(StackAdjust.isImm() && "Expecting immediate value.");
702 // Adjust stack pointer.
703 int StackAdj = StackAdjust.getImm();
704 int MaxTCDelta = X86FI->getTCReturnAddrDelta();
706 assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
708 // Incorporate the retaddr area.
709 Offset = StackAdj-MaxTCDelta;
710 assert(Offset >= 0 && "Offset should never be negative");
713 // Check for possible merge with preceding ADD instruction.
714 Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
715 emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII);
718 // Jump to label or value in register.
719 if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
720 BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
721 ? X86::TAILJMPd : X86::TAILJMPd64)).
722 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
723 JumpTarget.getTargetFlags());
724 } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
725 MachineInstrBuilder MIB =
726 BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
727 ? X86::TAILJMPm : X86::TAILJMPm64));
// Copy the first five operands of the TCRETURN onto the new jump.
728 for (unsigned i = 0; i != 5; ++i)
729 MIB.addOperand(MBBI->getOperand(i));
730 } else if (RetOpcode == X86::TCRETURNri64) {
731 BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)).
732 addReg(JumpTarget.getReg(), RegState::Kill);
734 BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)).
735 addReg(JumpTarget.getReg(), RegState::Kill);
// Carry the TCRETURN's operands from index 2 onward over to the new jump.
738 MachineInstr *NewMI = prior(MBBI);
739 for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
740 NewMI->addOperand(MBBI->getOperand(i));
742 // Delete the pseudo instruction TCRETURN.
744 } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) &&
745 (X86FI->getTCReturnAddrDelta() < 0)) {
746 // Add the return addr area delta back since we are not tail calling.
747 int delta = -1*X86FI->getTCReturnAddrDelta();
748 MBBI = prior(MBB.end());
750 // Check for possible merge with preceding ADD instruction.
751 delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
752 emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII);