//===-- X86FrameInfo.cpp - X86 Frame Information --------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of the TargetFrameInfo class.
//
//===----------------------------------------------------------------------===//
#include "X86FrameInfo.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;
30 // FIXME: completely move here.
31 extern cl::opt<bool> ForceStackAlign;
33 static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
37 return X86::SUB64ri32;
45 static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
49 return X86::ADD64ri32;
57 /// emitSPUpdate - Emit a series of instructions to increment / decrement the
58 /// stack pointer by a constant value.
60 void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
61 unsigned StackPtr, int64_t NumBytes, bool Is64Bit,
62 const TargetInstrInfo &TII) {
63 bool isSub = NumBytes < 0;
64 uint64_t Offset = isSub ? -NumBytes : NumBytes;
65 unsigned Opc = isSub ?
66 getSUBriOpcode(Is64Bit, Offset) :
67 getADDriOpcode(Is64Bit, Offset);
68 uint64_t Chunk = (1LL << 31) - 1;
69 DebugLoc DL = MBB.findDebugLoc(MBBI);
72 uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
74 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
77 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
82 /// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator.
84 void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
85 unsigned StackPtr, uint64_t *NumBytes = NULL) {
86 if (MBBI == MBB.begin()) return;
88 MachineBasicBlock::iterator PI = prior(MBBI);
89 unsigned Opc = PI->getOpcode();
90 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
91 Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
92 PI->getOperand(0).getReg() == StackPtr) {
94 *NumBytes += PI->getOperand(2).getImm();
96 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
97 Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
98 PI->getOperand(0).getReg() == StackPtr) {
100 *NumBytes -= PI->getOperand(2).getImm();
105 /// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower iterator.
107 void mergeSPUpdatesDown(MachineBasicBlock &MBB,
108 MachineBasicBlock::iterator &MBBI,
109 unsigned StackPtr, uint64_t *NumBytes = NULL) {
110 // FIXME: THIS ISN'T RUN!!!
113 if (MBBI == MBB.end()) return;
115 MachineBasicBlock::iterator NI = llvm::next(MBBI);
116 if (NI == MBB.end()) return;
118 unsigned Opc = NI->getOpcode();
119 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
120 Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
121 NI->getOperand(0).getReg() == StackPtr) {
123 *NumBytes -= NI->getOperand(2).getImm();
126 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
127 Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
128 NI->getOperand(0).getReg() == StackPtr) {
130 *NumBytes += NI->getOperand(2).getImm();
136 /// mergeSPUpdates - Checks the instruction before/after the passed
137 /// instruction. If it is an ADD/SUB instruction it is deleted argument and the
138 /// stack adjustment is returned as a positive value for ADD and a negative for
140 static int mergeSPUpdates(MachineBasicBlock &MBB,
141 MachineBasicBlock::iterator &MBBI,
143 bool doMergeWithPrevious) {
144 if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
145 (!doMergeWithPrevious && MBBI == MBB.end()))
148 MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
149 MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : llvm::next(MBBI);
150 unsigned Opc = PI->getOpcode();
153 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
154 Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
155 PI->getOperand(0).getReg() == StackPtr){
156 Offset += PI->getOperand(2).getImm();
158 if (!doMergeWithPrevious) MBBI = NI;
159 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
160 Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
161 PI->getOperand(0).getReg() == StackPtr) {
162 Offset -= PI->getOperand(2).getImm();
164 if (!doMergeWithPrevious) MBBI = NI;
170 static bool isEAXLiveIn(MachineFunction &MF) {
171 for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
172 EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
173 unsigned Reg = II->first;
175 if (Reg == X86::EAX || Reg == X86::AX ||
176 Reg == X86::AH || Reg == X86::AL)
183 void X86FrameInfo::emitCalleeSavedFrameMoves(MachineFunction &MF,
185 unsigned FramePtr) const {
186 MachineFrameInfo *MFI = MF.getFrameInfo();
187 const X86RegisterInfo *RegInfo =
188 static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
189 MachineModuleInfo &MMI = MF.getMMI();
191 // Add callee saved registers to move list.
192 const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
193 if (CSI.empty()) return;
195 std::vector<MachineMove> &Moves = MMI.getFrameMoves();
196 const TargetData *TD = MF.getTarget().getTargetData();
197 bool HasFP = RegInfo->hasFP(MF);
199 // Calculate amount of bytes used for return address storing.
201 (MF.getTarget().getFrameInfo()->getStackGrowthDirection() ==
202 TargetFrameInfo::StackGrowsUp ?
203 TD->getPointerSize() : -TD->getPointerSize());
205 // FIXME: This is dirty hack. The code itself is pretty mess right now.
206 // It should be rewritten from scratch and generalized sometimes.
208 // Determine maximum offset (minumum due to stack growth).
209 int64_t MaxOffset = 0;
210 for (std::vector<CalleeSavedInfo>::const_iterator
211 I = CSI.begin(), E = CSI.end(); I != E; ++I)
212 MaxOffset = std::min(MaxOffset,
213 MFI->getObjectOffset(I->getFrameIdx()));
215 // Calculate offsets.
216 int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth;
217 for (std::vector<CalleeSavedInfo>::const_iterator
218 I = CSI.begin(), E = CSI.end(); I != E; ++I) {
219 int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
220 unsigned Reg = I->getReg();
221 Offset = MaxOffset - Offset + saveAreaOffset;
223 // Don't output a new machine move if we're re-saving the frame
224 // pointer. This happens when the PrologEpilogInserter has inserted an extra
225 // "PUSH" of the frame pointer -- the "emitPrologue" method automatically
226 // generates one when frame pointers are used. If we generate a "machine
227 // move" for this extra "PUSH", the linker will lose track of the fact that
228 // the frame pointer should have the value of the first "PUSH" when it's
231 // FIXME: This looks inelegant. It's possibly correct, but it's covering up
232 // another bug. I.e., one where we generate a prolog like this:
240 // The immediate re-push of EBP is unnecessary. At the least, it's an
241 // optimization bug. EBP can be used as a scratch register in certain
242 // cases, but probably not when we have a frame pointer.
243 if (HasFP && FramePtr == Reg)
246 MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
247 MachineLocation CSSrc(Reg);
248 Moves.push_back(MachineMove(Label, CSDst, CSSrc));
252 /// emitPrologue - Push callee-saved registers onto the stack, which
253 /// automatically adjust the stack pointer. Adjust the stack pointer to allocate
254 /// space for local variables. Also emit labels used by the exception handler to
255 /// generate the exception handling frames.
256 void X86FrameInfo::emitPrologue(MachineFunction &MF) const {
257 MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
258 MachineBasicBlock::iterator MBBI = MBB.begin();
259 MachineFrameInfo *MFI = MF.getFrameInfo();
260 const Function *Fn = MF.getFunction();
261 const X86Subtarget *Subtarget = &MF.getTarget().getSubtarget<X86Subtarget>();
262 const X86RegisterInfo *RegInfo =
263 static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
264 const X86InstrInfo &TII =
265 *static_cast<const X86InstrInfo*>(MF.getTarget().getInstrInfo());
266 MachineModuleInfo &MMI = MF.getMMI();
267 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
268 bool needsFrameMoves = MMI.hasDebugInfo() ||
269 !Fn->doesNotThrow() || UnwindTablesMandatory;
270 uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
271 uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
272 bool HasFP = RegInfo->hasFP(MF);
273 bool Is64Bit = STI.is64Bit();
274 bool IsWin64 = STI.isTargetWin64();
275 unsigned StackAlign = getStackAlignment();
276 unsigned SlotSize = RegInfo->getSlotSize();
277 unsigned FramePtr = RegInfo->getFrameRegister(MF);
278 unsigned StackPtr = RegInfo->getStackRegister();
282 // If we're forcing a stack realignment we can't rely on just the frame
283 // info, we need to know the ABI stack alignment as well in case we
284 // have a call out. Otherwise just make sure we have some alignment - we'll
285 // go with the minimum SlotSize.
286 if (ForceStackAlign) {
288 MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
289 else if (MaxAlign < SlotSize)
293 // Add RETADDR move area to callee saved frame size.
294 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
295 if (TailCallReturnAddrDelta < 0)
296 X86FI->setCalleeSavedFrameSize(
297 X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
299 // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
300 // function, and use up to 128 bytes of stack space, don't have a frame
301 // pointer, calls, or dynamic alloca then we do not need to adjust the
302 // stack pointer (we fit in the Red Zone).
303 if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
304 !RegInfo->needsStackRealignment(MF) &&
305 !MFI->hasVarSizedObjects() && // No dynamic alloca.
306 !MFI->adjustsStack() && // No calls.
307 !IsWin64) { // Win64 has no Red Zone
308 uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
309 if (HasFP) MinSize += SlotSize;
310 StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
311 MFI->setStackSize(StackSize);
312 } else if (IsWin64) {
313 // We need to always allocate 32 bytes as register spill area.
314 // FIXME: We might reuse these 32 bytes for leaf functions.
316 MFI->setStackSize(StackSize);
319 // Insert stack pointer adjustment for later moving of return addr. Only
320 // applies to tail call optimized functions where the callee argument stack
321 // size is bigger than the callers.
322 if (TailCallReturnAddrDelta < 0) {
324 BuildMI(MBB, MBBI, DL,
325 TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)),
328 .addImm(-TailCallReturnAddrDelta);
329 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
332 // Mapping for machine moves:
334 // DST: VirtualFP AND
335 // SRC: VirtualFP => DW_CFA_def_cfa_offset
336 // ELSE => DW_CFA_def_cfa
338 // SRC: VirtualFP AND
339 // DST: Register => DW_CFA_def_cfa_register
342 // OFFSET < 0 => DW_CFA_offset_extended_sf
343 // REG < 64 => DW_CFA_offset + Reg
344 // ELSE => DW_CFA_offset_extended
346 std::vector<MachineMove> &Moves = MMI.getFrameMoves();
347 const TargetData *TD = MF.getTarget().getTargetData();
348 uint64_t NumBytes = 0;
349 int stackGrowth = -TD->getPointerSize();
352 // Calculate required stack adjustment.
353 uint64_t FrameSize = StackSize - SlotSize;
354 if (RegInfo->needsStackRealignment(MF))
355 FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
357 NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
359 // Get the offset of the stack slot for the EBP register, which is
360 // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
361 // Update the frame offset adjustment.
362 MFI->setOffsetAdjustment(-NumBytes);
364 // Save EBP/RBP into the appropriate stack slot.
365 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
366 .addReg(FramePtr, RegState::Kill);
368 if (needsFrameMoves) {
369 // Mark the place where EBP/RBP was saved.
370 MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
371 BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
373 // Define the current CFA rule to use the provided offset.
375 MachineLocation SPDst(MachineLocation::VirtualFP);
376 MachineLocation SPSrc(MachineLocation::VirtualFP, 2 * stackGrowth);
377 Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
379 // FIXME: Verify & implement for FP
380 MachineLocation SPDst(StackPtr);
381 MachineLocation SPSrc(StackPtr, stackGrowth);
382 Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
385 // Change the rule for the FramePtr to be an "offset" rule.
386 MachineLocation FPDst(MachineLocation::VirtualFP, 2 * stackGrowth);
387 MachineLocation FPSrc(FramePtr);
388 Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
391 // Update EBP with the new base value...
392 BuildMI(MBB, MBBI, DL,
393 TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
396 if (needsFrameMoves) {
397 // Mark effective beginning of when frame pointer becomes valid.
398 MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
399 BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
401 // Define the current CFA to use the EBP/RBP register.
402 MachineLocation FPDst(FramePtr);
403 MachineLocation FPSrc(MachineLocation::VirtualFP);
404 Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
407 // Mark the FramePtr as live-in in every block except the entry.
408 for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
410 I->addLiveIn(FramePtr);
413 if (RegInfo->needsStackRealignment(MF)) {
415 BuildMI(MBB, MBBI, DL,
416 TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri),
417 StackPtr).addReg(StackPtr).addImm(-MaxAlign);
419 // The EFLAGS implicit def is dead.
420 MI->getOperand(3).setIsDead();
423 NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
426 // Skip the callee-saved push instructions.
427 bool PushedRegs = false;
428 int StackOffset = 2 * stackGrowth;
430 while (MBBI != MBB.end() &&
431 (MBBI->getOpcode() == X86::PUSH32r ||
432 MBBI->getOpcode() == X86::PUSH64r)) {
436 if (!HasFP && needsFrameMoves) {
437 // Mark callee-saved push instruction.
438 MCSymbol *Label = MMI.getContext().CreateTempSymbol();
439 BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
441 // Define the current CFA rule to use the provided offset.
442 unsigned Ptr = StackSize ?
443 MachineLocation::VirtualFP : StackPtr;
444 MachineLocation SPDst(Ptr);
445 MachineLocation SPSrc(Ptr, StackOffset);
446 Moves.push_back(MachineMove(Label, SPDst, SPSrc));
447 StackOffset += stackGrowth;
451 DL = MBB.findDebugLoc(MBBI);
453 // If there is an SUB32ri of ESP immediately before this instruction, merge
454 // the two. This can be the case when tail call elimination is enabled and
455 // the callee has more arguments then the caller.
456 NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
458 // If there is an ADD32ri or SUB32ri of ESP immediately after this
459 // instruction, merge the two instructions.
460 mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
462 // Adjust stack pointer: ESP -= numbytes.
464 // Windows and cygwin/mingw require a prologue helper routine when allocating
465 // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
466 // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
467 // stack and adjust the stack pointer in one go. The 64-bit version of
468 // __chkstk is only responsible for probing the stack. The 64-bit prologue is
469 // responsible for adjusting the stack pointer. Touching the stack at 4K
470 // increments is necessary to ensure that the guard pages used by the OS
471 // virtual memory manager are allocated in correct sequence.
472 if (NumBytes >= 4096 &&
473 (Subtarget->isTargetCygMing() || Subtarget->isTargetWin32())) {
474 // Check whether EAX is livein for this function.
475 bool isEAXAlive = isEAXLiveIn(MF);
477 const char *StackProbeSymbol =
478 Subtarget->isTargetWindows() ? "_chkstk" : "_alloca";
479 unsigned CallOp = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
481 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
483 BuildMI(MBB, MBBI, DL, TII.get(CallOp))
484 .addExternalSymbol(StackProbeSymbol)
485 .addReg(StackPtr, RegState::Define | RegState::Implicit)
486 .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
489 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
490 .addReg(X86::EAX, RegState::Kill);
492 // Allocate NumBytes-4 bytes on stack. We'll also use 4 already
493 // allocated bytes for EAX.
494 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
495 .addImm(NumBytes - 4);
496 BuildMI(MBB, MBBI, DL, TII.get(CallOp))
497 .addExternalSymbol(StackProbeSymbol)
498 .addReg(StackPtr, RegState::Define | RegState::Implicit)
499 .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
502 MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
504 StackPtr, false, NumBytes - 4);
505 MBB.insert(MBBI, MI);
507 } else if (NumBytes >= 4096 && Subtarget->isTargetWin64()) {
508 // Sanity check that EAX is not livein for this function. It should
509 // should not be, so throw an assert.
510 assert(!isEAXLiveIn(MF) && "EAX is livein in the Win64 case!");
512 // Handle the 64-bit Windows ABI case where we need to call __chkstk.
513 // Function prologue is responsible for adjusting the stack pointer.
514 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
516 BuildMI(MBB, MBBI, DL, TII.get(X86::WINCALL64pcrel32))
517 .addExternalSymbol("__chkstk")
518 .addReg(StackPtr, RegState::Define | RegState::Implicit);
519 emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
521 emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
523 if ((NumBytes || PushedRegs) && needsFrameMoves) {
524 // Mark end of stack pointer adjustment.
525 MCSymbol *Label = MMI.getContext().CreateTempSymbol();
526 BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
528 if (!HasFP && NumBytes) {
529 // Define the current CFA rule to use the provided offset.
531 MachineLocation SPDst(MachineLocation::VirtualFP);
532 MachineLocation SPSrc(MachineLocation::VirtualFP,
533 -StackSize + stackGrowth);
534 Moves.push_back(MachineMove(Label, SPDst, SPSrc));
536 // FIXME: Verify & implement for FP
537 MachineLocation SPDst(StackPtr);
538 MachineLocation SPSrc(StackPtr, stackGrowth);
539 Moves.push_back(MachineMove(Label, SPDst, SPSrc));
543 // Emit DWARF info specifying the offsets of the callee-saved registers.
545 emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr);
549 void X86FrameInfo::emitEpilogue(MachineFunction &MF,
550 MachineBasicBlock &MBB) const {
551 const MachineFrameInfo *MFI = MF.getFrameInfo();
552 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
553 const X86RegisterInfo *RegInfo =
554 static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
555 const X86InstrInfo &TII =
556 *static_cast<const X86InstrInfo*>(MF.getTarget().getInstrInfo());
557 MachineBasicBlock::iterator MBBI = prior(MBB.end());
558 unsigned RetOpcode = MBBI->getOpcode();
559 DebugLoc DL = MBBI->getDebugLoc();
560 bool Is64Bit = STI.is64Bit();
561 unsigned StackAlign = getStackAlignment();
562 unsigned SlotSize = RegInfo->getSlotSize();
563 unsigned FramePtr = RegInfo->getFrameRegister(MF);
564 unsigned StackPtr = RegInfo->getStackRegister();
568 llvm_unreachable("Can only insert epilog into returning blocks");
571 case X86::TCRETURNdi:
572 case X86::TCRETURNri:
573 case X86::TCRETURNmi:
574 case X86::TCRETURNdi64:
575 case X86::TCRETURNri64:
576 case X86::TCRETURNmi64:
578 case X86::EH_RETURN64:
579 break; // These are ok
582 // Get the number of bytes to allocate from the FrameInfo.
583 uint64_t StackSize = MFI->getStackSize();
584 uint64_t MaxAlign = MFI->getMaxAlignment();
585 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
586 uint64_t NumBytes = 0;
588 // If we're forcing a stack realignment we can't rely on just the frame
589 // info, we need to know the ABI stack alignment as well in case we
590 // have a call out. Otherwise just make sure we have some alignment - we'll
591 // go with the minimum.
592 if (ForceStackAlign) {
594 MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
596 MaxAlign = MaxAlign ? MaxAlign : 4;
599 if (RegInfo->hasFP(MF)) {
600 // Calculate required stack adjustment.
601 uint64_t FrameSize = StackSize - SlotSize;
602 if (RegInfo->needsStackRealignment(MF))
603 FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;
605 NumBytes = FrameSize - CSSize;
608 BuildMI(MBB, MBBI, DL,
609 TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr);
611 NumBytes = StackSize - CSSize;
614 // Skip the callee-saved pop instructions.
615 MachineBasicBlock::iterator LastCSPop = MBBI;
616 while (MBBI != MBB.begin()) {
617 MachineBasicBlock::iterator PI = prior(MBBI);
618 unsigned Opc = PI->getOpcode();
620 if (Opc != X86::POP32r && Opc != X86::POP64r &&
621 !PI->getDesc().isTerminator())
627 DL = MBBI->getDebugLoc();
629 // If there is an ADD32ri or SUB32ri of ESP immediately before this
630 // instruction, merge the two instructions.
631 if (NumBytes || MFI->hasVarSizedObjects())
632 mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
634 // If dynamic alloca is used, then reset esp to point to the last callee-saved
635 // slot before popping them off! Same applies for the case, when stack was
637 if (RegInfo->needsStackRealignment(MF)) {
638 // We cannot use LEA here, because stack pointer was realigned. We need to
639 // deallocate local frame back.
641 emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
642 MBBI = prior(LastCSPop);
645 BuildMI(MBB, MBBI, DL,
646 TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
647 StackPtr).addReg(FramePtr);
648 } else if (MFI->hasVarSizedObjects()) {
650 unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
652 addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
653 FramePtr, false, -CSSize);
654 MBB.insert(MBBI, MI);
656 BuildMI(MBB, MBBI, DL,
657 TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr)
660 } else if (NumBytes) {
661 // Adjust stack pointer back: ESP += numbytes.
662 emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
665 // We're returning from function via eh_return.
666 if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
667 MBBI = prior(MBB.end());
668 MachineOperand &DestAddr = MBBI->getOperand(0);
669 assert(DestAddr.isReg() && "Offset should be in register!");
670 BuildMI(MBB, MBBI, DL,
671 TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
672 StackPtr).addReg(DestAddr.getReg());
673 } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
674 RetOpcode == X86::TCRETURNmi ||
675 RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 ||
676 RetOpcode == X86::TCRETURNmi64) {
677 bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
678 // Tail call return: adjust the stack pointer and jump to callee.
679 MBBI = prior(MBB.end());
680 MachineOperand &JumpTarget = MBBI->getOperand(0);
681 MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
682 assert(StackAdjust.isImm() && "Expecting immediate value.");
684 // Adjust stack pointer.
685 int StackAdj = StackAdjust.getImm();
686 int MaxTCDelta = X86FI->getTCReturnAddrDelta();
688 assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
690 // Incoporate the retaddr area.
691 Offset = StackAdj-MaxTCDelta;
692 assert(Offset >= 0 && "Offset should never be negative");
695 // Check for possible merge with preceeding ADD instruction.
696 Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
697 emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII);
700 // Jump to label or value in register.
701 if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
702 BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
703 ? X86::TAILJMPd : X86::TAILJMPd64)).
704 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
705 JumpTarget.getTargetFlags());
706 } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
707 MachineInstrBuilder MIB =
708 BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
709 ? X86::TAILJMPm : X86::TAILJMPm64));
710 for (unsigned i = 0; i != 5; ++i)
711 MIB.addOperand(MBBI->getOperand(i));
712 } else if (RetOpcode == X86::TCRETURNri64) {
713 BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)).
714 addReg(JumpTarget.getReg(), RegState::Kill);
716 BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)).
717 addReg(JumpTarget.getReg(), RegState::Kill);
720 MachineInstr *NewMI = prior(MBBI);
721 for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
722 NewMI->addOperand(MBBI->getOperand(i));
724 // Delete the pseudo instruction TCRETURN.
726 } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) &&
727 (X86FI->getTCReturnAddrDelta() < 0)) {
728 // Add the return addr area delta back since we are not tail calling.
729 int delta = -1*X86FI->getTCReturnAddrDelta();
730 MBBI = prior(MBB.end());
732 // Check for possible merge with preceeding ADD instruction.
733 delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
734 emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII);