1 //===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains the AArch64 implementation of TargetFrameLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "AArch64FrameLowering.h"
15 #include "AArch64InstrInfo.h"
16 #include "AArch64MachineFunctionInfo.h"
17 #include "AArch64Subtarget.h"
18 #include "AArch64TargetMachine.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineModuleInfo.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/RegisterScavenging.h"
26 #include "llvm/IR/DataLayout.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/Support/CommandLine.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/raw_ostream.h"
34 #define DEBUG_TYPE "frame-info"
36 static cl::opt<bool> EnableRedZone("aarch64-redzone",
37 cl::desc("enable use of redzone on AArch64"),
38 cl::init(false), cl::Hidden);
40 STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
// Conservatively estimate this function's stack frame size: the largest
// fixed-object offset below the incoming SP, plus the aligned sizes of all
// live non-fixed frame objects.
// NOTE(review): this listing appears truncated -- the declaration/initial
// value of 'Offset', the body of the first loop's 'if', and the 'continue'
// for dead objects are not visible here; confirm against the full source.
42 static unsigned estimateStackSize(MachineFunction &MF) {
43 const MachineFrameInfo *FFI = MF.getFrameInfo();
// Fixed objects live at negative frame indices; scan them for the deepest
// offset below the incoming stack pointer.
45 for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
46 int FixedOff = -FFI->getObjectOffset(i);
47 if (FixedOff > Offset)
// Accumulate the sizes of the ordinary (non-fixed) stack objects, skipping
// dead ones, rounding up to each object's alignment as we go.
50 for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
51 if (FFI->isDeadObjectIndex(i))
53 Offset += FFI->getObjectSize(i);
54 unsigned Align = FFI->getObjectAlignment(i);
55 // Adjust to alignment boundary
56 Offset = (Offset + Align - 1) / Align * Align;
58 // This does not include the 16 bytes used for fp and lr.
59 return (unsigned)Offset;
// Return true if this function may keep its locals in the 128-byte red zone
// below SP instead of adjusting the stack pointer: no calls, no frame
// pointer, locals fit in 128 bytes, and the function does not opt out.
// NOTE(review): listing appears truncated -- the 'return' statements (and
// presumably a check of the EnableRedZone option) are not visible here;
// confirm against the full source.
62 bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
65 // Don't use the red zone if the function explicitly asks us not to.
66 // This is typically used for kernel code.
67 if (MF.getFunction()->getAttributes().hasAttribute(
68 AttributeSet::FunctionIndex, Attribute::NoRedZone))
71 const MachineFrameInfo *MFI = MF.getFrameInfo();
72 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
73 unsigned NumBytes = AFI->getLocalStackSize();
75 // Note: currently hasFP() is always true for hasCalls(), but that's an
76 // implementation detail of the current code, not a strict requirement,
77 // so stay safe here and check both.
78 if (MFI->hasCalls() || hasFP(MF) || NumBytes > 128)
83 /// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. On AArch64 this is required when the function makes
/// calls, has variable-sized objects, or takes the frame address.
85 bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
86 const MachineFrameInfo *MFI = MF.getFrameInfo();
89 const TargetRegisterInfo *RegInfo =
90 MF.getTarget().getSubtargetImpl()->getRegisterInfo();
// Stack realignment is not supported by this target's frame lowering.
91 assert(!RegInfo->needsStackRealignment(MF) &&
92 "No stack realignment on AArch64!");
95 return (MFI->hasCalls() || MFI->hasVarSizedObjects() ||
96 MFI->isFrameAddressTaken());
99 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
100 /// not required, we reserve argument space for call sites in the function
101 /// immediately on entry to the current function. This eliminates the need for
102 /// add/sub sp brackets around call sites. Returns true if the call frame is
103 /// included as part of the stack frame.
// NOTE(review): the 'bool' return-type line preceding the definition appears
// to have been dropped from this listing.
105 AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
// With variable-sized objects the SP moves at runtime, so the call frame
// cannot be pre-reserved as part of the fixed stack frame.
106 return !MF.getFrameInfo()->hasVarSizedObjects();
// Replace the ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudo instructions with real
// SP adjustments (or nothing, when the call frame is reserved up front).
// NOTE(review): listing appears truncated -- the erase of the pseudo
// instruction and several closing braces are not visible here.
109 void AArch64FrameLowering::eliminateCallFramePseudoInstr(
110 MachineFunction &MF, MachineBasicBlock &MBB,
111 MachineBasicBlock::iterator I) const {
112 const AArch64InstrInfo *TII = static_cast<const AArch64InstrInfo *>(
113 MF.getTarget().getSubtargetImpl()->getInstrInfo());
114 DebugLoc DL = I->getDebugLoc();
115 int Opc = I->getOpcode();
116 bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
// For a call-frame-destroy pseudo, operand 1 carries the number of bytes
// the callee itself pops (callee-pops-arguments conventions).
117 uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
119 const TargetFrameLowering *TFI =
120 MF.getTarget().getSubtargetImpl()->getFrameLowering();
// No reserved call frame: SP must be adjusted around each call site.
121 if (!TFI->hasReservedCallFrame(MF)) {
122 unsigned Align = getStackAlignment();
124 int64_t Amount = I->getOperand(0).getImm();
125 Amount = RoundUpToAlignment(Amount, Align);
129 // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
130 // doesn't have to pop anything), then the first operand will be zero too so
131 // this adjustment is a no-op.
132 if (CalleePopAmount == 0) {
133 // FIXME: in-function stack adjustment for calls is limited to 24-bits
134 // because there's no guaranteed temporary register available.
136 // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
137 // 1) For offset <= 12-bit, we use LSL #0
138 // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
139 // LSL #0, and the other uses LSL #12.
141 // Mostly call frames will be allocated at the start of a function so
142 // this is OK, but it is a limitation that needs dealing with.
143 assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
144 emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII);
146 } else if (CalleePopAmount != 0) {
147 // If the calling convention demands that the callee pops arguments from the
148 // stack, we want to add it back if we have a reserved call frame.
149 assert(CalleePopAmount < 0xffffff && "call frame too large");
150 emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount,
// Emit CFI offset directives describing where each callee-saved register was
// stored, so the unwinder can restore them. FP/LR directives are skipped when
// a frame pointer is in use because emitPrologue already emitted them.
// NOTE(review): listing appears truncated -- an early-return for an empty CSI
// list, a 'continue' after the TotalSkipped update, and closing braces are
// not visible here.
156 void AArch64FrameLowering::emitCalleeSavedFrameMoves(
157 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
158 unsigned FramePtr) const {
159 MachineFunction &MF = *MBB.getParent();
160 MachineFrameInfo *MFI = MF.getFrameInfo();
161 MachineModuleInfo &MMI = MF.getMMI();
162 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
163 const TargetInstrInfo *TII =
164 MF.getTarget().getSubtargetImpl()->getInstrInfo();
165 DebugLoc DL = MBB.findDebugLoc(MBBI);
167 // Add callee saved registers to move list.
168 const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
172 const DataLayout *TD = MF.getTarget().getSubtargetImpl()->getDataLayout();
173 bool HasFP = hasFP(MF);
175 // Calculate amount of bytes used for return address storing.
// Negative because the stack grows down.
176 int stackGrowth = -TD->getPointerSize(0);
178 // Calculate offsets.
// With an FP, both FP and LR occupy the save area (2 slots); otherwise 1.
179 int64_t saveAreaOffset = (HasFP ? 2 : 1) * stackGrowth;
180 unsigned TotalSkipped = 0;
181 for (const auto &Info : CSI) {
182 unsigned Reg = Info.getReg();
183 int64_t Offset = MFI->getObjectOffset(Info.getFrameIdx()) -
184 getOffsetOfLocalArea() + saveAreaOffset;
186 // Don't output a new CFI directive if we're re-saving the frame pointer or
187 // link register. This happens when the PrologEpilogInserter has inserted an
188 // extra "STP" of the frame pointer and link register -- the "emitPrologue"
189 // method automatically generates the directives when frame pointers are
190 // used. If we generate CFI directives for the extra "STP"s, the linker will
191 // lose track of the correct values for the frame pointer and link register.
192 if (HasFP && (FramePtr == Reg || Reg == AArch64::LR)) {
193 TotalSkipped += stackGrowth;
197 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
198 unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
199 nullptr, DwarfReg, Offset - TotalSkipped));
200 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
201 .addCFIIndex(CFIIndex);
// Emit the function prologue into the entry block: allocate the stack frame,
// step over the callee-saved register stores inserted earlier, set up the
// frame pointer and base pointer when needed, and emit CFI directives for
// debug info / unwinding.
// NOTE(review): this listing appears truncated in several places (early
// returns, 'if (HasFP)' guards, loop bodies such as 'NumBytes -= 16;
// ++MBBI;', and closing braces are not visible); confirm control flow
// against the full source before relying on this rendering.
205 void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
206 MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
207 MachineBasicBlock::iterator MBBI = MBB.begin();
208 const MachineFrameInfo *MFI = MF.getFrameInfo();
209 const Function *Fn = MF.getFunction();
210 const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
211 MF.getTarget().getSubtargetImpl()->getRegisterInfo());
212 const TargetInstrInfo *TII =
213 MF.getTarget().getSubtargetImpl()->getInstrInfo();
214 MachineModuleInfo &MMI = MF.getMMI();
215 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
// CFI is needed either for debug info or for unwind tables.
216 bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
217 bool HasFP = hasFP(MF);
218 DebugLoc DL = MBB.findDebugLoc(MBBI);
220 int NumBytes = (int)MFI->getStackSize();
// Functions with no stack frame: everything allocated is locals; they may
// still need a bare SP adjustment (or nothing at all, with the red zone).
221 if (!AFI->hasStackFrame()) {
222 assert(!HasFP && "unexpected function without stack frame but with FP");
224 // All of the stack allocation is for locals.
225 AFI->setLocalStackSize(NumBytes);
227 // Label used to tie together the PROLOG_LABEL and the MachineMoves.
228 MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
230 // REDZONE: If the stack size is less than 128 bytes, we don't need
231 // to actually allocate.
232 if (NumBytes && !canUseRedZone(MF)) {
233 emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
234 MachineInstr::FrameSetup);
236 // Encode the stack size of the leaf function.
237 unsigned CFIIndex = MMI.addFrameInst(
238 MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
239 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
240 .addCFIIndex(CFIIndex);
241 } else if (NumBytes) {
242 ++NumRedZoneFunctions;
248 // Only set up FP if we actually need to.
251 // First instruction must a) allocate the stack and b) have an immediate
252 // that is a multiple of -2.
253 assert((MBBI->getOpcode() == AArch64::STPXpre ||
254 MBBI->getOpcode() == AArch64::STPDpre) &&
255 MBBI->getOperand(3).getReg() == AArch64::SP &&
256 MBBI->getOperand(4).getImm() < 0 &&
257 (MBBI->getOperand(4).getImm() & 1) == 0);
259 // Frame pointer is fp = sp - 16. Since the STPXpre subtracts the space
260 // required for the callee saved register area we get the frame pointer
261 // by adding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8.
262 FPOffset = -(MBBI->getOperand(4).getImm() + 2) * 8;
263 assert(FPOffset >= 0 && "Bad Framepointer Offset");
266 // Move past the saves of the callee-saved registers.
267 while (MBBI->getOpcode() == AArch64::STPXi ||
268 MBBI->getOpcode() == AArch64::STPDi ||
269 MBBI->getOpcode() == AArch64::STPXpre ||
270 MBBI->getOpcode() == AArch64::STPDpre) {
274 assert(NumBytes >= 0 && "Negative stack allocation size!?");
276 // Issue sub fp, sp, FPOffset or
277 // mov fp,sp when FPOffset is zero.
278 // Note: All stores of callee-saved registers are marked as "FrameSetup".
279 // This code marks the instruction(s) that set the FP also.
280 emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
281 MachineInstr::FrameSetup);
284 // All of the remaining stack allocations are for locals.
285 AFI->setLocalStackSize(NumBytes);
287 // Allocate space for the rest of the frame.
289 // If we're a leaf function, try using the red zone.
290 if (!canUseRedZone(MF))
291 emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
292 MachineInstr::FrameSetup);
295 // If we need a base pointer, set it up here. It's whatever the value of the
296 // stack pointer is at this point. Any variable size objects will be allocated
297 // after this, so we can still use the base pointer to reference locals.
299 // FIXME: Clarify FrameSetup flags here.
300 // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
// X19 serves as the base pointer on AArch64.
303 if (RegInfo->hasBasePointer(MF))
304 TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false);
306 if (needsFrameMoves) {
307 const DataLayout *TD = MF.getTarget().getSubtargetImpl()->getDataLayout();
308 const int StackGrowth = -TD->getPointerSize(0);
309 unsigned FramePtr = RegInfo->getFrameRegister(MF);
311 // An example of the prologue:
318 // .cfi_personality 155, ___gxx_personality_v0
320 // .cfi_lsda 16, Lexception33
322 // stp xa,bx, [sp, -#offset]!
324 // stp x28, x27, [sp, #offset-32]
325 // stp fp, lr, [sp, #offset-16]
326 // add fp, sp, #offset - 16
330 // +-------------------------------------------+
331 // 10000 | ........ | ........ | ........ | ........ |
332 // 10004 | ........ | ........ | ........ | ........ |
333 // +-------------------------------------------+
334 // 10008 | ........ | ........ | ........ | ........ |
335 // 1000c | ........ | ........ | ........ | ........ |
336 // +===========================================+
337 // 10010 | X28 Register |
338 // 10014 | X28 Register |
339 // +-------------------------------------------+
340 // 10018 | X27 Register |
341 // 1001c | X27 Register |
342 // +===========================================+
343 // 10020 | Frame Pointer |
344 // 10024 | Frame Pointer |
345 // +-------------------------------------------+
346 // 10028 | Link Register |
347 // 1002c | Link Register |
348 // +===========================================+
349 // 10030 | ........ | ........ | ........ | ........ |
350 // 10034 | ........ | ........ | ........ | ........ |
351 // +-------------------------------------------+
352 // 10038 | ........ | ........ | ........ | ........ |
353 // 1003c | ........ | ........ | ........ | ........ |
354 // +-------------------------------------------+
356 // [sp] = 10030 :: >>initial value<<
357 // sp = 10020 :: stp fp, lr, [sp, #-16]!
358 // fp = sp == 10020 :: mov fp, sp
359 // [sp] == 10020 :: stp x28, x27, [sp, #-16]!
360 // sp == 10010 :: >>final value<<
362 // The frame pointer (w29) points to address 10020. If we use an offset of
363 // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
364 // for w27, and -32 for w28:
367 // .cfi_def_cfa w29, 16
369 // .cfi_offset w30, -8
371 // .cfi_offset w29, -16
373 // .cfi_offset w27, -24
375 // .cfi_offset w28, -32
378 // Define the current CFA rule to use the provided FP.
379 unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
380 unsigned CFIIndex = MMI.addFrameInst(
381 MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
382 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
383 .addCFIIndex(CFIIndex);
385 // Record the location of the stored LR
386 unsigned LR = RegInfo->getDwarfRegNum(AArch64::LR, true);
387 CFIIndex = MMI.addFrameInst(
388 MCCFIInstruction::createOffset(nullptr, LR, StackGrowth));
389 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
390 .addCFIIndex(CFIIndex);
392 // Record the location of the stored FP
393 CFIIndex = MMI.addFrameInst(
394 MCCFIInstruction::createOffset(nullptr, Reg, 2 * StackGrowth));
395 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
396 .addCFIIndex(CFIIndex);
// No-FP path: the CFA stays SP-relative, so just record the frame size.
398 // Encode the stack size of the leaf function.
399 unsigned CFIIndex = MMI.addFrameInst(
400 MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
401 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
402 .addCFIIndex(CFIIndex);
405 // Now emit the moves for whatever callee saved regs we have.
406 emitCalleeSavedFrameMoves(MBB, MBBI, FramePtr);
// Return true if Reg appears in the null-terminated callee-saved register
// list CSRegs.
// NOTE(review): listing appears truncated -- the 'return true;' /
// 'return false;' statements and closing brace are not visible here.
410 static bool isCalleeSavedRegister(unsigned Reg, const MCPhysReg *CSRegs) {
411 for (unsigned i = 0; CSRegs[i]; ++i)
412 if (Reg == CSRegs[i])
// Return true if MI is an LDP instruction that restores a pair of
// callee-saved registers from the stack (SP-based, registers from CSRegs).
// NOTE(review): listing appears truncated -- the declaration/adjustment of
// 'RtIdx' (the first register-operand index, which differs for post-indexed
// forms) and the return statements are not visible here.
417 static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) {
419 if (MI->getOpcode() == AArch64::LDPXpost ||
420 MI->getOpcode() == AArch64::LDPDpost)
423 if (MI->getOpcode() == AArch64::LDPXpost ||
424 MI->getOpcode() == AArch64::LDPDpost ||
425 MI->getOpcode() == AArch64::LDPXi || MI->getOpcode() == AArch64::LDPDi) {
// Both loaded registers must be callee-saved and the base must be SP.
426 if (!isCalleeSavedRegister(MI->getOperand(RtIdx).getReg(), CSRegs) ||
427 !isCalleeSavedRegister(MI->getOperand(RtIdx + 1).getReg(), CSRegs) ||
428 MI->getOperand(RtIdx + 2).getReg() != AArch64::SP)
// Emit the function epilogue: undo the local-area SP adjustment (or restore
// SP from FP when the frame size is unknown), stepping backwards over the
// callee-saved register restores so the deallocation lands before them.
// NOTE(review): this listing appears truncated (else-branches, the
// NumRestores counting loop body, the hasFP guard around the FP-based SP
// restore, and closing braces are not visible); confirm against full source.
436 void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
437 MachineBasicBlock &MBB) const {
438 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
439 assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
440 MachineFrameInfo *MFI = MF.getFrameInfo();
441 const AArch64InstrInfo *TII = static_cast<const AArch64InstrInfo *>(
442 MF.getTarget().getSubtargetImpl()->getInstrInfo());
443 const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
444 MF.getTarget().getSubtargetImpl()->getRegisterInfo());
445 DebugLoc DL = MBBI->getDebugLoc();
446 unsigned RetOpcode = MBBI->getOpcode();
448 int NumBytes = MFI->getStackSize();
449 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
451 // Initial and residual are named for consistency with the prologue. Note that
452 // in the epilogue, the residual adjustment is executed first.
453 uint64_t ArgumentPopSize = 0;
454 if (RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri) {
455 MachineOperand &StackAdjust = MBBI->getOperand(1);
457 // For a tail-call in a callee-pops-arguments environment, some or all of
458 // the stack may actually be in use for the call's arguments, this is
459 // calculated during LowerCall and consumed here...
460 ArgumentPopSize = StackAdjust.getImm();
462 // ... otherwise the amount to pop is *all* of the argument space,
463 // conveniently stored in the MachineFunctionInfo by
464 // LowerFormalArguments. This will, of course, be zero for the C calling
466 ArgumentPopSize = AFI->getArgumentStackToRestore();
469 // The stack frame should be like below,
471 // ---------------------- ---
473 // | BytesInStackArgArea| CalleeArgStackSize
474 // | (NumReusableBytes) | (of tail call)
477 // ---------------------| --- |
479 // | CalleeSavedReg | | |
480 // | (NumRestores * 16) | | |
482 // ---------------------| | NumBytes
483 // | | StackSize (StackAdjustUp)
484 // | LocalStackSize | | |
485 // | (covering callee | | |
488 // ---------------------- --- ---
490 // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
491 // = StackSize + ArgumentPopSize
493 // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
494 // it as the 2nd argument of AArch64ISD::TC_RETURN.
495 NumBytes += ArgumentPopSize;
497 unsigned NumRestores = 0;
498 // Move past the restores of the callee-saved registers.
499 MachineBasicBlock::iterator LastPopI = MBBI;
500 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
501 if (LastPopI != MBB.begin()) {
505 } while (LastPopI != MBB.begin() && isCSRestore(LastPopI, CSRegs));
506 if (!isCSRestore(LastPopI, CSRegs)) {
// Each LDP restore pair covers 16 bytes of the frame.
511 NumBytes -= NumRestores * 16;
512 assert(NumBytes >= 0 && "Negative stack allocation size!?");
515 // If this was a redzone leaf function, we don't need to restore the
517 if (!canUseRedZone(MF))
518 emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes,
523 // Restore the original stack pointer.
524 // FIXME: Rather than doing the math here, we should instead just use
525 // non-post-indexed loads for the restores if we aren't actually going to
526 // be able to save any instructions.
527 if (NumBytes || MFI->hasVarSizedObjects())
528 emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
529 -(NumRestores - 1) * 16, TII, MachineInstr::NoFlags);
532 /// getFrameIndexOffset - Returns the displacement from the frame register to
533 /// the stack frame of the specified index.
// NOTE(review): listing appears truncated -- the parameter list tail and the
// 'unsigned FrameReg;' local this forwards with are not visible here.
534 int AArch64FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
537 return getFrameIndexReference(MF, FI, FrameReg);
540 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
541 /// debug info. It's the same as what we use for resolving the code-gen
542 /// references for now. FIXME: This can go wrong when references are
543 /// SP-relative and simple call frames aren't used.
// Thin forwarder to resolveFrameIndexReference (default PreferFP).
544 int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
546 unsigned &FrameReg) const {
547 return resolveFrameIndexReference(MF, FI, FrameReg);
// Resolve frame index FI to a base register (written to FrameReg) plus a
// byte offset (returned), choosing between FP, SP, and the base pointer so
// the offset is most likely to be encodable. PreferFP biases toward FP.
// NOTE(review): this listing appears truncated (the 'UseFP' bookkeeping, the
// fixed-object branch, several returns, and closing braces are not visible);
// confirm the decision logic against the full source.
550 int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
551 int FI, unsigned &FrameReg,
552 bool PreferFP) const {
553 const MachineFrameInfo *MFI = MF.getFrameInfo();
554 const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
555 MF.getTarget().getSubtargetImpl()->getRegisterInfo());
556 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
// FP-relative: +16 skips the FP/LR save pair below the frame pointer.
557 int FPOffset = MFI->getObjectOffset(FI) + 16;
// SP-relative: object offset measured from the post-allocation SP.
558 int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
559 bool isFixed = MFI->isFixedObjectIndex(FI);
561 // Use frame pointer to reference fixed objects. Use it for locals if
562 // there are VLAs (and thus the SP isn't reliable as a base).
563 // Make sure useFPForScavengingIndex() does the right thing for the emergency
566 if (AFI->hasStackFrame()) {
567 // Note: Keeping the following as multiple 'if' statements rather than
568 // merging to a single expression for readability.
570 // Argument access should always use the FP.
573 } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF)) {
574 // Use SP or FP, whichever gives us the best chance of the offset
575 // being in range for direct access. If the FPOffset is positive,
576 // that'll always be best, as the SP will be even further away.
577 // If the FPOffset is negative, we have to keep in mind that the
578 // available offset range for negative offsets is smaller than for
579 // positive ones. If we have variable sized objects, we're stuck with
580 // using the FP regardless, though, as the SP offset is unknown
581 // and we don't have a base pointer available. If an offset is
582 // available via the FP and the SP, use whichever is closest.
583 if (PreferFP || MFI->hasVarSizedObjects() || FPOffset >= 0 ||
584 (FPOffset >= -256 && Offset > -FPOffset))
590 FrameReg = RegInfo->getFrameRegister(MF);
594 // Use the base pointer if we have one.
595 if (RegInfo->hasBasePointer(MF))
596 FrameReg = RegInfo->getBaseRegister();
598 FrameReg = AArch64::SP;
599 // If we're using the red zone for this function, the SP won't actually
600 // be adjusted, so the offsets will be negative. They're also all
601 // within range of the signed 9-bit immediate instructions.
602 if (canUseRedZone(MF))
603 Offset -= AFI->getLocalStackSize();
// Compute the kill-state flag for a register being spilled in the prologue.
// Everything except LR is always killed; LR must stay live if it is a
// function live-in and the return address is taken (@llvm.returnaddress).
609 static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
610 if (Reg != AArch64::LR)
611 return getKillRegState(true);
613 // LR may be referred to later by an @llvm.returnaddress intrinsic.
614 bool LRLiveIn = MF.getRegInfo().isLiveIn(AArch64::LR);
615 bool LRKill = !(LRLiveIn && MF.getFrameInfo()->isReturnAddressTaken());
616 return getKillRegState(LRKill);
// Spill the callee-saved registers as STP pairs. The pairs are emitted in
// reverse CSI order; the first store is pre-indexed (STPXpre/STPDpre) and
// allocates the whole CSR area, the rest use plain STP with positive offsets.
// NOTE(review): this listing appears truncated (the 'DL' declaration, the
// 'StrOpc' declaration, the 'if (i == 0) ... else' around the opcode choice,
// the SP base operand on the MIB, the final 'return true;', and closing
// braces are not visible); confirm against the full source.
619 bool AArch64FrameLowering::spillCalleeSavedRegisters(
620 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
621 const std::vector<CalleeSavedInfo> &CSI,
622 const TargetRegisterInfo *TRI) const {
623 MachineFunction &MF = *MBB.getParent();
624 const TargetInstrInfo &TII =
625 *MF.getTarget().getSubtargetImpl()->getInstrInfo();
626 unsigned Count = CSI.size();
628 assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
631 DL = MI->getDebugLoc();
// Walk the CSI list from the end so the deepest pair is stored first.
633 for (unsigned i = 0; i < Count; i += 2) {
634 unsigned idx = Count - i - 2;
635 unsigned Reg1 = CSI[idx].getReg();
636 unsigned Reg2 = CSI[idx + 1].getReg();
637 // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
638 // list to come in sorted by frame index so that we can issue the store
639 // pair instructions directly. Assert if we see anything otherwise.
641 // The order of the registers in the list is controlled by
642 // getCalleeSavedRegs(), so they will always be in-order, as well.
643 assert(CSI[idx].getFrameIdx() + 1 == CSI[idx + 1].getFrameIdx() &&
644 "Out of order callee saved regs!");
646 assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
647 assert((i & 1) == 0 && "Odd index for callee-saved reg spill!");
648 // Issue sequence of non-sp increment and pi sp spills for cs regs. The
649 // first spill is a pre-increment that allocates the stack.
651 // stp x22, x21, [sp, #-48]! // addImm(-6)
652 // stp x20, x19, [sp, #16] // addImm(+2)
653 // stp fp, lr, [sp, #32] // addImm(+4)
654 // Rationale: This sequence saves uop updates compared to a sequence of
655 // pre-increment spills like stp xi,xj,[sp,#-16]!
656 // Note: Similar rationale and sequence for restores in epilog.
657 if (AArch64::GPR64RegClass.contains(Reg1)) {
658 assert(AArch64::GPR64RegClass.contains(Reg2) &&
659 "Expected GPR64 callee-saved register pair!");
660 // For first spill use pre-increment store.
662 StrOpc = AArch64::STPXpre;
664 StrOpc = AArch64::STPXi;
665 } else if (AArch64::FPR64RegClass.contains(Reg1)) {
666 assert(AArch64::FPR64RegClass.contains(Reg2) &&
667 "Expected FPR64 callee-saved register pair!");
668 // For first spill use pre-increment store.
670 StrOpc = AArch64::STPDpre;
672 StrOpc = AArch64::STPDi;
674 llvm_unreachable("Unexpected callee saved register!");
675 DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1) << ", "
676 << TRI->getName(Reg2) << ") -> fi#(" << CSI[idx].getFrameIdx()
677 << ", " << CSI[idx + 1].getFrameIdx() << ")\n");
678 // Compute offset: i = 0 => offset = -Count;
679 // i = 2 => offset = -(Count - 2) + Count = 2 = i; etc.
680 const int Offset = (i == 0) ? -Count : i;
// STP's scaled immediate is a signed 7-bit field: [-64, 63].
681 assert((Offset >= -64 && Offset <= 63) &&
682 "Offset out of bounds for STP immediate");
683 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
// Pre-indexed form also writes SP back, so SP is a def operand.
684 if (StrOpc == AArch64::STPDpre || StrOpc == AArch64::STPXpre)
685 MIB.addReg(AArch64::SP, RegState::Define);
687 MIB.addReg(Reg2, getPrologueDeath(MF, Reg2))
688 .addReg(Reg1, getPrologueDeath(MF, Reg1))
690 .addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit
691 .setMIFlag(MachineInstr::FrameSetup);
// Restore the callee-saved registers as LDP pairs, mirroring
// spillCalleeSavedRegisters: plain LDPs first, then a final post-indexed LDP
// (LDPXpost/LDPDpost) that also deallocates the CSR area.
// NOTE(review): this listing appears truncated (the 'DL' declaration, the
// 'LdrOpc' declaration, the 'if (i == Count - 2) ... else' around the opcode
// choice, the SP base operand on the MIB, the final 'return true;', and
// closing braces are not visible); confirm against the full source.
696 bool AArch64FrameLowering::restoreCalleeSavedRegisters(
697 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
698 const std::vector<CalleeSavedInfo> &CSI,
699 const TargetRegisterInfo *TRI) const {
700 MachineFunction &MF = *MBB.getParent();
701 const TargetInstrInfo &TII =
702 *MF.getTarget().getSubtargetImpl()->getInstrInfo();
703 unsigned Count = CSI.size();
705 assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
708 DL = MI->getDebugLoc();
710 for (unsigned i = 0; i < Count; i += 2) {
711 unsigned Reg1 = CSI[i].getReg();
712 unsigned Reg2 = CSI[i + 1].getReg();
713 // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
714 // list to come in sorted by frame index so that we can issue the store
715 // pair instructions directly. Assert if we see anything otherwise.
716 assert(CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx() &&
717 "Out of order callee saved regs!");
718 // Issue sequence of non-sp increment and sp-pi restores for cs regs. Only
719 // the last load is sp-pi post-increment and de-allocates the stack:
721 // ldp fp, lr, [sp, #32] // addImm(+4)
722 // ldp x20, x19, [sp, #16] // addImm(+2)
723 // ldp x22, x21, [sp], #48 // addImm(+6)
724 // Note: see comment in spillCalleeSavedRegisters()
727 assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
728 assert((i & 1) == 0 && "Odd index for callee-saved reg spill!");
729 if (AArch64::GPR64RegClass.contains(Reg1)) {
730 assert(AArch64::GPR64RegClass.contains(Reg2) &&
731 "Expected GPR64 callee-saved register pair!");
733 LdrOpc = AArch64::LDPXpost;
735 LdrOpc = AArch64::LDPXi;
736 } else if (AArch64::FPR64RegClass.contains(Reg1)) {
737 assert(AArch64::FPR64RegClass.contains(Reg2) &&
738 "Expected FPR64 callee-saved register pair!");
740 LdrOpc = AArch64::LDPDpost;
742 LdrOpc = AArch64::LDPDi;
744 llvm_unreachable("Unexpected callee saved register!");
745 DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1) << ", "
746 << TRI->getName(Reg2) << ") -> fi#(" << CSI[i].getFrameIdx()
747 << ", " << CSI[i + 1].getFrameIdx() << ")\n");
749 // Compute offset: i = 0 => offset = Count - 2; i = 2 => offset = Count - 4;
751 const int Offset = (i == Count - 2) ? Count : Count - i - 2;
// LDP's scaled immediate is a signed 7-bit field: [-64, 63].
752 assert((Offset >= -64 && Offset <= 63) &&
753 "Offset out of bounds for LDP immediate");
754 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
// Post-indexed form also writes SP back, so SP is a def operand.
755 if (LdrOpc == AArch64::LDPXpost || LdrOpc == AArch64::LDPDpost)
756 MIB.addReg(AArch64::SP, RegState::Define);
758 MIB.addReg(Reg2, getDefRegState(true))
759 .addReg(Reg1, getDefRegState(true))
761 .addImm(Offset); // [sp], #offset * 8 or [sp, #offset * 8]
762 // where the factor * 8 is implicit
767 void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(
768 MachineFunction &MF, RegScavenger *RS) const {
769 const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
770 MF.getTarget().getSubtargetImpl()->getRegisterInfo());
771 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
772 MachineRegisterInfo *MRI = &MF.getRegInfo();
773 SmallVector<unsigned, 4> UnspilledCSGPRs;
774 SmallVector<unsigned, 4> UnspilledCSFPRs;
776 // The frame record needs to be created by saving the appropriate registers
778 MRI->setPhysRegUsed(AArch64::FP);
779 MRI->setPhysRegUsed(AArch64::LR);
782 // Spill the BasePtr if it's used. Do this first thing so that the
783 // getCalleeSavedRegs() below will get the right answer.
784 if (RegInfo->hasBasePointer(MF))
785 MRI->setPhysRegUsed(RegInfo->getBaseRegister());
787 // If any callee-saved registers are used, the frame cannot be eliminated.
788 unsigned NumGPRSpilled = 0;
789 unsigned NumFPRSpilled = 0;
790 bool ExtraCSSpill = false;
791 bool CanEliminateFrame = true;
792 DEBUG(dbgs() << "*** processFunctionBeforeCalleeSavedScan\nUsed CSRs:");
793 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
795 // Check pairs of consecutive callee-saved registers.
796 for (unsigned i = 0; CSRegs[i]; i += 2) {
797 assert(CSRegs[i + 1] && "Odd number of callee-saved registers!");
799 const unsigned OddReg = CSRegs[i];
800 const unsigned EvenReg = CSRegs[i + 1];
801 assert((AArch64::GPR64RegClass.contains(OddReg) &&
802 AArch64::GPR64RegClass.contains(EvenReg)) ^
803 (AArch64::FPR64RegClass.contains(OddReg) &&
804 AArch64::FPR64RegClass.contains(EvenReg)) &&
805 "Register class mismatch!");
807 const bool OddRegUsed = MRI->isPhysRegUsed(OddReg);
808 const bool EvenRegUsed = MRI->isPhysRegUsed(EvenReg);
810 // Early exit if none of the registers in the register pair is actually
812 if (!OddRegUsed && !EvenRegUsed) {
813 if (AArch64::GPR64RegClass.contains(OddReg)) {
814 UnspilledCSGPRs.push_back(OddReg);
815 UnspilledCSGPRs.push_back(EvenReg);
817 UnspilledCSFPRs.push_back(OddReg);
818 UnspilledCSFPRs.push_back(EvenReg);
823 unsigned Reg = AArch64::NoRegister;
824 // If only one of the registers of the register pair is used, make sure to
825 // mark the other one as used as well.
826 if (OddRegUsed ^ EvenRegUsed) {
827 // Find out which register is the additional spill.
828 Reg = OddRegUsed ? EvenReg : OddReg;
829 MRI->setPhysRegUsed(Reg);
832 DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo));
833 DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo));
835 assert(((OddReg == AArch64::LR && EvenReg == AArch64::FP) ||
836 (RegInfo->getEncodingValue(OddReg) + 1 ==
837 RegInfo->getEncodingValue(EvenReg))) &&
838 "Register pair of non-adjacent registers!");
839 if (AArch64::GPR64RegClass.contains(OddReg)) {
841 // If it's not a reserved register, we can use it in lieu of an
842 // emergency spill slot for the register scavenger.
843 // FIXME: It would be better to instead keep looking and choose another
844 // unspilled register that isn't reserved, if there is one.
845 if (Reg != AArch64::NoRegister && !RegInfo->isReservedReg(MF, Reg))
850 CanEliminateFrame = false;
853 // FIXME: Set BigStack if any stack slot references may be out of range.
854 // For now, just conservatively guestimate based on unscaled indexing
855 // range. We'll end up allocating an unnecessary spill slot a lot, but
856 // realistically that's not a big deal at this stage of the game.
857 // The CSR spill slots have not been allocated yet, so estimateStackSize
858 // won't include them.
859 MachineFrameInfo *MFI = MF.getFrameInfo();
860 unsigned CFSize = estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
861 DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
862 bool BigStack = (CFSize >= 256);
863 if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
864 AFI->setHasStackFrame(true);
866 // Estimate if we might need to scavenge a register at some point in order
867 // to materialize a stack offset. If so, either spill one additional
868 // callee-saved register or reserve a special spill slot to facilitate
869 // register scavenging. If we already spilled an extra callee-saved register
870 // above to keep the number of spills even, we don't need to do anything else
872 if (BigStack && !ExtraCSSpill) {
874 // If we're adding a register to spill here, we have to add two of them
875 // to keep the number of regs to spill even.
876 assert(((UnspilledCSGPRs.size() & 1) == 0) && "Odd number of registers!");
878 while (!UnspilledCSGPRs.empty() && Count < 2) {
879 unsigned Reg = UnspilledCSGPRs.back();
880 UnspilledCSGPRs.pop_back();
881 DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo)
882 << " to get a scratch register.\n");
883 MRI->setPhysRegUsed(Reg);
888 // If we didn't find an extra callee-saved register to spill, create
889 // an emergency spill slot.
891 const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
892 int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false);
893 RS->addScavengingFrameIndex(FI);
894 DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
895 << " as the emergency spill slot.\n");