//===- ARM64FrameLowering.cpp - ARM64 Frame Lowering -----------*- C++ -*-====//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the ARM64 implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "ARM64FrameLowering.h"
#include "ARM64InstrInfo.h"
#include "ARM64MachineFunctionInfo.h"
#include "ARM64Subtarget.h"
#include "ARM64TargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"

34 #define DEBUG_TYPE "frame-info"
36 static cl::opt<bool> EnableRedZone("arm64-redzone",
37 cl::desc("enable use of redzone on ARM64"),
38 cl::init(false), cl::Hidden);
40 STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
42 static unsigned estimateStackSize(MachineFunction &MF) {
43 const MachineFrameInfo *FFI = MF.getFrameInfo();
45 for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
46 int FixedOff = -FFI->getObjectOffset(i);
47 if (FixedOff > Offset)
50 for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
51 if (FFI->isDeadObjectIndex(i))
53 Offset += FFI->getObjectSize(i);
54 unsigned Align = FFI->getObjectAlignment(i);
55 // Adjust to alignment boundary
56 Offset = (Offset + Align - 1) / Align * Align;
58 // This does not include the 16 bytes used for fp and lr.
59 return (unsigned)Offset;
62 bool ARM64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
65 // Don't use the red zone if the function explicitly asks us not to.
66 // This is typically used for kernel code.
67 if (MF.getFunction()->getAttributes().hasAttribute(
68 AttributeSet::FunctionIndex, Attribute::NoRedZone))
71 const MachineFrameInfo *MFI = MF.getFrameInfo();
72 const ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();
73 unsigned NumBytes = AFI->getLocalStackSize();
75 // Note: currently hasFP() is always true for hasCalls(), but that's an
76 // implementation detail of the current code, not a strict requirement,
77 // so stay safe here and check both.
78 if (MFI->hasCalls() || hasFP(MF) || NumBytes > 128)
83 /// hasFP - Return true if the specified function should have a dedicated frame
85 bool ARM64FrameLowering::hasFP(const MachineFunction &MF) const {
86 const MachineFrameInfo *MFI = MF.getFrameInfo();
89 const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
90 assert(!RegInfo->needsStackRealignment(MF) &&
91 "No stack realignment on ARM64!");
94 return (MFI->hasCalls() || MFI->hasVarSizedObjects() ||
95 MFI->isFrameAddressTaken());
98 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
99 /// not required, we reserve argument space for call sites in the function
100 /// immediately on entry to the current function. This eliminates the need for
101 /// add/sub sp brackets around call sites. Returns true if the call frame is
102 /// included as part of the stack frame.
103 bool ARM64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
104 return !MF.getFrameInfo()->hasVarSizedObjects();
107 void ARM64FrameLowering::eliminateCallFramePseudoInstr(
108 MachineFunction &MF, MachineBasicBlock &MBB,
109 MachineBasicBlock::iterator I) const {
110 const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
111 const ARM64InstrInfo *TII =
112 static_cast<const ARM64InstrInfo *>(MF.getTarget().getInstrInfo());
113 if (!TFI->hasReservedCallFrame(MF)) {
114 // If we have alloca, convert as follows:
115 // ADJCALLSTACKDOWN -> sub, sp, sp, amount
116 // ADJCALLSTACKUP -> add, sp, sp, amount
117 MachineInstr *Old = I;
118 DebugLoc DL = Old->getDebugLoc();
119 unsigned Amount = Old->getOperand(0).getImm();
121 // We need to keep the stack aligned properly. To do this, we round the
122 // amount of space needed for the outgoing arguments up to the next
123 // alignment boundary.
124 unsigned Align = TFI->getStackAlignment();
125 Amount = (Amount + Align - 1) / Align * Align;
127 // Replace the pseudo instruction with a new instruction...
128 unsigned Opc = Old->getOpcode();
129 if (Opc == ARM64::ADJCALLSTACKDOWN) {
130 emitFrameOffset(MBB, I, DL, ARM64::SP, ARM64::SP, -Amount, TII);
132 assert(Opc == ARM64::ADJCALLSTACKUP && "expected ADJCALLSTACKUP");
133 emitFrameOffset(MBB, I, DL, ARM64::SP, ARM64::SP, Amount, TII);
141 ARM64FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
142 MachineBasicBlock::iterator MBBI,
143 unsigned FramePtr) const {
144 MachineFunction &MF = *MBB.getParent();
145 MachineFrameInfo *MFI = MF.getFrameInfo();
146 MachineModuleInfo &MMI = MF.getMMI();
147 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
148 const ARM64InstrInfo *TII = TM.getInstrInfo();
149 DebugLoc DL = MBB.findDebugLoc(MBBI);
151 // Add callee saved registers to move list.
152 const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
156 const DataLayout *TD = MF.getTarget().getDataLayout();
157 bool HasFP = hasFP(MF);
159 // Calculate amount of bytes used for return address storing.
160 int stackGrowth = -TD->getPointerSize(0);
162 // Calculate offsets.
163 int64_t saveAreaOffset = (HasFP ? 2 : 1) * stackGrowth;
164 unsigned TotalSkipped = 0;
165 for (const auto &Info : CSI) {
166 unsigned Reg = Info.getReg();
167 int64_t Offset = MFI->getObjectOffset(Info.getFrameIdx()) -
168 getOffsetOfLocalArea() + saveAreaOffset;
170 // Don't output a new CFI directive if we're re-saving the frame pointer or
171 // link register. This happens when the PrologEpilogInserter has inserted an
172 // extra "STP" of the frame pointer and link register -- the "emitPrologue"
173 // method automatically generates the directives when frame pointers are
174 // used. If we generate CFI directives for the extra "STP"s, the linker will
175 // lose track of the correct values for the frame pointer and link register.
176 if (HasFP && (FramePtr == Reg || Reg == ARM64::LR)) {
177 TotalSkipped += stackGrowth;
181 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
182 unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
183 nullptr, DwarfReg, Offset - TotalSkipped));
184 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
185 .addCFIIndex(CFIIndex);
189 void ARM64FrameLowering::emitPrologue(MachineFunction &MF) const {
190 MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
191 MachineBasicBlock::iterator MBBI = MBB.begin();
192 const MachineFrameInfo *MFI = MF.getFrameInfo();
193 const Function *Fn = MF.getFunction();
194 const ARM64RegisterInfo *RegInfo = TM.getRegisterInfo();
195 const ARM64InstrInfo *TII = TM.getInstrInfo();
196 MachineModuleInfo &MMI = MF.getMMI();
197 ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();
198 bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
199 bool HasFP = hasFP(MF);
200 DebugLoc DL = MBB.findDebugLoc(MBBI);
202 int NumBytes = (int)MFI->getStackSize();
203 if (!AFI->hasStackFrame()) {
204 assert(!HasFP && "unexpected function without stack frame but with FP");
206 // All of the stack allocation is for locals.
207 AFI->setLocalStackSize(NumBytes);
209 // Label used to tie together the PROLOG_LABEL and the MachineMoves.
210 MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
212 // REDZONE: If the stack size is less than 128 bytes, we don't need
213 // to actually allocate.
214 if (NumBytes && !canUseRedZone(MF)) {
215 emitFrameOffset(MBB, MBBI, DL, ARM64::SP, ARM64::SP, -NumBytes, TII,
216 MachineInstr::FrameSetup);
218 // Encode the stack size of the leaf function.
219 unsigned CFIIndex = MMI.addFrameInst(
220 MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
221 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
222 .addCFIIndex(CFIIndex);
223 } else if (NumBytes) {
224 ++NumRedZoneFunctions;
230 // Only set up FP if we actually need to.
233 // First instruction must a) allocate the stack and b) have an immediate
234 // that is a multiple of -2.
235 assert((MBBI->getOpcode() == ARM64::STPXpre ||
236 MBBI->getOpcode() == ARM64::STPDpre) &&
237 MBBI->getOperand(2).getReg() == ARM64::SP &&
238 MBBI->getOperand(3).getImm() < 0 &&
239 (MBBI->getOperand(3).getImm() & 1) == 0);
241 // Frame pointer is fp = sp - 16. Since the STPXpre subtracts the space
242 // required for the callee saved register area we get the frame pointer
243 // by addding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8.
244 FPOffset = -(MBBI->getOperand(3).getImm() + 2) * 8;
245 assert(FPOffset >= 0 && "Bad Framepointer Offset");
248 // Move past the saves of the callee-saved registers.
249 while (MBBI->getOpcode() == ARM64::STPXi ||
250 MBBI->getOpcode() == ARM64::STPDi ||
251 MBBI->getOpcode() == ARM64::STPXpre ||
252 MBBI->getOpcode() == ARM64::STPDpre) {
256 assert(NumBytes >= 0 && "Negative stack allocation size!?");
258 // Issue sub fp, sp, FPOffset or
259 // mov fp,sp when FPOffset is zero.
260 // Note: All stores of callee-saved registers are marked as "FrameSetup".
261 // This code marks the instruction(s) that set the FP also.
262 emitFrameOffset(MBB, MBBI, DL, ARM64::FP, ARM64::SP, FPOffset, TII,
263 MachineInstr::FrameSetup);
266 // All of the remaining stack allocations are for locals.
267 AFI->setLocalStackSize(NumBytes);
269 // Allocate space for the rest of the frame.
271 // If we're a leaf function, try using the red zone.
272 if (!canUseRedZone(MF))
273 emitFrameOffset(MBB, MBBI, DL, ARM64::SP, ARM64::SP, -NumBytes, TII,
274 MachineInstr::FrameSetup);
277 // If we need a base pointer, set it up here. It's whatever the value of the
278 // stack pointer is at this point. Any variable size objects will be allocated
279 // after this, so we can still use the base pointer to reference locals.
281 // FIXME: Clarify FrameSetup flags here.
282 // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
285 if (RegInfo->hasBasePointer(MF))
286 TII->copyPhysReg(MBB, MBBI, DL, ARM64::X19, ARM64::SP, false);
288 if (needsFrameMoves) {
289 const DataLayout *TD = MF.getTarget().getDataLayout();
290 const int StackGrowth = -TD->getPointerSize(0);
291 unsigned FramePtr = RegInfo->getFrameRegister(MF);
293 // An example of the prologue:
300 // .cfi_personality 155, ___gxx_personality_v0
302 // .cfi_lsda 16, Lexception33
304 // stp xa,bx, [sp, -#offset]!
306 // stp x28, x27, [sp, #offset-32]
307 // stp fp, lr, [sp, #offset-16]
308 // add fp, sp, #offset - 16
312 // +-------------------------------------------+
313 // 10000 | ........ | ........ | ........ | ........ |
314 // 10004 | ........ | ........ | ........ | ........ |
315 // +-------------------------------------------+
316 // 10008 | ........ | ........ | ........ | ........ |
317 // 1000c | ........ | ........ | ........ | ........ |
318 // +===========================================+
319 // 10010 | X28 Register |
320 // 10014 | X28 Register |
321 // +-------------------------------------------+
322 // 10018 | X27 Register |
323 // 1001c | X27 Register |
324 // +===========================================+
325 // 10020 | Frame Pointer |
326 // 10024 | Frame Pointer |
327 // +-------------------------------------------+
328 // 10028 | Link Register |
329 // 1002c | Link Register |
330 // +===========================================+
331 // 10030 | ........ | ........ | ........ | ........ |
332 // 10034 | ........ | ........ | ........ | ........ |
333 // +-------------------------------------------+
334 // 10038 | ........ | ........ | ........ | ........ |
335 // 1003c | ........ | ........ | ........ | ........ |
336 // +-------------------------------------------+
338 // [sp] = 10030 :: >>initial value<<
339 // sp = 10020 :: stp fp, lr, [sp, #-16]!
340 // fp = sp == 10020 :: mov fp, sp
341 // [sp] == 10020 :: stp x28, x27, [sp, #-16]!
342 // sp == 10010 :: >>final value<<
344 // The frame pointer (w29) points to address 10020. If we use an offset of
345 // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
346 // for w27, and -32 for w28:
349 // .cfi_def_cfa w29, 16
351 // .cfi_offset w30, -8
353 // .cfi_offset w29, -16
355 // .cfi_offset w27, -24
357 // .cfi_offset w28, -32
360 // Define the current CFA rule to use the provided FP.
361 unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
362 unsigned CFIIndex = MMI.addFrameInst(
363 MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
364 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
365 .addCFIIndex(CFIIndex);
367 // Record the location of the stored LR
368 unsigned LR = RegInfo->getDwarfRegNum(ARM64::LR, true);
369 CFIIndex = MMI.addFrameInst(
370 MCCFIInstruction::createOffset(nullptr, LR, StackGrowth));
371 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
372 .addCFIIndex(CFIIndex);
374 // Record the location of the stored FP
375 CFIIndex = MMI.addFrameInst(
376 MCCFIInstruction::createOffset(nullptr, Reg, 2 * StackGrowth));
377 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
378 .addCFIIndex(CFIIndex);
380 // Encode the stack size of the leaf function.
381 unsigned CFIIndex = MMI.addFrameInst(
382 MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
383 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
384 .addCFIIndex(CFIIndex);
387 // Now emit the moves for whatever callee saved regs we have.
388 emitCalleeSavedFrameMoves(MBB, MBBI, FramePtr);
392 static bool isCalleeSavedRegister(unsigned Reg, const MCPhysReg *CSRegs) {
393 for (unsigned i = 0; CSRegs[i]; ++i)
394 if (Reg == CSRegs[i])
399 static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) {
400 if (MI->getOpcode() == ARM64::LDPXpost ||
401 MI->getOpcode() == ARM64::LDPDpost || MI->getOpcode() == ARM64::LDPXi ||
402 MI->getOpcode() == ARM64::LDPDi) {
403 if (!isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) ||
404 !isCalleeSavedRegister(MI->getOperand(1).getReg(), CSRegs) ||
405 MI->getOperand(2).getReg() != ARM64::SP)
413 void ARM64FrameLowering::emitEpilogue(MachineFunction &MF,
414 MachineBasicBlock &MBB) const {
415 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
416 assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
417 MachineFrameInfo *MFI = MF.getFrameInfo();
418 const ARM64InstrInfo *TII =
419 static_cast<const ARM64InstrInfo *>(MF.getTarget().getInstrInfo());
420 const ARM64RegisterInfo *RegInfo =
421 static_cast<const ARM64RegisterInfo *>(MF.getTarget().getRegisterInfo());
422 DebugLoc DL = MBBI->getDebugLoc();
424 int NumBytes = MFI->getStackSize();
425 unsigned NumRestores = 0;
426 // Move past the restores of the callee-saved registers.
427 MachineBasicBlock::iterator LastPopI = MBBI;
428 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
429 if (LastPopI != MBB.begin()) {
433 } while (LastPopI != MBB.begin() && isCSRestore(LastPopI, CSRegs));
434 if (!isCSRestore(LastPopI, CSRegs)) {
439 NumBytes -= NumRestores * 16;
440 assert(NumBytes >= 0 && "Negative stack allocation size!?");
443 // If this was a redzone leaf function, we don't need to restore the
445 if (!canUseRedZone(MF))
446 emitFrameOffset(MBB, LastPopI, DL, ARM64::SP, ARM64::SP, NumBytes, TII);
450 // Restore the original stack pointer.
451 // FIXME: Rather than doing the math here, we should instead just use
452 // non-post-indexed loads for the restores if we aren't actually going to
453 // be able to save any instructions.
454 if (NumBytes || MFI->hasVarSizedObjects())
455 emitFrameOffset(MBB, LastPopI, DL, ARM64::SP, ARM64::FP,
456 -(NumRestores - 1) * 16, TII, MachineInstr::NoFlags);
459 /// getFrameIndexOffset - Returns the displacement from the frame register to
460 /// the stack frame of the specified index.
461 int ARM64FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
464 return getFrameIndexReference(MF, FI, FrameReg);
467 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
468 /// debug info. It's the same as what we use for resolving the code-gen
469 /// references for now. FIXME: This can go wrong when references are
470 /// SP-relative and simple call frames aren't used.
471 int ARM64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
473 unsigned &FrameReg) const {
474 return resolveFrameIndexReference(MF, FI, FrameReg);
477 int ARM64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
478 int FI, unsigned &FrameReg,
479 bool PreferFP) const {
480 const MachineFrameInfo *MFI = MF.getFrameInfo();
481 const ARM64RegisterInfo *RegInfo =
482 static_cast<const ARM64RegisterInfo *>(MF.getTarget().getRegisterInfo());
483 const ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();
484 int FPOffset = MFI->getObjectOffset(FI) + 16;
485 int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
486 bool isFixed = MFI->isFixedObjectIndex(FI);
488 // Use frame pointer to reference fixed objects. Use it for locals if
489 // there are VLAs (and thus the SP isn't reliable as a base).
490 // Make sure useFPForScavengingIndex() does the right thing for the emergency
493 if (AFI->hasStackFrame()) {
494 // Note: Keeping the following as multiple 'if' statements rather than
495 // merging to a single expression for readability.
497 // Argument access should always use the FP.
500 } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF)) {
501 // Use SP or FP, whichever gives us the best chance of the offset
502 // being in range for direct access. If the FPOffset is positive,
503 // that'll always be best, as the SP will be even further away.
504 // If the FPOffset is negative, we have to keep in mind that the
505 // available offset range for negative offsets is smaller than for
506 // positive ones. If we have variable sized objects, we're stuck with
507 // using the FP regardless, though, as the SP offset is unknown
508 // and we don't have a base pointer available. If an offset is
509 // available via the FP and the SP, use whichever is closest.
510 if (PreferFP || MFI->hasVarSizedObjects() || FPOffset >= 0 ||
511 (FPOffset >= -256 && Offset > -FPOffset))
517 FrameReg = RegInfo->getFrameRegister(MF);
521 // Use the base pointer if we have one.
522 if (RegInfo->hasBasePointer(MF))
523 FrameReg = RegInfo->getBaseRegister();
525 FrameReg = ARM64::SP;
526 // If we're using the red zone for this function, the SP won't actually
527 // be adjusted, so the offsets will be negative. They're also all
528 // within range of the signed 9-bit immediate instructions.
529 if (canUseRedZone(MF))
530 Offset -= AFI->getLocalStackSize();
536 static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
537 if (Reg != ARM64::LR)
538 return getKillRegState(true);
540 // LR maybe referred to later by an @llvm.returnaddress intrinsic.
541 bool LRLiveIn = MF.getRegInfo().isLiveIn(ARM64::LR);
542 bool LRKill = !(LRLiveIn && MF.getFrameInfo()->isReturnAddressTaken());
543 return getKillRegState(LRKill);
546 bool ARM64FrameLowering::spillCalleeSavedRegisters(
547 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
548 const std::vector<CalleeSavedInfo> &CSI,
549 const TargetRegisterInfo *TRI) const {
550 MachineFunction &MF = *MBB.getParent();
551 const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
552 unsigned Count = CSI.size();
554 assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
557 DL = MI->getDebugLoc();
559 for (unsigned i = 0; i < Count; i += 2) {
560 unsigned idx = Count - i - 2;
561 unsigned Reg1 = CSI[idx].getReg();
562 unsigned Reg2 = CSI[idx + 1].getReg();
563 // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
564 // list to come in sorted by frame index so that we can issue the store
565 // pair instructions directly. Assert if we see anything otherwise.
567 // The order of the registers in the list is controlled by
568 // getCalleeSavedRegs(), so they will always be in-order, as well.
569 assert(CSI[idx].getFrameIdx() + 1 == CSI[idx + 1].getFrameIdx() &&
570 "Out of order callee saved regs!");
572 assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
573 assert((i & 1) == 0 && "Odd index for callee-saved reg spill!");
574 // Issue sequence of non-sp increment and pi sp spills for cs regs. The
575 // first spill is a pre-increment that allocates the stack.
577 // stp x22, x21, [sp, #-48]! // addImm(-6)
578 // stp x20, x19, [sp, #16] // addImm(+2)
579 // stp fp, lr, [sp, #32] // addImm(+4)
580 // Rationale: This sequence saves uop updates compared to a sequence of
581 // pre-increment spills like stp xi,xj,[sp,#-16]!
582 // Note: Similar rational and sequence for restores in epilog.
583 if (ARM64::GPR64RegClass.contains(Reg1)) {
584 assert(ARM64::GPR64RegClass.contains(Reg2) &&
585 "Expected GPR64 callee-saved register pair!");
586 // For first spill use pre-increment store.
588 StrOpc = ARM64::STPXpre;
590 StrOpc = ARM64::STPXi;
591 } else if (ARM64::FPR64RegClass.contains(Reg1)) {
592 assert(ARM64::FPR64RegClass.contains(Reg2) &&
593 "Expected FPR64 callee-saved register pair!");
594 // For first spill use pre-increment store.
596 StrOpc = ARM64::STPDpre;
598 StrOpc = ARM64::STPDi;
600 llvm_unreachable("Unexpected callee saved register!");
601 DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1) << ", "
602 << TRI->getName(Reg2) << ") -> fi#(" << CSI[idx].getFrameIdx()
603 << ", " << CSI[idx + 1].getFrameIdx() << ")\n");
604 // Compute offset: i = 0 => offset = -Count;
605 // i = 2 => offset = -(Count - 2) + Count = 2 = i; etc.
606 const int Offset = (i == 0) ? -Count : i;
607 assert((Offset >= -64 && Offset <= 63) &&
608 "Offset out of bounds for STP immediate");
609 BuildMI(MBB, MI, DL, TII.get(StrOpc))
610 .addReg(Reg2, getPrologueDeath(MF, Reg2))
611 .addReg(Reg1, getPrologueDeath(MF, Reg1))
613 .addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit
614 .setMIFlag(MachineInstr::FrameSetup);
619 bool ARM64FrameLowering::restoreCalleeSavedRegisters(
620 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
621 const std::vector<CalleeSavedInfo> &CSI,
622 const TargetRegisterInfo *TRI) const {
623 MachineFunction &MF = *MBB.getParent();
624 const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
625 unsigned Count = CSI.size();
627 assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
630 DL = MI->getDebugLoc();
632 for (unsigned i = 0; i < Count; i += 2) {
633 unsigned Reg1 = CSI[i].getReg();
634 unsigned Reg2 = CSI[i + 1].getReg();
635 // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
636 // list to come in sorted by frame index so that we can issue the store
637 // pair instructions directly. Assert if we see anything otherwise.
638 assert(CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx() &&
639 "Out of order callee saved regs!");
640 // Issue sequence of non-sp increment and sp-pi restores for cs regs. Only
641 // the last load is sp-pi post-increment and de-allocates the stack:
643 // ldp fp, lr, [sp, #32] // addImm(+4)
644 // ldp x20, x19, [sp, #16] // addImm(+2)
645 // ldp x22, x21, [sp], #48 // addImm(+6)
646 // Note: see comment in spillCalleeSavedRegisters()
649 assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
650 assert((i & 1) == 0 && "Odd index for callee-saved reg spill!");
651 if (ARM64::GPR64RegClass.contains(Reg1)) {
652 assert(ARM64::GPR64RegClass.contains(Reg2) &&
653 "Expected GPR64 callee-saved register pair!");
655 LdrOpc = ARM64::LDPXpost;
657 LdrOpc = ARM64::LDPXi;
658 } else if (ARM64::FPR64RegClass.contains(Reg1)) {
659 assert(ARM64::FPR64RegClass.contains(Reg2) &&
660 "Expected FPR64 callee-saved register pair!");
662 LdrOpc = ARM64::LDPDpost;
664 LdrOpc = ARM64::LDPDi;
666 llvm_unreachable("Unexpected callee saved register!");
667 DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1) << ", "
668 << TRI->getName(Reg2) << ") -> fi#(" << CSI[i].getFrameIdx()
669 << ", " << CSI[i + 1].getFrameIdx() << ")\n");
671 // Compute offset: i = 0 => offset = Count - 2; i = 2 => offset = Count - 4;
673 const int Offset = (i == Count - 2) ? Count : Count - i - 2;
674 assert((Offset >= -64 && Offset <= 63) &&
675 "Offset out of bounds for LDP immediate");
676 BuildMI(MBB, MI, DL, TII.get(LdrOpc))
677 .addReg(Reg2, getDefRegState(true))
678 .addReg(Reg1, getDefRegState(true))
680 .addImm(Offset); // [sp], #offset * 8 or [sp, #offset * 8]
681 // where the factor * 8 is implicit
686 void ARM64FrameLowering::processFunctionBeforeCalleeSavedScan(
687 MachineFunction &MF, RegScavenger *RS) const {
688 const ARM64RegisterInfo *RegInfo =
689 static_cast<const ARM64RegisterInfo *>(MF.getTarget().getRegisterInfo());
690 ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();
691 MachineRegisterInfo *MRI = &MF.getRegInfo();
692 SmallVector<unsigned, 4> UnspilledCSGPRs;
693 SmallVector<unsigned, 4> UnspilledCSFPRs;
695 // The frame record needs to be created by saving the appropriate registers
697 MRI->setPhysRegUsed(ARM64::FP);
698 MRI->setPhysRegUsed(ARM64::LR);
701 // Spill the BasePtr if it's used. Do this first thing so that the
702 // getCalleeSavedRegs() below will get the right answer.
703 if (RegInfo->hasBasePointer(MF))
704 MRI->setPhysRegUsed(RegInfo->getBaseRegister());
706 // If any callee-saved registers are used, the frame cannot be eliminated.
707 unsigned NumGPRSpilled = 0;
708 unsigned NumFPRSpilled = 0;
709 bool ExtraCSSpill = false;
710 bool CanEliminateFrame = true;
711 DEBUG(dbgs() << "*** processFunctionBeforeCalleeSavedScan\nUsed CSRs:");
712 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
714 // Check pairs of consecutive callee-saved registers.
715 for (unsigned i = 0; CSRegs[i]; i += 2) {
716 assert(CSRegs[i + 1] && "Odd number of callee-saved registers!");
718 const unsigned OddReg = CSRegs[i];
719 const unsigned EvenReg = CSRegs[i + 1];
720 assert((ARM64::GPR64RegClass.contains(OddReg) &&
721 ARM64::GPR64RegClass.contains(EvenReg)) ^
722 (ARM64::FPR64RegClass.contains(OddReg) &&
723 ARM64::FPR64RegClass.contains(EvenReg)) &&
724 "Register class mismatch!");
726 const bool OddRegUsed = MRI->isPhysRegUsed(OddReg);
727 const bool EvenRegUsed = MRI->isPhysRegUsed(EvenReg);
729 // Early exit if none of the registers in the register pair is actually
731 if (!OddRegUsed && !EvenRegUsed) {
732 if (ARM64::GPR64RegClass.contains(OddReg)) {
733 UnspilledCSGPRs.push_back(OddReg);
734 UnspilledCSGPRs.push_back(EvenReg);
736 UnspilledCSFPRs.push_back(OddReg);
737 UnspilledCSFPRs.push_back(EvenReg);
742 unsigned Reg = ARM64::NoRegister;
743 // If only one of the registers of the register pair is used, make sure to
744 // mark the other one as used as well.
745 if (OddRegUsed ^ EvenRegUsed) {
746 // Find out which register is the additional spill.
747 Reg = OddRegUsed ? EvenReg : OddReg;
748 MRI->setPhysRegUsed(Reg);
751 DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo));
752 DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo));
754 assert(((OddReg == ARM64::LR && EvenReg == ARM64::FP) ||
755 (RegInfo->getEncodingValue(OddReg) + 1 ==
756 RegInfo->getEncodingValue(EvenReg))) &&
757 "Register pair of non-adjacent registers!");
758 if (ARM64::GPR64RegClass.contains(OddReg)) {
760 // If it's not a reserved register, we can use it in lieu of an
761 // emergency spill slot for the register scavenger.
762 // FIXME: It would be better to instead keep looking and choose another
763 // unspilled register that isn't reserved, if there is one.
764 if (Reg != ARM64::NoRegister && !RegInfo->isReservedReg(MF, Reg))
769 CanEliminateFrame = false;
772 // FIXME: Set BigStack if any stack slot references may be out of range.
773 // For now, just conservatively guestimate based on unscaled indexing
774 // range. We'll end up allocating an unnecessary spill slot a lot, but
775 // realistically that's not a big deal at this stage of the game.
776 // The CSR spill slots have not been allocated yet, so estimateStackSize
777 // won't include them.
778 MachineFrameInfo *MFI = MF.getFrameInfo();
779 unsigned CFSize = estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
780 DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
781 bool BigStack = (CFSize >= 256);
782 if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
783 AFI->setHasStackFrame(true);
785 // Estimate if we might need to scavenge a register at some point in order
786 // to materialize a stack offset. If so, either spill one additional
787 // callee-saved register or reserve a special spill slot to facilitate
788 // register scavenging. If we already spilled an extra callee-saved register
789 // above to keep the number of spills even, we don't need to do anything else
791 if (BigStack && !ExtraCSSpill) {
793 // If we're adding a register to spill here, we have to add two of them
794 // to keep the number of regs to spill even.
795 assert(((UnspilledCSGPRs.size() & 1) == 0) && "Odd number of registers!");
797 while (!UnspilledCSGPRs.empty() && Count < 2) {
798 unsigned Reg = UnspilledCSGPRs.back();
799 UnspilledCSGPRs.pop_back();
800 DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo)
801 << " to get a scratch register.\n");
802 MRI->setPhysRegUsed(Reg);
807 // If we didn't find an extra callee-saved register to spill, create
808 // an emergency spill slot.
810 const TargetRegisterClass *RC = &ARM64::GPR64RegClass;
811 int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false);
812 RS->addScavengingFrameIndex(FI);
813 DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
814 << " as the emergency spill slot.\n");