void ARM64FrameLowering::eliminateCallFramePseudoInstr(
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
const ARM64InstrInfo *TII =
static_cast<const ARM64InstrInfo *>(MF.getTarget().getInstrInfo());
+ DebugLoc DL = I->getDebugLoc();
+ int Opc = I->getOpcode();
+ bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
+ uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
+
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
if (!TFI->hasReservedCallFrame(MF)) {
- // If we have alloca, convert as follows:
- // ADJCALLSTACKDOWN -> sub, sp, sp, amount
- // ADJCALLSTACKUP -> add, sp, sp, amount
- MachineInstr *Old = I;
- DebugLoc DL = Old->getDebugLoc();
- unsigned Amount = Old->getOperand(0).getImm();
- if (Amount != 0) {
- // We need to keep the stack aligned properly. To do this, we round the
- // amount of space needed for the outgoing arguments up to the next
- // alignment boundary.
- unsigned Align = TFI->getStackAlignment();
- Amount = (Amount + Align - 1) / Align * Align;
-
- // Replace the pseudo instruction with a new instruction...
- unsigned Opc = Old->getOpcode();
- if (Opc == ARM64::ADJCALLSTACKDOWN) {
- emitFrameOffset(MBB, I, DL, ARM64::SP, ARM64::SP, -Amount, TII);
- } else {
- assert(Opc == ARM64::ADJCALLSTACKUP && "expected ADJCALLSTACKUP");
- emitFrameOffset(MBB, I, DL, ARM64::SP, ARM64::SP, Amount, TII);
- }
+ unsigned Align = getStackAlignment();
+
+ int64_t Amount = I->getOperand(0).getImm();
+ Amount = RoundUpToAlignment(Amount, Align);
+ if (!IsDestroy)
+ Amount = -Amount;
+
+ // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
+ // doesn't have to pop anything), then the first operand will be zero too so
+ // this adjustment is a no-op.
+ if (CalleePopAmount == 0) {
+ // FIXME: in-function stack adjustment for calls is limited to 24-bits
+ // because there's no guaranteed temporary register available.
+ //
+ // ADD/SUB (immediate) has only LSL #0 and LSL #12 avaiable.
+ // 1) For offset <= 12-bit, we use LSL #0
+ // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
+ // LSL #0, and the other uses LSL #12.
+ //
+ // Mostly call frames will be allocated at the start of a function so
+ // this is OK, but it is a limitation that needs dealing with.
+ assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
+ emitFrameOffset(MBB, I, DL, ARM64::SP, ARM64::SP, Amount, TII);
}
+ } else if (CalleePopAmount != 0) {
+ // If the calling convention demands that the callee pops arguments from the
+ // stack, we want to add it back if we have a reserved call frame.
+ assert(CalleePopAmount < 0xffffff && "call frame too large");
+ emitFrameOffset(MBB, I, DL, ARM64::SP, ARM64::SP, -CalleePopAmount, TII);
}
MBB.erase(I);
}
// that is a multiple of -2.
assert((MBBI->getOpcode() == ARM64::STPXpre ||
MBBI->getOpcode() == ARM64::STPDpre) &&
- MBBI->getOperand(2).getReg() == ARM64::SP &&
- MBBI->getOperand(3).getImm() < 0 &&
- (MBBI->getOperand(3).getImm() & 1) == 0);
+ MBBI->getOperand(3).getReg() == ARM64::SP &&
+ MBBI->getOperand(4).getImm() < 0 &&
+ (MBBI->getOperand(4).getImm() & 1) == 0);
// Frame pointer is fp = sp - 16. Since the STPXpre subtracts the space
// required for the callee saved register area we get the frame pointer
// by addding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8.
- FPOffset = -(MBBI->getOperand(3).getImm() + 2) * 8;
+ FPOffset = -(MBBI->getOperand(4).getImm() + 2) * 8;
assert(FPOffset >= 0 && "Bad Framepointer Offset");
}
}
static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) {
+ unsigned RtIdx = 0;
+ if (MI->getOpcode() == ARM64::LDPXpost || MI->getOpcode() == ARM64::LDPDpost)
+ RtIdx = 1;
+
if (MI->getOpcode() == ARM64::LDPXpost ||
MI->getOpcode() == ARM64::LDPDpost || MI->getOpcode() == ARM64::LDPXi ||
MI->getOpcode() == ARM64::LDPDi) {
- if (!isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) ||
- !isCalleeSavedRegister(MI->getOperand(1).getReg(), CSRegs) ||
- MI->getOperand(2).getReg() != ARM64::SP)
+ if (!isCalleeSavedRegister(MI->getOperand(RtIdx).getReg(), CSRegs) ||
+ !isCalleeSavedRegister(MI->getOperand(RtIdx + 1).getReg(), CSRegs) ||
+ MI->getOperand(RtIdx + 2).getReg() != ARM64::SP)
return false;
return true;
}
const ARM64RegisterInfo *RegInfo =
static_cast<const ARM64RegisterInfo *>(MF.getTarget().getRegisterInfo());
DebugLoc DL = MBBI->getDebugLoc();
+ unsigned RetOpcode = MBBI->getOpcode();
int NumBytes = MFI->getStackSize();
+ const ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();
+
+ // Initial and residual are named for consitency with the prologue. Note that
+ // in the epilogue, the residual adjustment is executed first.
+ uint64_t ArgumentPopSize = 0;
+ if (RetOpcode == ARM64::TCRETURNdi || RetOpcode == ARM64::TCRETURNri) {
+ MachineOperand &StackAdjust = MBBI->getOperand(1);
+
+ // For a tail-call in a callee-pops-arguments environment, some or all of
+ // the stack may actually be in use for the call's arguments, this is
+ // calculated during LowerCall and consumed here...
+ ArgumentPopSize = StackAdjust.getImm();
+ } else {
+ // ... otherwise the amount to pop is *all* of the argument space,
+ // conveniently stored in the MachineFunctionInfo by
+ // LowerFormalArguments. This will, of course, be zero for the C calling
+ // convention.
+ ArgumentPopSize = AFI->getArgumentStackToRestore();
+ }
+
+ // The stack frame should be like below,
+ //
+ // ---------------------- ---
+ // | | |
+ // | BytesInStackArgArea| CalleeArgStackSize
+ // | (NumReusableBytes) | (of tail call)
+ // | | ---
+ // | | |
+ // ---------------------| --- |
+ // | | | |
+ // | CalleeSavedReg | | |
+ // | (NumRestores * 16) | | |
+ // | | | |
+ // ---------------------| | NumBytes
+ // | | StackSize (StackAdjustUp)
+ // | LocalStackSize | | |
+ // | (covering callee | | |
+ // | args) | | |
+ // | | | |
+ // ---------------------- --- ---
+ //
+ // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
+ // = StackSize + ArgumentPopSize
+ //
+ // ARM64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
+ // it as the 2nd argument of ARM64ISD::TC_RETURN.
+ NumBytes += ArgumentPopSize;
+
unsigned NumRestores = 0;
// Move past the restores of the callee-saved registers.
MachineBasicBlock::iterator LastPopI = MBBI;
const int Offset = (i == 0) ? -Count : i;
assert((Offset >= -64 && Offset <= 63) &&
"Offset out of bounds for STP immediate");
- BuildMI(MBB, MI, DL, TII.get(StrOpc))
- .addReg(Reg2, getPrologueDeath(MF, Reg2))
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
+ if (StrOpc == ARM64::STPDpre || StrOpc == ARM64::STPXpre)
+ MIB.addReg(ARM64::SP, RegState::Define);
+
+ MIB.addReg(Reg2, getPrologueDeath(MF, Reg2))
.addReg(Reg1, getPrologueDeath(MF, Reg1))
.addReg(ARM64::SP)
.addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit
const int Offset = (i == Count - 2) ? Count : Count - i - 2;
assert((Offset >= -64 && Offset <= 63) &&
"Offset out of bounds for LDP immediate");
- BuildMI(MBB, MI, DL, TII.get(LdrOpc))
- .addReg(Reg2, getDefRegState(true))
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
+ if (LdrOpc == ARM64::LDPXpost || LdrOpc == ARM64::LDPDpost)
+ MIB.addReg(ARM64::SP, RegState::Define);
+
+ MIB.addReg(Reg2, getDefRegState(true))
.addReg(Reg1, getDefRegState(true))
.addReg(ARM64::SP)
.addImm(Offset); // [sp], #offset * 8 or [sp, #offset * 8]