From: NAKAMURA Takumi Date: Sun, 27 Feb 2011 08:47:19 +0000 (+0000) Subject: Target/X86: Always emit "push/pop GPRs" in prologue/epilogue and emit "spill/reload... X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=419f23278388a8999b5c0bbe73ce823f63747f28;p=oota-llvm.git Target/X86: Always emit "push/pop GPRs" in prologue/epilogue and emit "spill/reload frames" for XMMs. It improves Win64's prologue/epilogue but it would not affect ia32 and amd64 (lack of nonvolatile XMMs). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126568 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 0a3f931acf9..3246c4379ce 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -892,7 +892,6 @@ bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); - bool isWin64 = STI.isTargetWin64(); unsigned SlotSize = STI.is64Bit() ? 8 : 4; unsigned FPReg = TRI->getFrameRegister(MF); unsigned CalleeFrameSize = 0; @@ -900,25 +899,39 @@ bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); X86MachineFunctionInfo *X86FI = MF.getInfo(); + // Push GPRs. It increases frame size. unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r; for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); + if (!X86::GR64RegClass.contains(Reg) && + !X86::GR32RegClass.contains(Reg)) + continue; // Add the callee-saved register as live-in. It's killed at the spill. MBB.addLiveIn(Reg); if (Reg == FPReg) // X86RegisterInfo::emitPrologue will handle spilling of frame register. continue; - if (!X86::VR128RegClass.contains(Reg) && !isWin64) { - CalleeFrameSize += SlotSize; - BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill); - } else { - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), - RC, TRI); - } + CalleeFrameSize += SlotSize; + BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill); } X86FI->setCalleeSavedFrameSize(CalleeFrameSize); + + // Make XMM regs spilled. X86 does not have ability of push/pop XMM. + // It can be done by spilling XMMs to stack frame. + // Note that only Win64 ABI might spill XMMs. + for (unsigned i = CSI.size(); i != 0; --i) { + unsigned Reg = CSI[i-1].getReg(); + if (X86::GR64RegClass.contains(Reg) || + X86::GR32RegClass.contains(Reg)) + continue; + // Add the callee-saved register as live-in. It's killed at the spill. + MBB.addLiveIn(Reg); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), + RC, TRI); + } + return true; } @@ -933,21 +946,30 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + + // Reload XMMs from stack frame. + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + if (X86::GR64RegClass.contains(Reg) || + X86::GR32RegClass.contains(Reg)) + continue; + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), + RC, TRI); + } + + // POP GPRs. unsigned FPReg = TRI->getFrameRegister(MF); - bool isWin64 = STI.isTargetWin64(); unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r; for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); + if (!X86::GR64RegClass.contains(Reg) && + !X86::GR32RegClass.contains(Reg)) + continue; if (Reg == FPReg) // X86RegisterInfo::emitEpilogue will handle restoring of frame register. continue; - if (!X86::VR128RegClass.contains(Reg) && !isWin64) { - BuildMI(MBB, MI, DL, TII.get(Opc), Reg); - } else { - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), - RC, TRI); - } + BuildMI(MBB, MI, DL, TII.get(Opc), Reg); } return true; } diff --git a/test/CodeGen/X86/tailcallstack64.ll b/test/CodeGen/X86/tailcallstack64.ll index 0c732d56b6c..09277794315 100644 --- a/test/CodeGen/X86/tailcallstack64.ll +++ b/test/CodeGen/X86/tailcallstack64.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -tailcallopt -mtriple=x86_64-win32 -post-RA-scheduler=true | FileCheck %s ; FIXME: Redundant unused stack allocation could be eliminated. -; CHECK: subq ${{24|88}}, %rsp +; CHECK: subq ${{24|72}}, %rsp ; Check that lowered arguments on the stack do not overwrite each other. ; Add %in1 %p1 to a different temporary register (%eax).