From: Venkatraman Govindaraju Date: Sat, 1 Jun 2013 04:51:18 +0000 (+0000) Subject: [Sparc] Generate correct code for leaf functions with stack objects X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=72ad17c48c15562fe31c65f6daa09c83f42860c1;p=oota-llvm.git [Sparc] Generate correct code for leaf functions with stack objects git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183067 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp index 2e5229879c7..6456cda9403 100644 --- a/lib/Target/Sparc/SparcFrameLowering.cpp +++ b/lib/Target/Sparc/SparcFrameLowering.cpp @@ -35,8 +35,6 @@ DisableLeafProc("disable-sparc-leaf-proc", void SparcFrameLowering::emitPrologue(MachineFunction &MF) const { SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>(); - if (FuncInfo->isLeafProc()) - return; MachineBasicBlock &MBB = MF.front(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -48,31 +46,18 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const { // Get the number of bytes to allocate from the FrameInfo int NumBytes = (int) MFI->getStackSize(); - if (SubTarget.is64Bit()) { - // All 64-bit stack frames must be 16-byte aligned, and must reserve space - // for spilling the 16 window registers at %sp+BIAS..%sp+BIAS+128. - NumBytes += 128; - // Frames with calls must also reserve space for 6 outgoing arguments - // whether they are used or not. LowerCall_64 takes care of that. - assert(NumBytes % 16 == 0 && "Stack size not 16-byte aligned"); - } else { - // Emit the correct save instruction based on the number of bytes in - // the frame. 
Minimum stack frame size according to V8 ABI is: - // 16 words for register window spill - // 1 word for address of returned aggregate-value - // + 6 words for passing parameters on the stack - // ---------- - // 23 words * 4 bytes per word = 92 bytes - NumBytes += 92; - - // Round up to next doubleword boundary -- a double-word boundary - // is required by the ABI. - NumBytes = RoundUpToAlignment(NumBytes, 8); + unsigned SAVEri = SP::SAVEri; + unsigned SAVErr = SP::SAVErr; + if (FuncInfo->isLeafProc()) { + if (NumBytes == 0) + return; + SAVEri = SP::ADDri; + SAVErr = SP::ADDrr; } - NumBytes = -NumBytes; + NumBytes = - SubTarget.getAdjustedFrameSize(NumBytes); if (NumBytes >= -4096) { - BuildMI(MBB, MBBI, dl, TII.get(SP::SAVEri), SP::O6) + BuildMI(MBB, MBBI, dl, TII.get(SAVEri), SP::O6) .addReg(SP::O6).addImm(NumBytes); } else { // Emit this the hard way. This clobbers G1 which we always know is @@ -82,7 +67,7 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const { // Emit G1 = G1 + I6 BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1) .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1)); - BuildMI(MBB, MBBI, dl, TII.get(SP::SAVErr), SP::O6) + BuildMI(MBB, MBBI, dl, TII.get(SAVErr), SP::O6) .addReg(SP::O6).addReg(SP::G1); } } @@ -109,16 +94,39 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, void SparcFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>(); - if (FuncInfo->isLeafProc()) - return; MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); const SparcInstrInfo &TII = *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo()); DebugLoc dl = MBBI->getDebugLoc(); assert(MBBI->getOpcode() == SP::RETL && "Can only put epilog before 'retl' instruction!"); - BuildMI(MBB, MBBI, dl, TII.get(SP::RESTORErr), SP::G0).addReg(SP::G0) - .addReg(SP::G0); + if (!FuncInfo->isLeafProc()) { + BuildMI(MBB, MBBI, dl, TII.get(SP::RESTORErr), SP::G0).addReg(SP::G0) + 
.addReg(SP::G0); + return; + } + MachineFrameInfo *MFI = MF.getFrameInfo(); + + int NumBytes = (int) MFI->getStackSize(); + if (NumBytes == 0) + return; + + NumBytes = SubTarget.getAdjustedFrameSize(NumBytes); + + if (NumBytes < 4096) { + BuildMI(MBB, MBBI, dl, TII.get(SP::ADDri), SP::O6) + .addReg(SP::O6).addImm(NumBytes); + } else { + // Emit this the hard way. This clobbers G1 which we always know is + // available here. + unsigned OffHi = (unsigned)NumBytes >> 10U; + BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi); + // Emit G1 = G1 + I6 + BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1) + .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1)); + BuildMI(MBB, MBBI, dl, TII.get(SP::ADDrr), SP::O6) + .addReg(SP::O6).addReg(SP::G1); + } } bool SparcFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index e860cbbaf41..6d7c9f56097 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -91,7 +91,14 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(FIOperandNum + 1).getImm() + Subtarget.getStackPointerBias(); SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>(); - unsigned FramePtr = (FuncInfo->isLeafProc()) ? SP::O6 : SP::I6; + unsigned FramePtr = SP::I6; + if (FuncInfo->isLeafProc()) { + //Use %sp and adjust offset if needed. + FramePtr = SP::O6; + int stackSize = MF.getFrameInfo()->getStackSize(); + Offset += (stackSize) ? Subtarget.getAdjustedFrameSize(stackSize) : 0 ; + } + // Replace frame index with a frame pointer reference. 
if (Offset >= -4096 && Offset <= 4095) { // If the offset is small enough to fit in the immediate field, directly diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp index e5b2aeb1bb8..ba129ec926c 100644 --- a/lib/Target/Sparc/SparcSubtarget.cpp +++ b/lib/Target/Sparc/SparcSubtarget.cpp @@ -13,6 +13,7 @@ #include "SparcSubtarget.h" #include "Sparc.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" #define GET_SUBTARGETINFO_TARGET_DESC @@ -44,3 +45,30 @@ SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU, // Parse features string. ParseSubtargetFeatures(CPUName, FS); } + + +int SparcSubtarget::getAdjustedFrameSize(int frameSize) const { + + if (is64Bit()) { + // All 64-bit stack frames must be 16-byte aligned, and must reserve space + // for spilling the 16 window registers at %sp+BIAS..%sp+BIAS+128. + frameSize += 128; + // Frames with calls must also reserve space for 6 outgoing arguments + // whether they are used or not. LowerCall_64 takes care of that. + assert(frameSize % 16 == 0 && "Stack size not 16-byte aligned"); + } else { + // Emit the correct save instruction based on the number of bytes in + // the frame. Minimum stack frame size according to V8 ABI is: + // 16 words for register window spill + // 1 word for address of returned aggregate-value + // + 6 words for passing parameters on the stack + // ---------- + // 23 words * 4 bytes per word = 92 bytes + frameSize += 92; + + // Round up to next doubleword boundary -- a double-word boundary + // is required by the ABI. + frameSize = RoundUpToAlignment(frameSize, 8); + } + return frameSize; +} diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h index b94dd110ea9..30014aec271 100644 --- a/lib/Target/Sparc/SparcSubtarget.h +++ b/lib/Target/Sparc/SparcSubtarget.h @@ -58,6 +58,12 @@ public: int64_t getStackPointerBias() const { return is64Bit() ? 
2047 : 0; } + + /// Given an actual stack size as determined by FrameInfo, this function + /// returns adjusted framesize which includes space for register window + /// spills and arguments. + int getAdjustedFrameSize(int stackSize) const; + }; } // end namespace llvm diff --git a/test/CodeGen/SPARC/leafproc.ll b/test/CodeGen/SPARC/leafproc.ll index 5abdfba3e71..a162df1b0cb 100644 --- a/test/CodeGen/SPARC/leafproc.ll +++ b/test/CodeGen/SPARC/leafproc.ll @@ -55,3 +55,26 @@ entry: %6 = add nsw i32 %5, %h ret i32 %6 } + +; CHECK: leaf_proc_with_local_array: +; CHECK: add %sp, -104, %sp +; CHECK: or %g0, 1, [[R1:%[go][0-7]]] +; CHECK: st [[R1]], [%sp+96] +; CHECK: or %g0, 2, [[R2:%[go][0-7]]] +; CHECK: st [[R2]], [%sp+100] +; CHECK: ld {{.+}}, %o0 +; CHECK: jmp %o7+8 +; CHECK-NEXT: add %sp, 104, %sp + +define i32 @leaf_proc_with_local_array(i32 %a, i32 %b, i32 %c) { +entry: + %array = alloca [2 x i32], align 4 + %0 = sub nsw i32 %b, %c + %1 = getelementptr inbounds [2 x i32]* %array, i32 0, i32 0 + store i32 1, i32* %1, align 4 + %2 = getelementptr inbounds [2 x i32]* %array, i32 0, i32 1 + store i32 2, i32* %2, align 4 + %3 = getelementptr inbounds [2 x i32]* %array, i32 0, i32 %a + %4 = load i32* %3, align 4 + ret i32 %4 +}