From: Michael Kuperstein Date: Tue, 11 Aug 2015 08:48:48 +0000 (+0000) Subject: [X86] When optimizing for minsize, use POP for small post-call stack clean-up X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=18913db07a603b95b1459ec9112db794dad10fb8;p=oota-llvm.git [X86] When optimizing for minsize, use POP for small post-call stack clean-up When optimizing for size, replace "addl $4, %esp" and "addl $8, %esp" following a call by one or two pops, respectively. We don't try to do it in general, but only when the stack adjustment immediately follows a call - which is the most common case. That allows taking a short-cut when trying to find a free register to pop into, instead of a full-blown liveness check. If the adjustment immediately follows a call, then every register the call clobbers but doesn't define should be dead at that point, and can be used. Differential Revision: http://reviews.llvm.org/D11749 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@244578 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index b8858362e74..6cb2680be21 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -1851,6 +1851,69 @@ void X86FrameLowering::adjustForHiPEPrologue( #endif } +bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc DL, int Offset) const { + + if (Offset % SlotSize) + return false; + + int NumPops = Offset / SlotSize; + // This is only worth it if we have at most 2 pops. + if (NumPops != 1 && NumPops != 2) + return false; + + // Handle only the trivial case where the adjustment directly follows + // a call. This is the most common one, anyway. + if (MBBI == MBB.begin()) + return false; + MachineBasicBlock::iterator Prev = std::prev(MBBI); + if (!Prev->isCall() || !Prev->getOperand(1).isRegMask()) + return false; + + unsigned Regs[2]; + unsigned FoundRegs = 0; + + auto RegMask = Prev->getOperand(1); + + // Try to find up to NumPops free registers. + for (auto Candidate : X86::GR32_NOREX_NOSPRegClass) { + + // Poor man's liveness: + // Since we're immediately after a call, any register that is clobbered + // by the call and not defined by it can be considered dead. + if (!RegMask.clobbersPhysReg(Candidate)) + continue; + + bool IsDef = false; + for (const MachineOperand &MO : Prev->implicit_operands()) { + if (MO.isReg() && MO.isDef() && MO.getReg() == Candidate) { + IsDef = true; + break; + } + } + + if (IsDef) + continue; + + Regs[FoundRegs++] = Candidate; + if (FoundRegs == (unsigned)NumPops) + break; + } + + if (FoundRegs == 0) + return false; + + // If we found only one free register, but need two, reuse the same one twice. + while (FoundRegs < (unsigned)NumPops) + Regs[FoundRegs++] = Regs[0]; + + for (int i = 0; i < NumPops; ++i) + BuildMI(MBB, MBBI, DL, + TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r), Regs[i]); + + return true; +} + void X86FrameLowering:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { @@ -1882,8 +1945,12 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, if (Amount) { // Add Amount to SP to destroy a frame, and subtract to setup. int Offset = isDestroy ? Amount : -Amount; - BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false); + + if (!(MF.getFunction()->optForMinSize() && + adjustStackWithPops(MBB, I, DL, Offset))) + BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false); } + return; } diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index 495cfcd1c3f..b74fcf46169 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -146,6 +146,11 @@ private: MachineBasicBlock::iterator MBBI, DebugLoc DL, uint64_t MaxAlign) const; + /// Make small positive stack adjustments using POPs. + bool adjustStackWithPops(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc DL, + int Offset) const; + /// Adjusts the stack pointer using LEA, SUB, or ADD. MachineInstrBuilder BuildStackAdjustment(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, diff --git a/test/CodeGen/X86/fold-push.ll b/test/CodeGen/X86/fold-push.ll index f5f33321c13..cb4e435e35c 100644 --- a/test/CodeGen/X86/fold-push.ll +++ b/test/CodeGen/X86/fold-push.ll @@ -27,11 +27,11 @@ define void @test_min(i32 %a, i32 %b) minsize { ; CHECK: movl [[EAX:%e..]], (%esp) ; CHECK-NEXT: pushl [[EAX]] ; CHECK-NEXT: calll -; CHECK-NEXT: addl $4, %esp +; CHECK-NEXT: popl ; CHECK: nop ; CHECK: pushl (%esp) ; CHECK: calll -; CHECK-NEXT: addl $4, %esp +; CHECK-NEXT: popl %c = add i32 %a, %b call void @foo(i32 %c) call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"() diff --git a/test/CodeGen/X86/pop-stack-cleanup.ll b/test/CodeGen/X86/pop-stack-cleanup.ll new file mode 100644 index 00000000000..c6fb2d8885b --- /dev/null +++ b/test/CodeGen/X86/pop-stack-cleanup.ll @@ -0,0 +1,61 @@ +; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=CHECK -check-prefix=NORMAL + +declare void @param1(i32 %a) +declare i32 @param2_ret(i32 %a, i32 %b) +declare i64 @param2_ret64(i32 %a, i32 %b) +declare void @param2(i32 %a, i32 %b) +declare void @param3(i32 %a, i32 %b, i32 %c) + +define void @test() minsize { +; CHECK-LABEL: test: +; CHECK: calll _param1 +; CHECK-NEXT: popl %eax +; CHECK: calll _param2 +; CHECK-NEXT: popl %eax +; CHECK-NEXT: popl %ecx +; CHECK: calll _param2_ret +; CHECK-NEXT: popl %ecx +; CHECK-NEXT: popl %edx +; CHECK-NEXT: pushl %eax +; CHECK: calll _param3 +; CHECK-NEXT: addl $12, %esp +; CHECK: calll _param2_ret64 +; CHECK-NEXT: popl %ecx +; CHECK-NEXT: popl %ecx + call void @param1(i32 1) + call void @param2(i32 1, i32 2) + %ret = call i32 @param2_ret(i32 1, i32 2) + call void @param3(i32 1, i32 2, i32 %ret) + %ret64 = call i64 @param2_ret64(i32 1, i32 2) + ret void +} + +define void @negative(i32 %k) { +; CHECK-LABEL: negative: +; CHECK: calll _param1 +; CHECK-NEXT: addl $4, %esp +; CHECK: calll _param2 +; CHECK-NEXT: addl $8, %esp +; CHECK: calll _param3 +; CHECK-NEXT: movl %ebp, %esp + %v = alloca i32, i32 %k + call void @param1(i32 1) + call void @param2(i32 1, i32 2) + call void @param3(i32 1, i32 2, i32 3) + ret void +} + +define void @spill(i32 inreg %a, i32 inreg %b, i32 inreg %c) minsize { +; CHECK-LABEL: spill: +; CHECK-DAG: movl %ecx, +; CHECK-DAG: movl %edx, +; CHECK: calll _param2_ret +; CHECK-NEXT: popl %ecx +; CHECK-NEXT: popl %edx +; CHECK-DAG: movl {{.*}}, %ecx +; CHECK-DAG: movl {{.*}}, %edx +; CHECK: calll _spill + %i = call i32 @param2_ret(i32 1, i32 2) + call void @spill(i32 %a, i32 %b, i32 %c) + ret void +}