From 1031549bec52ab3c5dd9f4dd7703e21bb5253bff Mon Sep 17 00:00:00 2001 From: Robert Lougher Date: Thu, 29 Jan 2015 16:18:29 +0000 Subject: [PATCH] [X86] Use single add/sub for large stack offsets For large stack offsets the compiler generates multiple immediate mode sub/add instructions in the prologue/epilogue. This patch makes the compiler place the final amount to be added/subtracted into a register, which is then added/subtracted with a single operation. Differential Revision: http://reviews.llvm.org/D7226 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227458 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 59 +++++++++++++++++++++------ test/CodeGen/X86/huge-stack-offset.ll | 59 +++++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 13 deletions(-) create mode 100644 test/CodeGen/X86/huge-stack-offset.ll diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 16aab16d63e..bb2007dd682 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -82,6 +82,14 @@ static unsigned getADDriOpcode(unsigned IsLP64, int64_t Imm) { } } +static unsigned getSUBrrOpcode(unsigned isLP64) { + return isLP64 ? X86::SUB64rr : X86::SUB32rr; +} + +static unsigned getADDrrOpcode(unsigned isLP64) { + return isLP64 ? 
X86::ADD64rr : X86::ADD32rr; +} + static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) { if (IsLP64) { if (isInt<8>(Imm)) @@ -165,6 +173,18 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, return 0; } +static bool isEAXLiveIn(MachineFunction &MF) { + for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(), + EE = MF.getRegInfo().livein_end(); II != EE; ++II) { + unsigned Reg = II->first; + + if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX || + Reg == X86::AH || Reg == X86::AL) + return true; + } + + return false; +} /// emitSPUpdate - Emit a series of instructions to increment / decrement the /// stack pointer by a constant value. @@ -187,6 +207,32 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc DL = MBB.findDebugLoc(MBBI); while (Offset) { + if (Offset > Chunk) { + // Rather than emit a long series of instructions for large offsets, + // load the offset into a register and do one sub/add + unsigned Reg = 0; + + if (isSub && !isEAXLiveIn(*MBB.getParent())) + Reg = (unsigned)(Is64BitTarget ? X86::RAX : X86::EAX); + else + Reg = findDeadCallerSavedReg(MBB, MBBI, TRI, Is64BitTarget); + + if (Reg) { + Opc = Is64BitTarget ? X86::MOV64ri : X86::MOV32ri; + BuildMI(MBB, MBBI, DL, TII.get(Opc), Reg) + .addImm(Offset); + Opc = isSub + ? getSUBrrOpcode(Is64BitTarget) + : getADDrrOpcode(Is64BitTarget); + MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) + .addReg(StackPtr) + .addReg(Reg); + MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. + Offset = 0; + continue; + } + } + uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset; if (ThisVal == (Is64BitTarget ? 8 : 4)) { // Use push / pop instead. 
@@ -316,19 +362,6 @@ static int mergeSPUpdates(MachineBasicBlock &MBB, return Offset; } -static bool isEAXLiveIn(MachineFunction &MF) { - for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(), - EE = MF.getRegInfo().livein_end(); II != EE; ++II) { - unsigned Reg = II->first; - - if (Reg == X86::EAX || Reg == X86::AX || - Reg == X86::AH || Reg == X86::AL) - return true; - } - - return false; -} - void X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, diff --git a/test/CodeGen/X86/huge-stack-offset.ll b/test/CodeGen/X86/huge-stack-offset.ll new file mode 100644 index 00000000000..619516101a8 --- /dev/null +++ b/test/CodeGen/X86/huge-stack-offset.ll @@ -0,0 +1,59 @@ +; RUN: llc < %s -mtriple=x86_64-linux-unknown | FileCheck %s --check-prefix=CHECK-64 +; RUN: llc < %s -mtriple=i386-linux-unknown | FileCheck %s --check-prefix=CHECK-32 + +; Test that a large stack offset uses a single add/sub instruction to +; adjust the stack pointer. + +define void @foo() nounwind { +; CHECK-64-LABEL: foo: +; CHECK-64: movabsq $50000000{{..}}, %rax +; CHECK-64-NEXT: subq %rax, %rsp +; CHECK-64-NOT: subq $2147483647, %rsp +; CHECK-64: movabsq $50000000{{..}}, [[RAX:%r..]] +; CHECK-64-NEXT: addq [[RAX]], %rsp + +; CHECK-32-LABEL: foo: +; CHECK-32: movl $50000000{{..}}, %eax +; CHECK-32-NEXT: subl %eax, %esp +; CHECK-32-NOT: subl $2147483647, %esp +; CHECK-32: movl $50000000{{..}}, [[EAX:%e..]] +; CHECK-32-NEXT: addl [[EAX]], %esp + %1 = alloca [5000000000 x i8], align 16 + %2 = getelementptr inbounds [5000000000 x i8]* %1, i32 0, i32 0 + call void @bar(i8* %2) + ret void +} + +; Verify that we do not clobber the return value. 
+ +define i32 @foo2() nounwind { +; CHECK-64-LABEL: foo2: +; CHECK-64: movl $10, %eax +; CHECK-64-NOT: movabsq ${{.*}}, %rax + +; CHECK-32-LABEL: foo2: +; CHECK-32: movl $10, %eax +; CHECK-32-NOT: movl ${{.*}}, %eax + %1 = alloca [5000000000 x i8], align 16 + %2 = getelementptr inbounds [5000000000 x i8]* %1, i32 0, i32 0 + call void @bar(i8* %2) + ret i32 10 +} + +; Verify that we do not clobber EAX when using inreg attribute + +define i32 @foo3(i32 inreg %x) nounwind { +; CHECK-64-LABEL: foo3: +; CHECK-64: movabsq $50000000{{..}}, %rax +; CHECK-64-NEXT: subq %rax, %rsp + +; CHECK-32-LABEL: foo3: +; CHECK-32: subl $2147483647, %esp +; CHECK-32-NOT: movl ${{.*}}, %eax + %1 = alloca [5000000000 x i8], align 16 + %2 = getelementptr inbounds [5000000000 x i8]* %1, i32 0, i32 0 + call void @bar(i8* %2) + ret i32 %x +} + +declare void @bar(i8*) -- 2.34.1