From: Nadav Rotem Date: Fri, 21 Dec 2012 23:48:49 +0000 (+0000) Subject: In some cases, due to scheduling constraints we copy the EFLAGS. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=d0696ef8c33b9b2504e89bc0aab2ea99a6c90756;p=oota-llvm.git In some cases, due to scheduling constraints we copy the EFLAGS. The only way to read the eflags is using push and pop. If we don't adjust the stack then we run over the first frame index. This is not something that we want to do, so we have to make sure that our machine function does not copy the flags. If it does then we have to emit the prolog that adjusts the stack. rdar://12896831 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170961 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 1d5457297d0..cf6c5c86d3a 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -625,6 +625,22 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { return CompactUnwindEncoding; } +/// colobbersTheStack - This function checks if any of the users of EFLAGS +/// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has +/// to use the stack, and if we don't adjust the stack we clobber the first +/// frame index. +/// See X86InstrInfo::copyPhysReg. +static bool colobbersTheStack(MachineFunction &MF) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + + for (MachineRegisterInfo::reg_iterator ri = MRI.reg_begin(X86::EFLAGS), + re = MRI.reg_end(); ri != re; ++ri) + if (ri->isCopy()) + return true; + + return false; +} + /// emitPrologue - Push callee-saved registers onto the stack, which /// automatically adjust the stack pointer. Adjust the stack pointer to allocate /// space for local variables. Also emit labels used by the exception handler to @@ -673,12 +689,14 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // If this is x86-64 and the Red Zone is not disabled, if we are a leaf // function, and use up to 128 bytes of stack space, don't have a frame // pointer, calls, or dynamic alloca then we do not need to adjust the - // stack pointer (we fit in the Red Zone). + // stack pointer (we fit in the Red Zone). We also check that we don't + // push and pop from the stack. if (Is64Bit && !Fn->getFnAttributes().hasAttribute(Attribute::NoRedZone) && !RegInfo->needsStackRealignment(MF) && !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls. !IsWin64 && // Win64 has no Red Zone + !colobbersTheStack(MF) && // Don't push and pop. !MF.getTarget().Options.EnableSegmentedStacks) { // Regular stack uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); if (HasFP) MinSize += SlotSize; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 7bbab3862da..fbfbc29a71c 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -2892,6 +2892,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, } // Moving EFLAGS to / from another register requires a push and a pop. + // Notice that we have to adjust the stack if we don't want to clobber the + // first frame index. See X86FrameLowering.cpp - colobbersTheStack. if (SrcReg == X86::EFLAGS) { if (X86::GR64RegClass.contains(DestReg)) { BuildMI(MBB, MI, DL, get(X86::PUSHF64)); diff --git a/test/CodeGen/X86/clobber-fi0.ll b/test/CodeGen/X86/clobber-fi0.ll new file mode 100644 index 00000000000..38a42dbf1aa --- /dev/null +++ b/test/CodeGen/X86/clobber-fi0.ll @@ -0,0 +1,37 @@ +; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.7.0" + +; In the code below we need to copy the EFLAGS because of scheduling constraints. +; When copying the EFLAGS we need to write to the stack with push/pop. This forces +; us to emit the prolog. + +; CHECK: main +; CHECK: subq{{.*}}rsp +; CHECK: ret +define i32 @main(i32 %arg, i8** %arg1) nounwind { +bb: + %tmp = alloca i32, align 4 ; [#uses=3 type=i32*] + %tmp2 = alloca i32, align 4 ; [#uses=3 type=i32*] + %tmp3 = alloca i32 ; [#uses=1 type=i32*] + store i32 1, i32* %tmp, align 4 + store i32 1, i32* %tmp2, align 4 + br label %bb4 + +bb4: ; preds = %bb4, %bb + %tmp6 = load i32* %tmp2, align 4 ; [#uses=1 type=i32] + %tmp7 = add i32 %tmp6, -1 ; [#uses=2 type=i32] + store i32 %tmp7, i32* %tmp2, align 4 + %tmp8 = icmp eq i32 %tmp7, 0 ; [#uses=1 type=i1] + %tmp9 = load i32* %tmp ; [#uses=1 type=i32] + %tmp10 = add i32 %tmp9, -1 ; [#uses=1 type=i32] + store i32 %tmp10, i32* %tmp3 + br i1 %tmp8, label %bb11, label %bb4 + +bb11: ; preds = %bb4 + %tmp12 = load i32* %tmp, align 4 ; [#uses=1 type=i32] + ret i32 %tmp12 +} + +