From a64f17562f974f53233acce589e984e1861828e8 Mon Sep 17 00:00:00 2001
From: Michael Kuperstein <michael.m.kuperstein@intel.com>
Date: Tue, 3 Nov 2015 08:17:25 +0000
Subject: [PATCH] [X86] Generate .cfi_adjust_cfa_offset correctly when pushing
 arguments

When push instructions are being used to pass function arguments on
the stack, and either EH or debugging are enabled, we need to generate
.cfi_adjust_cfa_offset directives appropriately. For (synch) EH, it is
enough for the CFA offset to be correct at every call site, while
for debugging we want to be correct after every push.

Darwin does not support this well, so don't use pushes whenever it
would be required.

Differential Revision: http://reviews.llvm.org/D13767

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@251904 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/CodeGen/MachineModuleInfo.h    |   5 +
 lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp  |   3 +
 lib/Target/X86/X86CallFrameOptimization.cpp |  40 +--
 lib/Target/X86/X86FrameLowering.cpp         |  45 +++-
 lib/Target/X86/X86FrameLowering.h           |   6 +-
 test/CodeGen/X86/debugloc-argsize.ll        |   2 +-
 test/CodeGen/X86/fold-push.ll               |   4 +-
 test/CodeGen/X86/pop-stack-cleanup.ll       |   6 +-
 test/CodeGen/X86/push-cfi-debug.ll          |  53 ++++
 test/CodeGen/X86/push-cfi-obj.ll            |  48 ++--
 test/CodeGen/X86/push-cfi.ll                | 256 +++++++++++++++-----
 11 files changed, 354 insertions(+), 114 deletions(-)
 create mode 100644 test/CodeGen/X86/push-cfi-debug.ll
diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h
index 8cc5d6f242f..4df580f0dba 100644
--- a/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/include/llvm/CodeGen/MachineModuleInfo.h
@@ -245,6 +245,11 @@ public:
   bool hasDebugInfo() const { return DbgInfoAvailable; }
   void setDebugInfoAvailability(bool avail) { DbgInfoAvailable = avail; }
 
+  // Returns true if we need to generate precise CFI. Currently
+  // this is equivalent to hasDebugInfo(), but if we ever implement
+  // async EH, it will require precise CFI as well.
+  bool usePreciseUnwindInfo() const { return hasDebugInfo(); }
+
   bool callsEHReturn() const { return CallsEHReturn; }
   void setCallsEHReturn(bool b) { CallsEHReturn = b; }
 
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 8efa03a8862..9ede04c4c1b 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -216,6 +216,9 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
   case MCCFIInstruction::OpDefCfaOffset:
     OutStreamer->EmitCFIDefCfaOffset(Inst.getOffset());
     break;
+  case MCCFIInstruction::OpAdjustCfaOffset:
+    OutStreamer->EmitCFIAdjustCfaOffset(Inst.getOffset());
+    break;
   case MCCFIInstruction::OpDefCfa:
     OutStreamer->EmitCFIDefCfa(Inst.getRegister(), Inst.getOffset());
     break;
diff --git a/lib/Target/X86/X86CallFrameOptimization.cpp b/lib/Target/X86/X86CallFrameOptimization.cpp
index e3031b89464..23990b01ba1 100644
--- a/lib/Target/X86/X86CallFrameOptimization.cpp
+++ b/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -103,7 +103,8 @@ private:
   const char *getPassName() const override { return "X86 Optimize Call Frame"; }
 
   const TargetInstrInfo *TII;
-  const TargetFrameLowering *TFL;
+  const X86FrameLowering *TFL;
+  const X86Subtarget *STI;
   const MachineRegisterInfo *MRI;
   static char ID;
 };
@@ -127,13 +128,15 @@ bool X86CallFrameOptimization::isLegal(MachineFunction &MF) {
   // No point in running this in 64-bit mode, since some arguments are
   // passed in-register in all common calling conventions, so the pattern
   // we're looking for will never match.
-  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
-  if (STI.is64Bit())
+  if (STI->is64Bit())
     return false;
 
-  // We can't encode multiple DW_CFA_GNU_args_size in the compact
-  // unwind encoding that Darwin uses.
-  if (STI.isTargetDarwin() && !MF.getMMI().getLandingPads().empty())
+  // We can't encode multiple DW_CFA_GNU_args_size or DW_CFA_def_cfa_offset
+  // in the compact unwind encoding that Darwin uses. So, bail if there
+  // is a danger of that being generated.
+  if (STI->isTargetDarwin() && 
+     (!MF.getMMI().getLandingPads().empty() || 
+       (MF.getFunction()->needsUnwindTableEntry() && !TFL->hasFP(MF))))
     return false;
 
   // You would expect straight-line code between call-frame setup and
@@ -216,8 +219,9 @@ bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
 }
 
 bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
-  TII = MF.getSubtarget().getInstrInfo();
-  TFL = MF.getSubtarget().getFrameLowering();
+  STI = &MF.getSubtarget<X86Subtarget>();
+  TII = STI->getInstrInfo();
+  TFL = STI->getFrameLowering();
   MRI = &MF.getRegInfo();
 
   if (!isLegal(MF))
@@ -312,7 +316,7 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
   // Check that this particular call sequence is amenable to the
   // transformation.
   const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
-                                       MF.getSubtarget().getRegisterInfo());
+                                       STI->getRegisterInfo());
   unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
 
   // We expect to enter this at the beginning of a call sequence
@@ -455,6 +459,7 @@ bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
   for (int Idx = (Context.ExpectedDist / 4) - 1; Idx >= 0; --Idx) {
     MachineBasicBlock::iterator MOV = *Context.MovVector[Idx];
     MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands);
+    MachineBasicBlock::iterator Push = nullptr;
     if (MOV->getOpcode() == X86::MOV32mi) {
       unsigned PushOpcode = X86::PUSHi32;
       // If the operand is a small (8-bit) immediate, we can use a
@@ -466,21 +471,20 @@ bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
         if (isInt<8>(Val))
           PushOpcode = X86::PUSH32i8;
       }
-      BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)).addOperand(PushOp);
+      Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode))
+          .addOperand(PushOp);
     } else {
       unsigned int Reg = PushOp.getReg();
 
       // If PUSHrmm is not slow on this target, try to fold the source of the
       // push into the instruction.
-      const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
-      bool SlowPUSHrmm = ST.isAtom() || ST.isSLM();
+      bool SlowPUSHrmm = STI->isAtom() || STI->isSLM();
 
       // Check that this is legal to fold. Right now, we're extremely
       // conservative about that.
       MachineInstr *DefMov = nullptr;
       if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
-        MachineInstr *Push =
-            BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32rmm));
+        Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32rmm));
 
         unsigned NumOps = DefMov->getDesc().getNumOperands();
         for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
@@ -488,12 +492,18 @@ bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
 
         DefMov->eraseFromParent();
       } else {
-        BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32r))
+        Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32r))
             .addReg(Reg)
             .getInstr();
       }
     }
 
+    // For debugging, when using SP-based CFA, we need to adjust the CFA
+    // offset after each push.
+    if (!TFL->hasFP(MF) && MF.getMMI().usePreciseUnwindInfo())
+      TFL->BuildCFI(MBB, std::next(Push), DL, 
+                    MCCFIInstruction::createAdjustCfaOffset(nullptr, 4));
+
     MBB.erase(MOV);
   }
 
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 95cb76094ec..7b7f0daf12b 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -2105,18 +2105,23 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
     unsigned StackAlign = getStackAlignment();
     Amount = RoundUpToAlignment(Amount, StackAlign);
 
+    MachineModuleInfo &MMI = MF.getMMI();
+    const Function *Fn = MF.getFunction();
+    bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+    bool DwarfCFI = !WindowsCFI && 
+                    (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
+
     // If we have any exception handlers in this function, and we adjust
-    // the SP before calls, we may need to indicate this to the unwinder,
-    // using GNU_ARGS_SIZE. Note that this may be necessary
-    // even when Amount == 0, because the preceding function may have
-    // set a non-0 GNU_ARGS_SIZE.
+    // the SP before calls, we may need to indicate this to the unwinder
+    // using GNU_ARGS_SIZE. Note that this may be necessary even when
+    // Amount == 0, because the preceding function may have set a non-0
+    // GNU_ARGS_SIZE.
     // TODO: We don't need to reset this between subsequent functions,
     // if it didn't change.
-    bool HasDwarfEHHandlers =
-      !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
-      !MF.getMMI().getLandingPads().empty();
+    bool HasDwarfEHHandlers = !WindowsCFI &&
+                              !MF.getMMI().getLandingPads().empty();
 
-    if (HasDwarfEHHandlers && !isDestroy && 
+    if (HasDwarfEHHandlers && !isDestroy &&
         MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
       BuildCFI(MBB, I, DL,
                MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
@@ -2128,15 +2133,37 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
     // (Pushes of argument for frame setup, callee pops for frame destroy)
     Amount -= InternalAmt;
 
+    // If this is a callee-pop calling convention, and we're emitting precise
+    // SP-based CFI, emit a CFA adjust for the amount the callee popped.
+    if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF) && 
+        MMI.usePreciseUnwindInfo())
+      BuildCFI(MBB, I, DL, 
+               MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
+
     if (Amount) {
       // Add Amount to SP to destroy a frame, and subtract to setup.
       int Offset = isDestroy ? Amount : -Amount;
 
-      if (!(MF.getFunction()->optForMinSize() && 
+      if (!(Fn->optForMinSize() && 
             adjustStackWithPops(MBB, I, DL, Offset)))
         BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false);
     }
 
+    if (DwarfCFI && !hasFP(MF)) {
+      // If we don't have FP, but need to generate unwind information,
+      // we need to set the correct CFA offset after the stack adjustment.
+      // How much we adjust the CFA offset depends on whether we're emitting
+      // CFI only for EH purposes or for debugging. EH only requires the CFA
+      // offset to be correct at each call site, while for debugging we want
+      // it to be more precise.
+      int CFAOffset = Amount;
+      if (!MMI.usePreciseUnwindInfo())
+        CFAOffset += InternalAmt;
+      CFAOffset = isDestroy ? -CFAOffset : CFAOffset;
+      BuildCFI(MBB, I, DL, 
+               MCCFIInstruction::createAdjustCfaOffset(nullptr, CFAOffset));
+    }
+
     return;
   }
 
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index 59c6a062810..261eade9173 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -125,13 +125,13 @@ public:
   /// \p MBB will be correctly handled by the target.
   bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override;
 
-private:
-  uint64_t calculateMaxStackAlign(const MachineFunction &MF) const;
-
   /// Wraps up getting a CFI index and building a MachineInstr for it.
   void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                 DebugLoc DL, MCCFIInstruction CFIInst) const;
 
+private:
+  uint64_t calculateMaxStackAlign(const MachineFunction &MF) const;
+
   /// Aligns the stack pointer by ANDing it with -MaxAlign.
   void BuildStackAlignAND(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, DebugLoc DL,
diff --git a/test/CodeGen/X86/debugloc-argsize.ll b/test/CodeGen/X86/debugloc-argsize.ll
index 7db7459e8fa..56f67b72d98 100644
--- a/test/CodeGen/X86/debugloc-argsize.ll
+++ b/test/CodeGen/X86/debugloc-argsize.ll
@@ -30,7 +30,7 @@ declare i8* @__cxa_begin_catch(i8*)
 
 declare void @__cxa_end_catch()
 
-attributes #0 = { optsize "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { optsize "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { optsize }
 attributes #2 = { nounwind }
 
diff --git a/test/CodeGen/X86/fold-push.ll b/test/CodeGen/X86/fold-push.ll
index cb4e435e35c..eaf91351021 100644
--- a/test/CodeGen/X86/fold-push.ll
+++ b/test/CodeGen/X86/fold-push.ll
@@ -3,7 +3,7 @@
 
 declare void @foo(i32 %r)
 
-define void @test(i32 %a, i32 %b) optsize {
+define void @test(i32 %a, i32 %b) optsize nounwind {
 ; CHECK-LABEL: test:
 ; CHECK: movl [[EAX:%e..]], (%esp)
 ; CHECK-NEXT: pushl [[EAX]]
@@ -22,7 +22,7 @@ define void @test(i32 %a, i32 %b) optsize {
   ret void
 }
 
-define void @test_min(i32 %a, i32 %b) minsize {
+define void @test_min(i32 %a, i32 %b) minsize nounwind {
 ; CHECK-LABEL: test_min:
 ; CHECK: movl [[EAX:%e..]], (%esp)
 ; CHECK-NEXT: pushl [[EAX]]
diff --git a/test/CodeGen/X86/pop-stack-cleanup.ll b/test/CodeGen/X86/pop-stack-cleanup.ll
index 3a22cc19100..bcf7594065f 100644
--- a/test/CodeGen/X86/pop-stack-cleanup.ll
+++ b/test/CodeGen/X86/pop-stack-cleanup.ll
@@ -9,7 +9,7 @@ declare void @param3(i32 %a, i32 %b, i32 %c)
 declare void @param8(i64, i64, i64, i64, i64, i64, i64, i64)
 
 
-define void @test() minsize {
+define void @test() minsize nounwind {
 ; CHECK-LABEL: test:
 ; CHECK: calll _param1
 ; CHECK-NEXT: popl %eax
@@ -48,7 +48,7 @@ define void @negative(i32 %k) {
   ret void
 }
 
-define void @spill(i32 inreg %a, i32 inreg %b, i32 inreg %c) minsize {
+define void @spill(i32 inreg %a, i32 inreg %b, i32 inreg %c) minsize nounwind {
 ; CHECK-LABEL: spill:
 ; CHECK-DAG: movl %ecx,
 ; CHECK-DAG: movl %edx,
@@ -63,7 +63,7 @@ define void @spill(i32 inreg %a, i32 inreg %b, i32 inreg %c) minsize {
   ret void
 }
 
-define void @test_linux64(i32 %size) minsize {
+define void @test_linux64(i32 %size) minsize nounwind {
 ; LINUX64-LABEL: test_linux64:
 ; LINUX64: pushq %rbp
 ; LINUX64: callq param8
diff --git a/test/CodeGen/X86/push-cfi-debug.ll b/test/CodeGen/X86/push-cfi-debug.ll
new file mode 100644
index 00000000000..61110e566e6
--- /dev/null
+++ b/test/CodeGen/X86/push-cfi-debug.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s
+
+
+; Function Attrs: optsize
+declare void @foo(i32, i32) #0
+declare x86_stdcallcc void @stdfoo(i32, i32) #0
+
+; CHECK-LABEL: test1:
+; CHECK: subl $8, %esp
+; CHECK: .cfi_adjust_cfa_offset 8
+; CHECK: pushl $2
+; CHECK: .cfi_adjust_cfa_offset 4
+; CHECK: pushl $1
+; CHECK: .cfi_adjust_cfa_offset 4
+; CHECK: calll foo
+; CHECK: addl $16, %esp
+; CHECK: .cfi_adjust_cfa_offset -16
+; CHECK: subl $8, %esp
+; CHECK: .cfi_adjust_cfa_offset 8
+; CHECK: pushl $4
+; CHECK: .cfi_adjust_cfa_offset 4
+; CHECK: pushl $3
+; CHECK: .cfi_adjust_cfa_offset 4
+; CHECK: calll stdfoo
+; CHECK: .cfi_adjust_cfa_offset -8
+; CHECK: addl $8, %esp
+; CHECK: .cfi_adjust_cfa_offset -8
+define void @test1() #0 {
+entry:
+  tail call void @foo(i32 1, i32 2) #1, !dbg !10
+  tail call x86_stdcallcc void @stdfoo(i32 3, i32 4) #1, !dbg !11
+  ret void, !dbg !12
+}
+
+attributes #0 = { nounwind optsize }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 250289)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "foo.c", directory: "foo")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "test1", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, isOptimized: true, function: void ()* @test1, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null}
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{!"clang version 3.8.0 (trunk 250289)"}
+!10 = !DILocation(line: 4, column: 3, scope: !4)
+!11 = !DILocation(line: 5, column: 3, scope: !4)
+!12 = !DILocation(line: 6, column: 1, scope: !4)
diff --git a/test/CodeGen/X86/push-cfi-obj.ll b/test/CodeGen/X86/push-cfi-obj.ll
index 43ab34e744b..bc01407f6f3 100644
--- a/test/CodeGen/X86/push-cfi-obj.ll
+++ b/test/CodeGen/X86/push-cfi-obj.ll
@@ -1,36 +1,36 @@
-; RUN: llc < %s -mtriple=i686-pc-linux -filetype=obj | llvm-readobj -s -sr -sd | FileCheck %s
+; RUN: llc < %s -mtriple=i686-pc-linux -filetype=obj | llvm-readobj -s -sr -sd | FileCheck %s -check-prefix=LINUX
 ; RUN: llc < %s -mtriple=i686-darwin-macosx10.7 -filetype=obj | llvm-readobj -sections | FileCheck -check-prefix=DARWIN %s
 
 ; On darwin, check that we manage to generate the compact unwind section
 ; DARWIN: Name: __compact_unwind
 ; DARWIN: Segment: __LD
 
-; CHECK:         Index: 8
-; CHECK-NEXT:    Name: .eh_frame (41)
-; CHECK-NEXT:    Type: SHT_PROGBITS (0x1)
-; CHECK-NEXT:    Flags [ (0x2)
-; CHECK-NEXT:      SHF_ALLOC (0x2)
-; CHECK-NEXT:    ]
-; CHECK-NEXT:    Address: 0x0
-; CHECK-NEXT:    Offset: 0x64
-; CHECK-NEXT:    Size: 60
-; CHECK-NEXT:    Link: 0
-; CHECK-NEXT:    Info: 0
-; CHECK-NEXT:    AddressAlignment: 4
-; CHECK-NEXT:    EntrySize: 0
-; CHECK-NEXT:    Relocations [
-; CHECK-NEXT:    ]
-; CHECK-NEXT:    SectionData (
-; CHECK-NEXT:      0000: 1C000000 00000000 017A504C 5200017C  |.........zPLR..||
-; CHECK-NEXT:      0010: 08070000 00000000 1B0C0404 88010000  |................|
-; CHECK-NEXT:      0020: 18000000 24000000 00000000 19000000  |....$...........|
-; CHECK-NEXT:      0030: 04000000 00430E10 2E100000           |.....C......|
-; CHECK-NEXT:    )
+; LINUX:         Index: 8
+; LINUX-NEXT:    Name: .eh_frame (41)
+; LINUX-NEXT:    Type: SHT_PROGBITS (0x1)
+; LINUX-NEXT:    Flags [ (0x2)
+; LINUX-NEXT:      SHF_ALLOC (0x2)
+; LINUX-NEXT:    ]
+; LINUX-NEXT:    Address: 0x0
+; LINUX-NEXT:    Offset: 0x68
+; LINUX-NEXT:    Size: 64
+; LINUX-NEXT:    Link: 0
+; LINUX-NEXT:    Info: 0
+; LINUX-NEXT:    AddressAlignment: 4
+; LINUX-NEXT:    EntrySize: 0
+; LINUX-NEXT:    Relocations [
+; LINUX-NEXT:    ]
+; LINUX-NEXT:    SectionData (
+; LINUX-NEXT:      0000: 1C000000 00000000 017A504C 5200017C  |.........zPLR..||
+; LINUX-NEXT:      0010: 08070000 00000000 1B0C0404 88010000  |................|
+; LINUX-NEXT:      0020: 1C000000 24000000 00000000 1D000000  |....$...........|
+; LINUX-NEXT:      0030: 04000000 00410E08 8502420D 05432E10  |.....A....B..C..|
+; LINUX-NEXT:    )
 
 declare i32 @__gxx_personality_v0(...)
 declare void @good(i32 %a, i32 %b, i32 %c, i32 %d)
 
-define void @test() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+define void @test() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
   invoke void @good(i32 1, i32 2, i32 3, i32 4)
           to label %continue unwind label %cleanup
@@ -41,3 +41,5 @@ cleanup:
      cleanup
   ret void
 }
+
+attributes #0 = { optsize "no-frame-pointer-elim"="true" }
diff --git a/test/CodeGen/X86/push-cfi.ll b/test/CodeGen/X86/push-cfi.ll
index 95952278266..4d07a1d8181 100644
--- a/test/CodeGen/X86/push-cfi.ll
+++ b/test/CodeGen/X86/push-cfi.ll
@@ -1,21 +1,51 @@
-; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s
+; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s -check-prefix=LINUX -check-prefix=CHECK
+; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=DARWIN -check-prefix=CHECK
 
 declare i32 @__gxx_personality_v0(...)
 declare void @good(i32 %a, i32 %b, i32 %c, i32 %d)
 declare void @large(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f)
 declare void @empty()
 
-; We use an invoke, and expect a .cfi_escape GNU_ARGS_SIZE with size 16
-; before the invocation
-; CHECK-LABEL: test1:
-; CHECK: .cfi_escape 0x2e, 0x10
-; CHECK-NEXT: pushl   $4
-; CHECK-NEXT: pushl   $3
-; CHECK-NEXT: pushl   $2
-; CHECK-NEXT: pushl   $1
-; CHECK-NEXT: call
-; CHECK-NEXT: addl $16, %esp
-define void @test1() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; When we use an invoke, and have FP, we expect a .cfi_escape GNU_ARGS_SIZE
+; with size 16 before the invocation. Without FP, we expect.cfi_adjust_cfa_offset
+; before and after.
+; Darwin should not generate pushes in neither circumstance.
+; CHECK-LABEL: test1_nofp:
+; LINUX: .cfi_escape 0x2e, 0x10
+; LINUX: .cfi_adjust_cfa_offset 16
+; LINUX-NEXT: pushl   $4
+; LINUX-NEXT: pushl   $3
+; LINUX-NEXT: pushl   $2
+; LINUX-NEXT: pushl   $1
+; LINUX-NEXT: call
+; LINUX-NEXT: addl $16, %esp
+; LINUX: .cfi_adjust_cfa_offset -16
+; DARWIN-NOT: .cfi_escape
+; DARWIN-NOT: pushl
+define void @test1_nofp() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  invoke void @good(i32 1, i32 2, i32 3, i32 4)
+          to label %continue unwind label %cleanup
+continue:
+  ret void
+cleanup:  
+  landingpad { i8*, i32 }
+     cleanup
+  ret void
+}
+
+; CHECK-LABEL: test1_fp:
+; LINUX: .cfi_escape 0x2e, 0x10
+; LINUX-NEXT: pushl   $4
+; LINUX-NEXT: pushl   $3
+; LINUX-NEXT: pushl   $2
+; LINUX-NEXT: pushl   $1
+; LINUX-NEXT: call
+; LINUX-NEXT: addl $16, %esp
+; DARWIN: pushl %ebp
+; DARWIN-NOT: .cfi_escape
+; DARWIN-NOT: pushl
+define void @test1_fp() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
   invoke void @good(i32 1, i32 2, i32 3, i32 4)
           to label %continue unwind label %cleanup
@@ -28,27 +58,69 @@ cleanup:
 }
 
 ; If the function has no handlers, we don't need to generate GNU_ARGS_SIZE,
-; even if it has an unwind table.
-; CHECK-LABEL: test2:
+; even if it has an unwind table. Without FP, we still need cfi_adjust_cfa_offset,
+; so darwin should not generate pushes.
+; CHECK-LABEL: test2_nofp:
+; LINUX-NOT: .cfi_escape
+; LINUX: .cfi_adjust_cfa_offset 16
+; LINUX-NEXT: pushl   $4
+; LINUX-NEXT: pushl   $3
+; LINUX-NEXT: pushl   $2
+; LINUX-NEXT: pushl   $1
+; LINUX-NEXT: call
+; LINUX-NEXT: addl $16, %esp
+; LINUX: .cfi_adjust_cfa_offset -16
+; DARWIN-NOT: .cfi_escape
+; DARWIN-NOT: pushl
+define void @test2_nofp() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  call void @good(i32 1, i32 2, i32 3, i32 4)
+  ret void
+}
+
+; CHECK-LABEL: test2_fp:
 ; CHECK-NOT: .cfi_escape
+; CHECK-NOT: .cfi_adjust_cfa_offset
 ; CHECK: pushl   $4
 ; CHECK-NEXT: pushl   $3
 ; CHECK-NEXT: pushl   $2
 ; CHECK-NEXT: pushl   $1
 ; CHECK-NEXT: call
-; CHECK-NEXT: addl $16, %esp
-define void @test2() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-NEXT: addl $24, %esp
+define void @test2_fp() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
   call void @good(i32 1, i32 2, i32 3, i32 4)
   ret void
 }
 
-; If we did not end up using any pushes, no need for GNU_ARGS_SIZE anywhere
-; CHECK-LABEL: test3:
-; CHECK-NOT: .cfi_escape
-; CHECK-NOT: pushl
-; CHECK: retl
-define void @test3() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; If we did not end up using any pushes, no need for GNU_ARGS_SIZE or
+; cfi_adjust_cfa_offset.
+; CHECK-LABEL: test3_nofp:
+; LINUX-NOT: .cfi_escape
+; LINUX-NOT: .cfi_adjust_cfa_offset
+; LINUX-NOT: pushl
+; LINUX: retl
+define void @test3_nofp() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  invoke void @empty()
+          to label %continue unwind label %cleanup
+continue:
+  ret void
+cleanup:  
+  landingpad { i8*, i32 }
+     cleanup
+  ret void
+}
+
+; If we did not end up using any pushes, no need for GNU_ARGS_SIZE or
+; cfi_adjust_cfa_offset.
+; CHECK-LABEL: test3_fp:
+; LINUX: pushl %ebp
+; LINUX-NOT: .cfi_escape
+; LINUX-NOT: .cfi_adjust_cfa_offset
+; LINUX-NOT: pushl
+; LINUX: retl
+define void @test3_fp() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
   invoke void @empty()
           to label %continue unwind label %cleanup
@@ -62,24 +134,24 @@ cleanup:
 
 ; Different sized stacks need different GNU_ARGS_SIZEs
 ; CHECK-LABEL: test4:
-; CHECK: .cfi_escape 0x2e, 0x10
-; CHECK-NEXT: pushl   $4
-; CHECK-NEXT: pushl   $3
-; CHECK-NEXT: pushl   $2
-; CHECK-NEXT: pushl   $1
-; CHECK-NEXT: call
-; CHECK-NEXT: addl $16, %esp
-; CHECK: .cfi_escape 0x2e, 0x20
-; CHECK-NEXT: subl    $8, %esp
-; CHECK-NEXT: pushl   $11
-; CHECK-NEXT: pushl   $10
-; CHECK-NEXT: pushl   $9
-; CHECK-NEXT: pushl   $8
-; CHECK-NEXT: pushl   $7
-; CHECK-NEXT: pushl   $6
-; CHECK-NEXT: calll   large
-; CHECK-NEXT: addl $32, %esp
-define void @test4() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; LINUX: .cfi_escape 0x2e, 0x10
+; LINUX-NEXT: pushl   $4
+; LINUX-NEXT: pushl   $3
+; LINUX-NEXT: pushl   $2
+; LINUX-NEXT: pushl   $1
+; LINUX-NEXT: call
+; LINUX-NEXT: addl $16, %esp
+; LINUX: .cfi_escape 0x2e, 0x20
+; LINUX: subl    $8, %esp
+; LINUX-NEXT: pushl   $11
+; LINUX-NEXT: pushl   $10
+; LINUX-NEXT: pushl   $9
+; LINUX-NEXT: pushl   $8
+; LINUX-NEXT: pushl   $7
+; LINUX-NEXT: pushl   $6
+; LINUX-NEXT: calll   large
+; LINUX-NEXT: addl $32, %esp
+define void @test4() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
   invoke void @good(i32 1, i32 2, i32 3, i32 4)
           to label %continue1 unwind label %cleanup
@@ -95,18 +167,48 @@ cleanup:
 }
 
 ; If we did use pushes, we need to reset GNU_ARGS_SIZE before a call
-; without parameters
-; CHECK-LABEL: test5:
-; CHECK: .cfi_escape 0x2e, 0x10
-; CHECK-NEXT: pushl   $4
-; CHECK-NEXT: pushl   $3
-; CHECK-NEXT: pushl   $2
-; CHECK-NEXT: pushl   $1
-; CHECK-NEXT: call
-; CHECK-NEXT: addl $16, %esp
-; CHECK: .cfi_escape 0x2e, 0x00
-; CHECK-NEXT: call
-define void @test5() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; without parameters, but don't need to adjust the cfa offset
+; CHECK-LABEL: test5_nofp:
+; LINUX: .cfi_escape 0x2e, 0x10
+; LINUX: .cfi_adjust_cfa_offset 16
+; LINUX-NEXT: pushl   $4
+; LINUX-NEXT: pushl   $3
+; LINUX-NEXT: pushl   $2
+; LINUX-NEXT: pushl   $1
+; LINUX-NEXT: call
+; LINUX-NEXT: addl $16, %esp
+; LINUX: .cfi_adjust_cfa_offset -16
+; LINUX-NOT: .cfi_adjust_cfa_offset
+; LINUX: .cfi_escape 0x2e, 0x00
+; LINUX-NOT: .cfi_adjust_cfa_offset
+; LINUX: call
+define void @test5_nofp() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  invoke void @good(i32 1, i32 2, i32 3, i32 4)
+          to label %continue1 unwind label %cleanup
+continue1:
+  invoke void @empty()
+          to label %continue2 unwind label %cleanup
+continue2:
+  ret void          
+cleanup:  
+  landingpad { i8*, i32 }
+     cleanup
+  ret void
+}
+
+; CHECK-LABEL: test5_fp:
+; LINUX: .cfi_escape 0x2e, 0x10
+; LINUX-NEXT: pushl   $4
+; LINUX-NEXT: pushl   $3
+; LINUX-NEXT: pushl   $2
+; LINUX-NEXT: pushl   $1
+; LINUX-NEXT: call
+; LINUX-NEXT: addl $16, %esp
+; LINUX: .cfi_escape 0x2e, 0x00
+; LINUX-NOT: .cfi_adjust_cfa_offset
+; LINUX: call
+define void @test5_fp() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
   invoke void @good(i32 1, i32 2, i32 3, i32 4)
           to label %continue1 unwind label %cleanup
@@ -121,13 +223,13 @@ cleanup:
   ret void
 }
 
-; This is actually inefficient - we don't need to repeat the .cfi_escape twice.
+; FIXME: This is actually inefficient - we don't need to repeat the .cfi_escape twice.
 ; CHECK-LABEL: test6:
-; CHECK: .cfi_escape 0x2e, 0x10
-; CHECK: call
-; CHECK: .cfi_escape 0x2e, 0x10
-; CHECK: call
-define void @test6() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; LINUX: .cfi_escape 0x2e, 0x10
+; LINUX: call
+; LINUX: .cfi_escape 0x2e, 0x10
+; LINUX: call
+define void @test6() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
   invoke void @good(i32 1, i32 2, i32 3, i32 4)
           to label %continue1 unwind label %cleanup
@@ -141,3 +243,41 @@ cleanup:
      cleanup
   ret void
 }
+
+; Darwin should generate pushes in the presense of FP and an unwind table,
+; but not FP and invoke.
+; CHECK-LABEL: test7:
+; DARWIN: pushl %ebp
+; DARWIN: movl %esp, %ebp
+; DARWIN: .cfi_def_cfa_register %ebp
+; DARWIN-NOT: .cfi_adjust_cfa_offset
+; DARWIN: pushl   $4
+; DARWIN-NEXT: pushl   $3
+; DARWIN-NEXT: pushl   $2
+; DARWIN-NEXT: pushl   $1
+; DARWIN-NEXT: call
+define void @test7() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  call void @good(i32 1, i32 2, i32 3, i32 4)
+  ret void
+}
+
+; CHECK-LABEL: test8:
+; DARWIN: pushl %ebp
+; DARWIN: movl %esp, %ebp
+; DARWIN-NOT: .cfi_adjust_cfa_offset
+; DARWIN-NOT: pushl
+define void @test8() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  invoke void @good(i32 1, i32 2, i32 3, i32 4)
+          to label %continue unwind label %cleanup
+continue:
+  ret void
+cleanup:  
+  landingpad { i8*, i32 }
+     cleanup
+  ret void
+}
+
+attributes #0 = { optsize }
+attributes #1 = { optsize "no-frame-pointer-elim"="true" }
-- 
2.34.1