From dff75493e8040cefd56c9b984e6af5b60c32ec45 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Tue, 8 Sep 2015 22:44:41 +0000 Subject: [PATCH] [WinEH] Emit prologues and epilogues for funclets Summary: 32-bit funclets have short prologues that allocate enough stack for the largest call in the whole function. The runtime saves CSRs for the funclet. It doesn't restore CSRs after we finally transfer control back to the parent function via a CATCHRET, but that's a separate issue. 32-bit funclets also have to adjust the incoming EBP value, which is what llvm.x86.seh.recoverframe does in the old model. 64-bit funclets need to spill CSRs as normal. For simplicity, this just spills the same set of CSRs as the parent function, rather than trying to compute different CSR sets for the parent function and each funclet. 64-bit funclets also allocate enough stack space for the largest outgoing call frame, like 32-bit. Reviewers: majnemer Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D12546 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@247092 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/WinEHFuncInfo.h | 3 + lib/CodeGen/AsmPrinter/WinException.cpp | 20 ++- lib/CodeGen/PrologEpilogInserter.cpp | 56 ++++--- .../SelectionDAG/FunctionLoweringInfo.cpp | 6 + lib/Target/X86/X86FrameLowering.cpp | 144 +++++++++++++++++- lib/Target/X86/X86FrameLowering.h | 7 + lib/Target/X86/X86InstrCompiler.td | 2 +- lib/Target/X86/X86MCInstLower.cpp | 3 +- lib/Target/X86/X86WinEHState.cpp | 11 +- test/CodeGen/X86/win-catchpad-csrs.ll | 136 +++++++++++++++++ test/CodeGen/X86/win-catchpad.ll | 68 +++++++-- 11 files changed, 399 insertions(+), 57 deletions(-) create mode 100644 test/CodeGen/X86/win-catchpad-csrs.ll diff --git a/include/llvm/CodeGen/WinEHFuncInfo.h b/include/llvm/CodeGen/WinEHFuncInfo.h index 5517c812ccc..a9071acb94c 100644 --- a/include/llvm/CodeGen/WinEHFuncInfo.h +++ b/include/llvm/CodeGen/WinEHFuncInfo.h @@ -19,6 +19,7 @@ #include 
"llvm/ADT/DenseMap.h" namespace llvm { +class AllocaInst; class BasicBlock; class Constant; class Function; @@ -156,6 +157,8 @@ struct WinEHFuncInfo { /// localescape index of the 32-bit EH registration node. Set by /// WinEHStatePass and used indirectly by SEH filter functions of the parent. int EHRegNodeEscapeIndex = INT_MAX; + const AllocaInst *EHRegNode = nullptr; + int EHRegNodeFrameIndex = INT_MAX; WinEHFuncInfo() {} }; diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp index 547dc4cbdc3..aedac5467c6 100644 --- a/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/lib/CodeGen/AsmPrinter/WinException.cpp @@ -473,12 +473,20 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { OS.EmitValue(create32bitRef(HT.Handler), 4); if (shouldEmitPersonality) { - MCSymbol *ParentFrameOffset = - Asm->OutContext.getOrCreateParentFrameOffsetSymbol( - GlobalValue::getRealLinkageName(HT.Handler->getName())); - const MCSymbolRefExpr *ParentFrameOffsetRef = MCSymbolRefExpr::create( - ParentFrameOffset, Asm->OutContext); - OS.EmitValue(ParentFrameOffsetRef, 4); // ParentFrameOffset + if (FuncInfo.CatchHandlerParentFrameObjOffset.empty()) { + // With the new IR, this is always 16 + 8 + getMaxCallFrameSize(). + // Keep this in sync with X86FrameLowering::emitPrologue. 
+ int ParentFrameOffset = + 16 + 8 + MF->getFrameInfo()->getMaxCallFrameSize(); + OS.EmitIntValue(ParentFrameOffset, 4); // ParentFrameOffset + } else { + MCSymbol *ParentFrameOffset = + Asm->OutContext.getOrCreateParentFrameOffsetSymbol( + GlobalValue::getRealLinkageName(HT.Handler->getName())); + const MCSymbolRefExpr *ParentFrameOffsetRef = + MCSymbolRefExpr::create(ParentFrameOffset, Asm->OutContext); + OS.EmitValue(ParentFrameOffsetRef, 4); // ParentFrameOffset + } } } } diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index b2fdee6c8e4..36819c83ab8 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -71,8 +71,9 @@ private: // stack frame indexes. unsigned MinCSFrameIndex, MaxCSFrameIndex; - // Save and Restore blocks of the current function. - MachineBasicBlock *SaveBlock; + // Save and Restore blocks of the current function. Typically there is a + // single save block, unless Windows EH funclets are involved. + SmallVector SaveBlocks; SmallVector RestoreBlocks; // Flag to control whether to use the register scavenger to resolve @@ -142,7 +143,7 @@ void PEI::calculateSets(MachineFunction &Fn) { // Use the points found by shrink-wrapping, if any. if (MFI->getSavePoint()) { - SaveBlock = MFI->getSavePoint(); + SaveBlocks.push_back(MFI->getSavePoint()); assert(MFI->getRestorePoint() && "Both restore and save must be set"); MachineBasicBlock *RestoreBlock = MFI->getRestorePoint(); // If RestoreBlock does not have any successor and is not a return block @@ -154,13 +155,13 @@ void PEI::calculateSets(MachineFunction &Fn) { } // Save refs to entry and return blocks. 
- SaveBlock = Fn.begin(); - for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end(); - MBB != E; ++MBB) - if (isReturnBlock(MBB)) - RestoreBlocks.push_back(MBB); - - return; + SaveBlocks.push_back(Fn.begin()); + for (MachineBasicBlock &MBB : Fn) { + if (MBB.isEHFuncletEntry()) + SaveBlocks.push_back(&MBB); + if (isReturnBlock(&MBB)) + RestoreBlocks.push_back(&MBB); + } } /// StackObjSet - A set of stack object indexes @@ -237,6 +238,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { } delete RS; + SaveBlocks.clear(); RestoreBlocks.clear(); return true; } @@ -446,18 +448,20 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { MachineBasicBlock::iterator I; // Spill using target interface. - I = SaveBlock->begin(); - if (!TFI->spillCalleeSavedRegisters(*SaveBlock, I, CSI, TRI)) { - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - // Insert the spill to the stack frame. - unsigned Reg = CSI[i].getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(*SaveBlock, I, Reg, true, CSI[i].getFrameIdx(), - RC, TRI); + for (MachineBasicBlock *SaveBlock : SaveBlocks) { + I = SaveBlock->begin(); + if (!TFI->spillCalleeSavedRegisters(*SaveBlock, I, CSI, TRI)) { + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + // Insert the spill to the stack frame. + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(*SaveBlock, I, Reg, true, CSI[i].getFrameIdx(), + RC, TRI); + } } + // Update the live-in information of all the blocks up to the save point. + updateLiveness(Fn); } - // Update the live-in information of all the blocks up to the save point. - updateLiveness(Fn); // Restore using target interface. for (MachineBasicBlock *MBB : RestoreBlocks) { @@ -771,7 +775,8 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); // Add prologue to the function... 
- TFI.emitPrologue(Fn, *SaveBlock); + for (MachineBasicBlock *SaveBlock : SaveBlocks) + TFI.emitPrologue(Fn, *SaveBlock); // Add epilogue to restore the callee-save registers in each exiting block. for (MachineBasicBlock *RestoreBlock : RestoreBlocks) @@ -781,8 +786,10 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { // we've been asked for it. This, when linked with a runtime with support // for segmented stacks (libgcc is one), will result in allocating stack // space in small chunks instead of one large contiguous block. - if (Fn.shouldSplitStack()) - TFI.adjustForSegmentedStacks(Fn, *SaveBlock); + if (Fn.shouldSplitStack()) { + for (MachineBasicBlock *SaveBlock : SaveBlocks) + TFI.adjustForSegmentedStacks(Fn, *SaveBlock); + } // Emit additional code that is required to explicitly handle the stack in // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The @@ -790,7 +797,8 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { // different conditional check and another BIF for allocating more stack // space. if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE) - TFI.adjustForHiPEPrologue(Fn, *SaveBlock); + for (MachineBasicBlock *SaveBlock : SaveBlocks) + TFI.adjustForHiPEPrologue(Fn, *SaveBlock); } /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 7a45858885d..3a51676bc9a 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -280,6 +280,12 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, for (WinEHHandlerType &H : TBME.HandlerArray) if (const auto *BB = dyn_cast(H.Handler)) H.HandlerMBB = MBBMap[BB]; + // If there's an explicit EH registration node on the stack, record its + // frame index. 
+ if (EHInfo.EHRegNode && EHInfo.EHRegNode->getParent()->getParent() == Fn) { + assert(StaticAllocaMap.count(EHInfo.EHRegNode)); + EHInfo.EHRegNodeFrameIndex = StaticAllocaMap[EHInfo.EHRegNode]; + } } // Copy the state numbers to LandingPadInfo for the current function, which diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 3b544d561a5..a2f9efb5aa0 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" @@ -88,7 +89,7 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const { MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || MFI->hasOpaqueSPAdjustment() || MF.getInfo()->getForceFramePointer() || - MMI.callsUnwindInit() || MMI.callsEHReturn() || + MMI.callsUnwindInit() || MMI.hasEHFunclets() || MMI.callsEHReturn() || MFI->hasStackMap() || MFI->hasPatchPoint()); } @@ -695,7 +696,40 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, uint64_t NumBytes = 0; int stackGrowth = -SlotSize; - if (HasFP) { + if (MBB.isEHFuncletEntry()) { + assert(STI.isOSWindows() && "funclets only supported on Windows"); + + // Set up the FramePtr and BasePtr physical registers using the address + // passed as EBP or RDX by the MSVC EH runtime. + if (STI.is32Bit()) { + MBBI = restoreWin32EHFrameAndBasePtr(MBB, MBBI, DL); + } else { + // FIXME: Add SEH directives. + NeedsWinCFI = false; + // Immediately spill RDX into the home slot. The runtime cares about this. + unsigned RDX = Uses64BitFramePtr ? X86::RDX : X86::EDX; + // MOV64mr %rdx, 16(%rsp) + unsigned MOVmr = Uses64BitFramePtr ? 
X86::MOV64mr : X86::MOV32mr; + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), + StackPtr, true, 16) + .addReg(RDX) + .setMIFlag(MachineInstr::FrameSetup); + // PUSH64r %rbp + BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) + .addReg(MachineFramePtr, RegState::Kill) + .setMIFlag(MachineInstr::FrameSetup); + // MOV64rr %rdx, %rbp + unsigned MOVrr = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr; + BuildMI(MBB, MBBI, DL, TII.get(MOVrr), FramePtr) + .addReg(RDX) + .setMIFlag(MachineInstr::FrameSetup); + assert(!TRI->hasBasePointer(MF) && + "x64 funclets with base ptrs not yet implemented"); + } + + // For EH funclets, only allocate enough space for outgoing calls. + NumBytes = MFI->getMaxCallFrameSize(); + } else if (HasFP) { // Calculate required stack adjustment. uint64_t FrameSize = StackSize - SlotSize; // If required, include space for extra hidden slot for stashing base pointer. @@ -956,10 +990,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // it recovers the frame pointer from the base pointer rather than the // other way around. unsigned Opm = Uses64BitFramePtr ? 
X86::MOV64mr : X86::MOV32mr; - unsigned IgnoredFrameReg; - addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), BasePtr, true, - getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), - IgnoredFrameReg)) + unsigned UsedReg; + int Offset = + getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg); + assert(UsedReg == BasePtr); + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset) .addReg(FramePtr) .setMIFlag(MachineInstr::FrameSetup); } @@ -991,6 +1026,17 @@ bool X86FrameLowering::canUseLEAForSPInEpilogue( return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF); } +static bool isFuncletReturnInstr(MachineInstr *MI) { + switch (MI->getOpcode()) { + case X86::CATCHRET: + case X86::CATCHRET64: + return true; + default: + return false; + } + llvm_unreachable("impossible"); +} + void X86FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -1016,7 +1062,16 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, unsigned CSSize = X86FI->getCalleeSavedFrameSize(); uint64_t NumBytes = 0; - if (hasFP(MF)) { + if (isFuncletReturnInstr(MBBI)) { + NumBytes = MFI->getMaxCallFrameSize(); + + if (Is64Bit) { + assert(hasFP(MF) && "win64 EH funclets without FP not yet implemented"); + // POP64r %rbp + BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r), + MachineFramePtr); + } + } else if (hasFP(MF)) { // Calculate required stack adjustment. uint64_t FrameSize = StackSize - SlotSize; NumBytes = FrameSize - CSSize; @@ -1331,6 +1386,11 @@ bool X86FrameLowering::spillCalleeSavedRegisters( const TargetRegisterInfo *TRI) const { DebugLoc DL = MBB.findDebugLoc(MI); + // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI + // for us, and there are no XMM CSRs on Win32. + if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows()) + return true; + // Push GPRs. It increases frame size. 
unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r; for (unsigned i = CSI.size(); i != 0; --i) { @@ -1372,6 +1432,11 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, if (CSI.empty()) return false; + // Don't restore CSRs in 32-bit EH funclets. Matches + // spillCalleeSavedRegisters. + if (isFuncletReturnInstr(MI) && STI.is32Bit() && STI.isOSWindows()) + return true; + DebugLoc DL = MBB.findDebugLoc(MI); // Reload XMMs from stack frame. @@ -1423,8 +1488,16 @@ void X86FrameLowering::determineCalleeSaves(MachineFunction &MF, } // Spill the BasePtr if it's used. - if (TRI->hasBasePointer(MF)) + if (TRI->hasBasePointer(MF)) { SavedRegs.set(TRI->getBaseRegister()); + + // Allocate a spill slot for EBP if we have a base pointer and EH funclets. + if (MF.getMMI().hasEHFunclets()) { + int FI = MFI->CreateSpillStackObject(SlotSize, SlotSize); + X86FI->setHasSEHFramePtrSave(true); + X86FI->setSEHFramePtrSaveIndex(FI); + } + } } static bool @@ -1976,3 +2049,58 @@ bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { // safe to insert the epilogue here. return !terminatorsNeedFlagsAsInput(MBB); } + +MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHFrameAndBasePtr( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + DebugLoc DL) const { + assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env"); + assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32"); + assert(STI.is32Bit() && !Uses64BitFramePtr && + "restoring EBP/ESI on non-32-bit target"); + + MachineFunction &MF = *MBB.getParent(); + unsigned FramePtr = TRI->getFrameRegister(MF); + unsigned BasePtr = TRI->getBaseRegister(); + MachineModuleInfo &MMI = MF.getMMI(); + const Function *Fn = MF.getFunction(); + WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(Fn); + X86MachineFunctionInfo *X86FI = MF.getInfo(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + + // FIXME: Don't set FrameSetup flag in catchret case. 
+ + int FI = FuncInfo.EHRegNodeFrameIndex; + unsigned UsedReg; + int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg); + int EHRegSize = MFI->getObjectSize(FI); + int EndOffset = -EHRegOffset - EHRegSize; + assert(EndOffset >= 0 && + "end of registration object above normal EBP position!"); + if (UsedReg == FramePtr) { + // ADD $offset, %ebp + assert(UsedReg == FramePtr); + unsigned ADDri = getADDriOpcode(false, EndOffset); + BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr) + .addReg(FramePtr) + .addImm(EndOffset) + .setMIFlag(MachineInstr::FrameSetup) + ->getOperand(3) + .setIsDead(); + } else { + assert(UsedReg == BasePtr); + // LEA offset(%ebp), %esi + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr), + FramePtr, false, EndOffset) + .setMIFlag(MachineInstr::FrameSetup); + // MOV32mr SavedEBPOffset(%esi), %ebp + assert(X86FI->getHasSEHFramePtrSave()); + int Offset = + getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg); + assert(UsedReg == BasePtr); + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), UsedReg, true, + Offset) + .addReg(FramePtr) + .setMIFlag(MachineInstr::FrameSetup); + } + return MBBI; +} diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index dd83928b46c..7cbd18bd4d0 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -154,6 +154,13 @@ private: MachineBasicBlock::iterator MBBI, DebugLoc DL, int64_t Offset, bool InEpilogue) const; + + /// Sets up EBP and optionally ESI based on the incoming EBP value. Only + /// needed for 32-bit. Used in funclet prologues and at catchret destinations. 
+ MachineBasicBlock::iterator + restoreWin32EHFrameAndBasePtr(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL) const; }; } // End llvm namespace diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 18c8d7d61f3..4798474cecb 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -152,7 +152,7 @@ def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr), } -let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in { +let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1, isReturn = 1 in { def CATCHRET : I<0xC3, RawFrm, (outs), (ins GR32:$addr), "ret{l}\t# CATCHRET", [(X86catchret GR32:$addr)], IIC_RET>, Sched<[WriteJumpLd]>; diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index b0010308f05..88a6d00f4cc 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -531,7 +531,8 @@ ReSimplify: break; } - case X86::CATCHRET: { + case X86::CATCHRET: + case X86::CATCHRET64: { OutMI = MCInst(); OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget())); break; diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp index 19f574ad746..ccec7743b3b 100644 --- a/lib/Target/X86/X86WinEHState.cpp +++ b/lib/Target/X86/X86WinEHState.cpp @@ -179,6 +179,8 @@ bool WinEHStatePass::runOnFunction(Function &F) { WinEHFuncInfo &FuncInfo = *(MMI ? &MMI->getWinEHFuncInfo(&F) : FuncInfoPtr.get()); + FuncInfo.EHRegNode = RegNode; + switch (Personality) { default: llvm_unreachable("unexpected personality function"); case EHPersonality::MSVC_CXX: @@ -494,9 +496,12 @@ void WinEHStatePass::addCXXStateStoresToFunclet(Value *ParentRegNode, // Insert calls to llvm.x86.seh.restoreframe at catchret destinations. 
if (auto *CR = dyn_cast(BB.getTerminator())) { - //llvm::errs() << "BB: " << BB << '\n'; - //llvm::errs() << "CR->getSuccessor(): " << *CR->getSuccessor() << '\n'; - IRBuilder<> Builder(CR->getSuccessor()->begin()); + Instruction *Start = CR->getSuccessor()->begin(); + assert(!isa(Start) && + "winehprepare failed to demote phi after catchret"); + if (match(Start, m_Intrinsic())) + continue; + IRBuilder<> Builder(Start); Builder.CreateCall(RestoreFrame, {}); } } diff --git a/test/CodeGen/X86/win-catchpad-csrs.ll b/test/CodeGen/X86/win-catchpad-csrs.ll new file mode 100644 index 00000000000..f765207d19d --- /dev/null +++ b/test/CodeGen/X86/win-catchpad-csrs.ll @@ -0,0 +1,136 @@ +; RUN: llc -mtriple=i686-pc-windows-msvc < %s | FileCheck --check-prefix=X86 %s +; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s | FileCheck --check-prefix=X64 %s + +%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] } +%eh.CatchableType = type { i32, i8*, i32, i32, i32, i32, i8* } +%eh.CatchableTypeArray.1 = type { i32, [1 x %eh.CatchableType*] } +%eh.ThrowInfo = type { i32, i8*, i8*, i8* } +%eh.CatchHandlerType = type { i32, i8* } + +$"\01??_R0H@8" = comdat any + +@"\01??_7type_info@@6B@" = external constant i8* +@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat + +@llvm.eh.handlertype.H.0 = private unnamed_addr constant %eh.CatchHandlerType { i32 0, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) }, section "llvm.metadata" +@llvm.eh.handlertype.H.1 = private unnamed_addr constant %eh.CatchHandlerType { i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) }, section "llvm.metadata" + +declare i32 @getint() +declare void @useints(...) +declare void @f(i32 %p) +declare i32 @__CxxFrameHandler3(...) 
+ +define i32 @try_catch_catch() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) { +entry: + %a = call i32 @getint() + %b = call i32 @getint() + %c = call i32 @getint() + %d = call i32 @getint() + call void (...) @useints(i32 %a, i32 %b, i32 %c, i32 %d) + invoke void @f(i32 1) + to label %try.cont unwind label %catch.dispatch + +catch.dispatch: ; preds = %entry + %0 = catchpad [%eh.CatchHandlerType* @llvm.eh.handlertype.H.0, i8* null] + to label %catch unwind label %catchendblock + +catch: + invoke void @f(i32 2) + to label %invoke.cont.2 unwind label %catchendblock + +invoke.cont.2: ; preds = %catch + catchret %0 to label %try.cont + +try.cont: ; preds = %entry, %invoke.cont.2, %invoke.cont.3 + ret i32 0 + +catchendblock: ; preds = %catch, + catchendpad unwind to caller +} + +; X86-LABEL: _try_catch_catch: +; X86: pushl %ebp +; X86: movl %esp, %ebp +; X86: pushl %ebx +; X86: pushl %edi +; X86: pushl %esi +; X86: subl ${{[0-9]+}}, %esp +; X86: calll _getint +; X86: calll _getint +; X86: calll _getint +; X86: calll _getint +; X86: calll _useints +; X86: movl $0, -{{[0-9]+}}(%ebp) +; X86: movl $1, (%esp) +; X86: calll _f +; X86: [[contbb:LBB0_[0-9]+]]: +; X86: movl -{{[0-9]+}}(%ebp), %esp +; X86: addl ${{[0-9]+}}, %esp +; X86: popl %esi +; X86: popl %edi +; X86: popl %ebx +; X86: popl %ebp +; X86: retl + +; X86: [[catch1bb:LBB0_[0-9]+]]: # %catch{{$}} +; X86-NOT: pushl +; X86: addl $12, %ebp +; X86: subl $16, %esp +; X86: movl $1, -{{[0-9]+}}(%ebp) +; X86: movl $2, (%esp) +; X86: calll _f +; X86: movl $[[contbb]], %eax +; X86-NEXT: addl $16, %esp +; X86-NOT: popl +; X86-NEXT: retl + +; X86: L__ehtable$try_catch_catch: +; X86: $handlerMap$0$try_catch_catch: +; X86: .long 0 +; X86: .long "??_R0H@8" +; X86: .long 0 +; X86: .long [[catch1bb]] + +; X64-LABEL: try_catch_catch: +; X64: pushq %rbp +; X64: .seh_pushreg 5 +; X64: pushq %rsi +; X64: .seh_pushreg 6 +; X64: pushq %rdi +; X64: .seh_pushreg 7 +; X64: pushq %rbx +; X64: .seh_pushreg 3 +; X64: subq 
$40, %rsp +; X64: .seh_stackalloc 40 +; X64: leaq 32(%rsp), %rbp +; X64: .seh_setframe 5, 32 +; X64: callq getint +; X64: callq getint +; X64: callq getint +; X64: callq getint +; X64: callq useints +; X64: movl $1, %ecx +; X64: callq f +; X64: [[contbb:\.LBB0_[0-9]+]]: +; X64: addq $40, %rsp +; X64: popq %rbp +; X64: retq + +; X64: [[catch1bb:\.LBB0_[0-9]+]]: # %catch{{$}} +; X64: movq %rdx, 16(%rsp) +; X64: pushq %rbp +; X64: movq %rdx, %rbp +; X64: subq $32, %rsp +; X64: movl $2, %ecx +; X64: callq f +; X64: leaq [[contbb]](%rip), %rax +; X64: addq $32, %rsp +; X64: popq %rbp +; X64: retq + +; X64: $handlerMap$0$try_catch_catch: +; X64: .long 0 +; X64: .long "??_R0H@8"@IMGREL +; X64: .long 0 +; X64: .long [[catch1bb]]@IMGREL +; X64: .long 56 diff --git a/test/CodeGen/X86/win-catchpad.ll b/test/CodeGen/X86/win-catchpad.ll index 607d948e3d1..7ab59ab7223 100644 --- a/test/CodeGen/X86/win-catchpad.ll +++ b/test/CodeGen/X86/win-catchpad.ll @@ -27,12 +27,14 @@ $"\01??_R0H@8" = comdat any @llvm.eh.handlertype.H.0 = private unnamed_addr constant %eh.CatchHandlerType { i32 0, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) }, section "llvm.metadata" @llvm.eh.handlertype.H.1 = private unnamed_addr constant %eh.CatchHandlerType { i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) }, section "llvm.metadata" -declare void @f(i32 %p) +declare void @f(i32 %p, i32* %l) declare i32 @__CxxFrameHandler3(...) +declare void @barrier() define i32 @try_catch_catch() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) { entry: - invoke void @f(i32 1) + %local = alloca i32 + invoke void @f(i32 1, i32* %local) to label %try.cont unwind label %catch.dispatch catch.dispatch: ; preds = %entry @@ -40,7 +42,11 @@ catch.dispatch: ; preds = %entry to label %catch unwind label %catch.dispatch.2 catch: ; preds = %catch.dispatch - invoke void @f(i32 2) + ; FIXME: Remove this barrier once we add more real register allocation barriers. 
+ invoke void @barrier() + to label %barrier.split unwind label %catchendblock +barrier.split: + invoke void @f(i32 2, i32* %local) to label %invoke.cont.2 unwind label %catchendblock invoke.cont.2: ; preds = %catch @@ -51,7 +57,7 @@ catch.dispatch.2: ; preds = %catch.dispatch to label %catch.2 unwind label %catchendblock catch.2: ; preds = %catch.dispatch.2 - invoke void @f(i32 3) + invoke void @f(i32 3, i32* %local) to label %invoke.cont.3 unwind label %catchendblock invoke.cont.3: ; preds = %catch.2 @@ -66,24 +72,37 @@ catchendblock: ; preds = %catch, %catch.2, %c ; X86-LABEL: _try_catch_catch: ; X86: movl $0, -{{[0-9]+}}(%ebp) -; X86: movl $1, (%esp) +; X86: leal -[[local_offs:[0-9]+]](%ebp), %[[addr_reg:[a-z]+]] +; X86-DAG: movl %[[addr_reg]], 4(%esp) +; X86-DAG: movl $1, (%esp) ; X86: calll _f ; X86: [[contbb:LBB0_[0-9]+]]: ; X86: movl -{{[0-9]+}}(%ebp), %esp ; X86: retl ; X86: [[catch1bb:LBB0_[0-9]+]]: # %catch{{$}} +; X86: addl $12, %ebp +; X86: subl $8, %esp +; X86: calll _barrier ; X86: movl $1, -{{[0-9]+}}(%ebp) -; X86: movl $2, (%esp) +; X86: leal -[[local_offs]](%ebp), %[[addr_reg:[a-z]+]] +; X86-DAG: movl %[[addr_reg]], 4(%esp) +; X86-DAG: movl $2, (%esp) ; X86: calll _f ; X86: movl $[[contbb]], %eax +; X86-NEXT: addl $8, %esp ; X86-NEXT: retl ; X86: [[catch2bb:LBB0_[0-9]+]]: # %catch.2{{$}} +; X86: addl $12, %ebp +; X86: subl $8, %esp ; X86: movl $1, -{{[0-9]+}}(%ebp) -; X86: movl $3, (%esp) +; X86: leal -[[local_offs]](%ebp), %[[addr_reg:[a-z]+]] +; X86-DAG: movl %[[addr_reg]], 4(%esp) +; X86-DAG: movl $3, (%esp) ; X86: calll _f ; X86: movl $[[contbb]], %eax +; X86-NEXT: addl $8, %esp ; X86-NEXT: retl ; X86: L__ehtable$try_catch_catch: @@ -98,33 +117,54 @@ catchendblock: ; preds = %catch, %catch.2, %c ; X86: .long [[catch2bb]] ; X64-LABEL: try_catch_catch: -; X64: movl $1, %ecx +; X64: pushq %rbp +; X64: .seh_pushreg 5 +; X64: subq $48, %rsp +; X64: .seh_stackalloc 48 +; X64: leaq 48(%rsp), %rbp +; X64: .seh_setframe 5, 48 +; X64-DAG: leaq 
-[[local_offs:[0-9]+]](%rbp), %rdx +; X64-DAG: movl $1, %ecx ; X64: callq f ; X64: [[contbb:\.LBB0_[0-9]+]]: +; X64: addq $48, %rsp +; X64: popq %rbp ; X64: retq ; X64: [[catch1bb:\.LBB0_[0-9]+]]: # %catch{{$}} -; X64: movl $2, %ecx +; X64: movq %rdx, 16(%rsp) +; X64: pushq %rbp +; X64: movq %rdx, %rbp +; X64: subq $32, %rsp +; X64-DAG: leaq -[[local_offs]](%rbp), %rdx +; X64-DAG: movl $2, %ecx ; X64: callq f ; X64: leaq [[contbb]](%rip), %rax +; X64: addq $32, %rsp +; X64: popq %rbp ; X64: retq ; X64: [[catch2bb:\.LBB0_[0-9]+]]: # %catch.2{{$}} -; X64: movl $3, %ecx +; X64: movq %rdx, 16(%rsp) +; X64: pushq %rbp +; X64: movq %rdx, %rbp +; X64: subq $32, %rsp +; X64-DAG: leaq -[[local_offs]](%rbp), %rdx +; X64-DAG: movl $3, %ecx ; X64: callq f ; X64: leaq [[contbb]](%rip), %rax +; X64: addq $32, %rsp +; X64: popq %rbp ; X64: retq -; FIXME: Get rid of these parent_frame_offset things below. They are leftover -; from our IR outlining strategy. ; X64: $handlerMap$0$try_catch_catch: ; X64: .long 0 ; X64: .long "??_R0H@8"@IMGREL ; X64: .long 0 ; X64: .long [[catch1bb]]@IMGREL -; X64 .long .Lcatch$parent_frame_offset +; X64: .long 56 ; X64: .long 0 ; X64: .long "??_R0H@8"@IMGREL ; X64: .long 0 ; X64: .long [[catch2bb]]@IMGREL -; X64 .long .Lcatch.2$parent_frame_offset +; X64: .long 56 -- 2.34.1