From edfd4f18bc18f702271dc7c14dec68ffd2d10417 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Sun, 20 Jul 2014 23:31:44 +0000 Subject: [PATCH] [PowerPC] ELFv2 function call changes This patch builds upon the two preceding MC changes to implement the basic ELFv2 function call convention. In the ELFv1 ABI, a "function descriptor" was associated with every function, pointing to both the entry address and the related TOC base (and a static chain pointer for nested functions). Function pointers would actually refer to that descriptor, and the indirect call sequence needed to load up both entry address and TOC base. In the ELFv2 ABI, there are no more function descriptors, and function pointers simply refer to the (global) entry point of the function code. Indirect function calls simply branch to that address, after loading it up into r12 (as required by the ABI rules for a global entry point). Direct function calls continue to just do a "bl" to the target symbol; this will be resolved by the linker to the local entry point of the target function if it is local, and to a PLT stub if it is global. That PLT stub would then load the (global) entry point address of the final target into r12 and branch to it. Note that when performing a local function call, r2 must be set up to point to the current TOC base: if the target ends up local, the ABI requires that its local entry point is called with r2 set up; if the target ends up global, the PLT stub requires that r2 is set up. This patch implements all LLVM changes to implement that scheme: - No longer create a function descriptor when emitting a function definition (in EmitFunctionEntryLabel) - Emit two entry points *if* the function needs the TOC base (r2) anywhere (this is done EmitFunctionBodyStart; note that this cannot be done in EmitFunctionBodyStart because the global entry point prologue code must be *part* of the function as covered by debug info). - In order to make use tracking of r2 (as needed above) work correctly, mark direct function calls as implicitly using r2. - Implement the ELFv2 indirect function call sequence (no function descriptors; load target address into r12). - When creating an ELFv2 object file, emit the .abiversion 2 directive to tell the linker to create the appropriate version of PLT stubs. Reviewed by Hal Finkel. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213489 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCAsmPrinter.cpp | 78 +++++++++++++++++++++- lib/Target/PowerPC/PPCFastISel.cpp | 4 ++ lib/Target/PowerPC/PPCISelLowering.cpp | 15 ++++- lib/Target/PowerPC/PPCSubtarget.h | 3 + test/CodeGen/PowerPC/ppc64le-calls.ll | 17 +++++ test/CodeGen/PowerPC/ppc64le-localentry.ll | 46 +++++++++++++ 6 files changed, 160 insertions(+), 3 deletions(-) create mode 100644 test/CodeGen/PowerPC/ppc64le-calls.ll create mode 100644 test/CodeGen/PowerPC/ppc64le-localentry.ll diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 844d98d1a95..6f67c598c75 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -33,6 +33,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" @@ -106,6 +107,7 @@ namespace { void EmitFunctionEntryLabel() override; + void EmitFunctionBodyStart() override; void EmitFunctionBodyEnd() override; }; @@ -781,6 +783,14 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) { + if (Subtarget.isELFv2ABI()) { + PPCTargetStreamer *TS = + static_cast(OutStreamer.getTargetStreamer()); + + if (TS) + TS->emitAbiVersion(2); + } + if (Subtarget.isPPC64() || TM.getRelocationModel() != Reloc::PIC_) return AsmPrinter::EmitStartOfAsmFile(M); @@ -834,7 +844,11 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { } else return AsmPrinter::EmitFunctionEntryLabel(); } - + + // ELFv2 ABI - Normal entry label. + if (Subtarget.isELFv2ABI()) + return AsmPrinter::EmitFunctionEntryLabel(); + // Emit an official procedure descriptor. MCSectionSubPair Current = OutStreamer.getCurrentSection(); const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".opd", @@ -919,6 +933,68 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { return AsmPrinter::doFinalization(M); } +/// EmitFunctionBodyStart - Emit a global entry point prefix for ELFv2. +void PPCLinuxAsmPrinter::EmitFunctionBodyStart() { + // In the ELFv2 ABI, in functions that use the TOC register, we need to + // provide two entry points. The ABI guarantees that when calling the + // local entry point, r2 is set up by the caller to contain the TOC base + // for this function, and when calling the global entry point, r12 is set + // up by the caller to hold the address of the global entry point. We + // thus emit a prefix sequence along the following lines: + // + // func: + // # global entry point + // addis r2,r12,(.TOC.-func)@ha + // addi r2,r2,(.TOC.-func)@l + // .localentry func, .-func + // # local entry point, followed by function body + // + // This ensures we have r2 set up correctly while executing the function + // body, no matter which entry point is called. + if (Subtarget.isELFv2ABI() + // Only do all that if the function uses r2 in the first place. + && !MF->getRegInfo().use_empty(PPC::X2)) { + + MCSymbol *GlobalEntryLabel = OutContext.CreateTempSymbol(); + OutStreamer.EmitLabel(GlobalEntryLabel); + const MCSymbolRefExpr *GlobalEntryLabelExp = + MCSymbolRefExpr::Create(GlobalEntryLabel, OutContext); + + MCSymbol *TOCSymbol = OutContext.GetOrCreateSymbol(StringRef(".TOC.")); + const MCExpr *TOCDeltaExpr = + MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(TOCSymbol, OutContext), + GlobalEntryLabelExp, OutContext); + + const MCExpr *TOCDeltaHi = + PPCMCExpr::CreateHa(TOCDeltaExpr, false, OutContext); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS) + .addReg(PPC::X2) + .addReg(PPC::X12) + .addExpr(TOCDeltaHi)); + + const MCExpr *TOCDeltaLo = + PPCMCExpr::CreateLo(TOCDeltaExpr, false, OutContext); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI) + .addReg(PPC::X2) + .addReg(PPC::X2) + .addExpr(TOCDeltaLo)); + + MCSymbol *LocalEntryLabel = OutContext.CreateTempSymbol(); + OutStreamer.EmitLabel(LocalEntryLabel); + const MCSymbolRefExpr *LocalEntryLabelExp = + MCSymbolRefExpr::Create(LocalEntryLabel, OutContext); + const MCExpr *LocalOffsetExp = + MCBinaryExpr::CreateSub(LocalEntryLabelExp, + GlobalEntryLabelExp, OutContext); + + PPCTargetStreamer *TS = + static_cast(OutStreamer.getTargetStreamer()); + + if (TS) + TS->emitLocalEntry(CurrentFnSym, LocalOffsetExp); + } +} + /// EmitFunctionBodyEnd - Print the traceback table before the .size /// directive. /// diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 92a0ec1a9b3..d9e011e18fe 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -1498,6 +1498,10 @@ bool PPCFastISel::SelectCall(const Instruction *I) { for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II) MIB.addReg(RegArgs[II], RegState::Implicit); + // Direct calls in the ELFv2 ABI need the TOC register live into the call. + if (PPCSubTarget->isELFv2ABI()) + MIB.addReg(PPC::X2, RegState::Implicit); + // Add a register mask with the call-preserved registers. Proper // defs for return values will be added by setPhysRegsDeadExcept(). MIB.addRegMask(TRI.getCallPreservedMask(CC)); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index c671e02e813..2731053c17b 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -3371,6 +3371,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, bool isPPC64 = Subtarget.isPPC64(); bool isSVR4ABI = Subtarget.isSVR4ABI(); + bool isELFv2ABI = Subtarget.isELFv2ABI(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); NodeTys.push_back(MVT::Other); // Returns a chain @@ -3440,7 +3441,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, // to do the call, we can't use PPCISD::CALL. SDValue MTCTROps[] = {Chain, Callee, InFlag}; - if (isSVR4ABI && isPPC64) { + if (isSVR4ABI && isPPC64 && !isELFv2ABI) { // Function pointers in the 64-bit SVR4 ABI do not point to the function // entry point, but to the function descriptor (the function entry point // address is part of the function descriptor though). @@ -3520,7 +3521,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, CallOpc = PPCISD::BCTRL; Callee.setNode(nullptr); // Add use of X11 (holding environment pointer) - if (isSVR4ABI && isPPC64) + if (isSVR4ABI && isPPC64 && !isELFv2ABI) Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); // Add CTR register as callee so a bctr can be emitted later. if (isTailCall) @@ -3542,6 +3543,10 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); + // Direct calls in the ELFv2 ABI need the TOC register live into the call. + if (Callee.getNode() && isELFv2ABI) + Ops.push_back(DAG.getRegister(PPC::X2, PtrVT)); + return CallOpc; } @@ -3988,6 +3993,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, SDLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { + bool isELFv2ABI = Subtarget.isELFv2ABI(); bool isLittleEndian = Subtarget.isLittleEndian(); unsigned NumOps = Outs.size(); @@ -4373,6 +4379,11 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(), false, false, 0); + // In the ELFv2 ABI, R12 must contain the address of an indirect callee. + // This does not mean the MTCTR instruction must use R12; it's easier + // to model this as an extra parameter, so do that. + if (isELFv2ABI) + RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee)); } // Build a sequence of copy-to-reg nodes chained together with token chain diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 58314538a10..a3cedafb5ef 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -227,6 +227,9 @@ public: bool isDarwinABI() const { return isDarwin(); } bool isSVR4ABI() const { return !isDarwin(); } + /// FIXME: Should use a command-line option. + bool isELFv2ABI() const { return isPPC64() && isSVR4ABI() && + isLittleEndian(); } bool enableEarlyIfConversion() const override { return hasISEL(); } diff --git a/test/CodeGen/PowerPC/ppc64le-calls.ll b/test/CodeGen/PowerPC/ppc64le-calls.ll new file mode 100644 index 00000000000..84b431ae2df --- /dev/null +++ b/test/CodeGen/PowerPC/ppc64le-calls.ll @@ -0,0 +1,17 @@ +; RUN: llc -march=ppc64le -mcpu=pwr8 < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +; Indirect calls requires a full stub creation +define void @test_indirect(void ()* nocapture %fp) { +; CHECK-LABEL: @test_indirect + tail call void %fp() +; CHECK-DAG: std 2, 40(1) +; CHECK-DAG: mr 12, 3 +; CHECK-DAG: mtctr 3 +; CHECK: bctrl +; CHECK-NEXT: ld 2, 40(1) + ret void +} + diff --git a/test/CodeGen/PowerPC/ppc64le-localentry.ll b/test/CodeGen/PowerPC/ppc64le-localentry.ll new file mode 100644 index 00000000000..4676ce8eadc --- /dev/null +++ b/test/CodeGen/PowerPC/ppc64le-localentry.ll @@ -0,0 +1,46 @@ +; RUN: llc -march=ppc64le -mcpu=pwr8 < %s | FileCheck %s +; RUN: llc -march=ppc64le -mcpu=pwr8 -O0 < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +@number64 = global i64 10, align 8 + +; CHECK: .abiversion 2 + +define i64 @use_toc(i64 %a) nounwind { +entry: +; CHECK-LABEL: @use_toc +; CHECK-NEXT: .Ltmp[[TMP1:[0-9]+]]: +; CHECK-NEXT: addis 2, 12, .TOC.-.Ltmp[[TMP1]]@ha +; CHECK-NEXT: addi 2, 2, .TOC.-.Ltmp[[TMP1]]@l +; CHECK-NEXT: .Ltmp[[TMP2:[0-9]+]]: +; CHECK-NEXT: .localentry use_toc, .Ltmp[[TMP2]]-.Ltmp[[TMP1]] +; CHECK-NEXT: %entry + %0 = load i64* @number64, align 8 + %cmp = icmp eq i64 %0, %a + %conv1 = zext i1 %cmp to i64 + ret i64 %conv1 +} + +declare void @callee() +define void @use_toc_implicit() nounwind { +entry: +; CHECK-LABEL: @use_toc_implicit +; CHECK-NEXT: .Ltmp[[TMP1:[0-9]+]]: +; CHECK-NEXT: addis 2, 12, .TOC.-.Ltmp[[TMP1]]@ha +; CHECK-NEXT: addi 2, 2, .TOC.-.Ltmp[[TMP1]]@l +; CHECK-NEXT: .Ltmp[[TMP2:[0-9]+]]: +; CHECK-NEXT: .localentry use_toc_implicit, .Ltmp[[TMP2]]-.Ltmp[[TMP1]] +; CHECK-NEXT: %entry + call void @callee() + ret void +} + +define i64 @no_toc(i64 %a) nounwind { +entry: +; CHECK-LABEL: @no_toc +; CHECK-NEXT: %entry + ret i64 %a +} + -- 2.34.1