PPCTargetObjectFile.cpp
PPCTargetTransformInfo.cpp
PPCSelectionDAGInfo.cpp
+ PPCTLSDynamicCall.cpp
PPCVSXCopy.cpp
PPCVSXFMAMutate.cpp
)
FunctionPass *createPPCVSXFMAMutatePass();
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
+ FunctionPass *createPPCTLSDynamicCallPass();
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP, bool isDarwin);
MO_TOC_LO = 7 << 4,
// Symbol for VK_PPC_TLS fixup attached to an ADD instruction
- MO_TLS = 8 << 4,
-
- // Symbols for VK_PPC_TLSGD and VK_PPC_TLSLD in __tls_get_addr
- // call sequences.
- MO_TLSLD = 9 << 4,
- MO_TLSGD = 10 << 4
+ MO_TLS = 8 << 4
};
} // end namespace PPCII
const MachineInstr &MI);
void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI);
+ void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
};
/// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::NOP));
}
+/// EmitTlsCall -- Emit the call to __tls_get_addr that a
+/// GETtls[ld]ADDR[32] pseudo stands for.  \p VK is the variant kind
+/// attached to the TLS symbol argument of the call; EmitInstruction passes
+/// VK_PPC_TLSGD or VK_PPC_TLSLD.
+void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
+                                MCSymbolRefExpr::VariantKind VK) {
+  const StringRef CalleeName = "__tls_get_addr";
+  MCSymbol *Callee = OutContext.GetOrCreateSymbol(CalleeName);
+  const bool Is64Bit = Subtarget.isPPC64();
+
+  // Operands 0 and 1 must both be GPR3: X3 on 64-bit, R3 on 32-bit.
+  const unsigned GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
+  (void)GPR3; // Only read by the asserts below.
+  assert(MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == GPR3 &&
+         "GETtls[ld]ADDR[32] must define GPR3");
+  assert(MI->getOperand(1).isReg() && MI->getOperand(1).getReg() == GPR3 &&
+         "GETtls[ld]ADDR[32] must read GPR3");
+
+  // The callee reference is marked @PLT only for 32-bit, non-Darwin PIC.
+  const bool ViaPLT = !Is64Bit && !Subtarget.isDarwin() &&
+                      TM.getRelocationModel() == Reloc::PIC_;
+  const MCSymbolRefExpr *CalleeRef = MCSymbolRefExpr::Create(
+      Callee, ViaPLT ? MCSymbolRefExpr::VK_PLT : MCSymbolRefExpr::VK_None,
+      OutContext);
+
+  // Operand 2 is the TLS global; it is emitted as <sym>@<VK>.
+  MCSymbol *TlsSym = getSymbol(MI->getOperand(2).getGlobal());
+  const MCExpr *TlsArg = MCSymbolRefExpr::Create(TlsSym, VK, OutContext);
+
+  const unsigned CallOpc = Is64Bit ? PPC::BL8_NOP_TLS : PPC::BL_TLS;
+  EmitToStreamer(OutStreamer,
+                 MCInstBuilder(CallOpc).addExpr(CalleeRef).addExpr(TlsArg));
+}
+
/// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
/// the current output stream.
///
.addExpr(SymGotTlsGD));
return;
}
+ case PPC::GETtlsADDR:
+ // Transform: %X3 = GETtlsADDR %X3, <ga:@sym>
+ // Into: BL8_NOP_TLS __tls_get_addr(sym at tlsgd)
+ case PPC::GETtlsADDR32: {
+ // Transform: %R3 = GETtlsADDR32 %R3, <ga:@sym>
+ // Into: BL_TLS __tls_get_addr(sym at tlsgd)@PLT
+ EmitTlsCall(MI, MCSymbolRefExpr::VK_PPC_TLSGD);
+ return;
+ }
case PPC::ADDIStlsldHA: {
// Transform: %Xd = ADDIStlsldHA %X2, <ga:@sym>
// Into: %Xd = ADDIS8 %X2, sym@got@tlsld@ha
.addExpr(SymGotTlsLD));
return;
}
+ case PPC::GETtlsldADDR:
+ // Transform: %X3 = GETtlsldADDR %X3, <ga:@sym>
+ // Into: BL8_NOP_TLS __tls_get_addr(sym at tlsld)
+ case PPC::GETtlsldADDR32: {
+ // Transform: %R3 = GETtlsldADDR32 %R3, <ga:@sym>
+ // Into: BL_TLS __tls_get_addr(sym at tlsld)@PLT
+ EmitTlsCall(MI, MCSymbolRefExpr::VK_PPC_TLSLD);
+ return;
+ }
case PPC::ADDISdtprelHA:
// Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym>
// Into: %Xd = ADDIS8 %X3, sym@dtprel@ha
return FuncInfo->hasNonRISpills();
}
+/// MustSaveLR - Return true if this function requires that we save the LR
+/// register onto the stack in the prolog and restore it in the epilog of the
+/// function.
+///
+/// \param MF  The machine function whose register info and PPCFunctionInfo
+///            are consulted.
+/// \param LR  The link register whose definitions are looked up.
+///
+/// determineFrameLayout uses this as one of the conditions for deciding that
+/// no frame is needed.
+static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
+ const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
+
+ // We need a save/restore of LR if there is any def of LR (which is
+ // defined by calls, including the PIC setup sequence), or if there is
+ // some use of the LR stack slot (e.g. for builtin_return_address).
+ // (LR comes in 32 and 64 bit versions.)
+ MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
+ return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
+}
+
/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
// stackless code if all local vars are reg-allocated.
bool DisableRedZone = MF.getFunction()->getAttributes().
hasAttribute(AttributeSet::FunctionIndex, Attribute::NoRedZone);
+ unsigned LR = RegInfo->getRARegister();
if (!DisableRedZone &&
(Subtarget.isPPC64() || // 32-bit SVR4, no stack-
!Subtarget.isSVR4ABI() || // allocated locals.
FrameSize <= 224 && // Fits in red zone.
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
+ !MustSaveLR(MF, LR) &&
!RegInfo->hasBasePointer(MF)) { // No special alignment.
// No need for frame
if (UpdateMF)
}
}
-/// MustSaveLR - Return true if this function requires that we save the LR
-/// register onto the stack in the prolog and restore it in the epilog of the
-/// function.
-static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
- const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
-
- // We need a save/restore of LR if there is any def of LR (which is
- // defined by calls, including the PIC setup sequence), or if there is
- // some use of the LR stack slot (e.g. for builtin_return_address).
- // (LR comes in 32 and 64 bit versions.)
- MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
- return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
-}
-
void
PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *) const {
case PPCISD::SHL: return "PPCISD::SHL";
case PPCISD::CALL: return "PPCISD::CALL";
case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
- case PPCISD::CALL_TLS: return "PPCISD::CALL_TLS";
- case PPCISD::CALL_NOP_TLS: return "PPCISD::CALL_NOP_TLS";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
case PPCISD::BCTRL: return "PPCISD::BCTRL";
case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
+ case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
+ case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
+ case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
+ case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
}
-// Generate a call to __tls_get_addr for the given GOT entry Op.
-std::pair<SDValue,SDValue>
-PPCTargetLowering::lowerTLSCall(SDValue Op, SDLoc dl,
- SelectionDAG &DAG) const {
-
- Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- Entry.Node = Op;
- Entry.Ty = IntPtrTy;
- Args.push_back(Entry);
-
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
- .setCallee(CallingConv::C, IntPtrTy,
- DAG.getTargetExternalSymbol("__tls_get_addr", getPointerTy()),
- std::move(Args), 0);
-
- return LowerCallTo(CLI);
-}
-
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
}
if (Model == TLSModel::GeneralDynamic) {
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- PPCII::MO_TLSGD);
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
SDValue GOTPtr;
if (is64bit) {
setUsesTOCBasePtr(DAG);
else
GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
}
- SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
- GOTPtr, TGA);
- std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
- return CallResult.first;
+ return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
+ GOTPtr, TGA, TGA);
}
if (Model == TLSModel::LocalDynamic) {
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- PPCII::MO_TLSLD);
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
SDValue GOTPtr;
if (is64bit) {
setUsesTOCBasePtr(DAG);
else
GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
}
- SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
- GOTPtr, TGA);
- std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
- SDValue TLSAddr = CallResult.first;
- SDValue Chain = CallResult.second;
- SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
- Chain, TLSAddr, TGA);
+ SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
+ PtrVT, GOTPtr, TGA, TGA);
+ SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
+ PtrVT, TLSAddr, TGA);
return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
}
if (Callee.getNode()) {
Ops.push_back(Chain);
Ops.push_back(Callee);
-
- // If this is a call to __tls_get_addr, find the symbol whose address
- // is to be taken and add it to the list. This will be used to
- // generate __tls_get_addr(<sym>@tlsgd) or __tls_get_addr(<sym>@tlsld).
- // We find the symbol by walking the chain to the CopyFromReg, walking
- // back from the CopyFromReg to the ADDI_TLSGD_L or ADDI_TLSLD_L, and
- // pulling the symbol from that node.
- if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
- if (!strcmp(S->getSymbol(), "__tls_get_addr")) {
- assert(!needIndirectCall && "Indirect call to __tls_get_addr???");
- SDNode *AddI = Chain.getNode()->getOperand(2).getNode();
- SDValue TGTAddr = AddI->getOperand(1);
- assert(TGTAddr.getNode()->getOpcode() == ISD::TargetGlobalTLSAddress &&
- "Didn't find target global TLS address where we expected one");
- Ops.push_back(TGTAddr);
- CallOpc = PPCISD::CALL_TLS;
- }
}
// If this is a tail call add stack pointer delta.
if (isTailCall)
Ops.insert(std::next(Ops.begin()), AddTOC);
} else if ((CallOpc == PPCISD::CALL) &&
(!isLocalCall(Callee) ||
- DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_))
// Otherwise insert NOP for non-local calls.
CallOpc = PPCISD::CALL_NOP;
- } else if (CallOpc == PPCISD::CALL_TLS)
- // For 64-bit SVR4, TLS calls are always non-local.
- CallOpc = PPCISD::CALL_NOP_TLS;
}
Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
/// SVR4 calls.
CALL, CALL_NOP,
- /// CALL_TLS and CALL_NOP_TLS - Versions of CALL and CALL_NOP used
- /// to access TLS variables.
- CALL_TLS, CALL_NOP_TLS,
-
/// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
/// MTCTR instruction.
MTCTR,
/// register to sym\@got\@tlsgd\@ha.
ADDIS_TLSGD_HA,
- /// G8RC = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
+ /// %X3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
/// model, produces an ADDI8 instruction that adds G8RReg to
- /// sym\@got\@tlsgd\@l.
+ /// sym\@got\@tlsgd\@l and stores the result in X3. Hidden by
+ /// ADDI_TLSGD_L_ADDR until after register assignment.
ADDI_TLSGD_L,
+ /// %X3 = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS
+ /// model, produces a call to __tls_get_addr(sym\@tlsgd). Hidden by
+ /// ADDI_TLSGD_L_ADDR until after register assignment.
+ GET_TLS_ADDR,
+
+ /// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that
+ /// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following
+ /// register assignment.
+ ADDI_TLSGD_L_ADDR,
+
/// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS
/// model, produces an ADDIS8 instruction that adds the GOT base
/// register to sym\@got\@tlsld\@ha.
ADDIS_TLSLD_HA,
- /// G8RC = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
+ /// %X3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
/// model, produces an ADDI8 instruction that adds G8RReg to
- /// sym\@got\@tlsld\@l.
+ /// sym\@got\@tlsld\@l and stores the result in X3. Hidden by
+ /// ADDI_TLSLD_L_ADDR until after register assignment.
ADDI_TLSLD_L,
- /// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the
- /// local-dynamic TLS model, produces an ADDIS8 instruction
- /// that adds X3 to sym\@dtprel\@ha. The Chain operand is needed
- /// to tie this in place following a copy to %X3 from the result
- /// of a GET_TLSLD_ADDR.
+ /// %X3 = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS
+ /// model, produces a call to __tls_get_addr(sym\@tlsld). Hidden by
+ /// ADDI_TLSLD_L_ADDR until after register assignment.
+ GET_TLSLD_ADDR,
+
+ /// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that
+ /// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion
+ /// following register assignment.
+ ADDI_TLSLD_L_ADDR,
+
+ /// G8RC = ADDIS_DTPREL_HA %X3, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDIS8 instruction that adds X3 to
+ /// sym\@dtprel\@ha.
ADDIS_DTPREL_HA,
/// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
- std::pair<SDValue,SDValue> lowerTLSCall(SDValue Op, SDLoc dl,
- SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
(BL8_NOP texternalsym:$dst)>;
-def : Pat<(PPCcall_nop_tls texternalsym:$func, tglobaltlsaddr:$sym),
- (BL8_NOP_TLS texternalsym:$func, tglobaltlsaddr:$sym)>;
-
// Atomic operations
let usesCustomInserter = 1 in {
let Defs = [CR0] in {
[(set i64:$rD,
(PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
+// General-dynamic call to __tls_get_addr (64-bit).  The PPCTLSDynamicCall
+// pass creates this op with X3 as both $rD and $reg; the asm printer emits
+// it as BL8_NOP_TLS __tls_get_addr(sym@tlsgd).
+// LR8 is a true define, while the rest of the Defs are clobbers. X3 is
+// explicitly defined when this op is created, so not mentioned here.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
+def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
+ "#GETtlsADDR",
+ [(set i64:$rD,
+ (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
+ isPPC64;
+// Combined op for ADDItlsgdL and GETtlsADDR, late expanded. X3 and LR8
+// are true defines while the rest of the Defs are clobbers.
+// The PPCTLSDynamicCall pass replaces it with ADDItlsgdL (using $reg and
+// $disp), GETtlsADDR (using $sym), and a COPY from X3 into $rD.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [X0,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7]
+ in
+def ADDItlsgdLADDR : Pseudo<(outs g8rc:$rD),
+ (ins g8rc_nox0:$reg, s16imm64:$disp, tlsgd:$sym),
+ "#ADDItlsgdLADDR",
+ [(set i64:$rD,
+ (PPCaddiTlsgdLAddr i64:$reg,
+ tglobaltlsaddr:$disp,
+ tglobaltlsaddr:$sym))]>,
+ isPPC64;
def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDIStlsldHA",
[(set i64:$rD,
[(set i64:$rD,
(PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
+// Local-dynamic call to __tls_get_addr (64-bit).  The PPCTLSDynamicCall
+// pass creates this op with X3 as both $rD and $reg; the asm printer emits
+// it as BL8_NOP_TLS __tls_get_addr(sym@tlsld).
+// LR8 is a true define, while the rest of the Defs are clobbers. X3 is
+// explicitly defined when this op is created, so not mentioned here.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
+def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
+ "#GETtlsldADDR",
+ [(set i64:$rD,
+ (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
+ isPPC64;
+// Combined op for ADDItlsldL and GETtlsldADDR, late expanded. X3 and LR8
+// are true defines, while the rest of the Defs are clobbers.
+// The PPCTLSDynamicCall pass replaces it with ADDItlsldL (using $reg and
+// $disp), GETtlsldADDR (using $sym), and a COPY from X3 into $rD.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [X0,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7]
+ in
+def ADDItlsldLADDR : Pseudo<(outs g8rc:$rD),
+ (ins g8rc_nox0:$reg, s16imm64:$disp, tlsgd:$sym),
+ "#ADDItlsldLADDR",
+ [(set i64:$rD,
+ (PPCaddiTlsldLAddr i64:$reg,
+ tglobaltlsaddr:$disp,
+ tglobaltlsaddr:$sym))]>,
+ isPPC64;
def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDISdtprelHA",
[(set i64:$rD,
def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
+def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
+def PPCaddiTlsgdLAddr : SDNode<"PPCISD::ADDI_TLSGD_L_ADDR",
+ SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>;
def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>;
def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>;
-def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp,
- [SDNPHasChain]>;
+def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>;
+def PPCaddiTlsldLAddr : SDNode<"PPCISD::ADDI_TLSLD_L_ADDR",
+ SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>;
+def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp>;
def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
-def PPCcall_tls : SDNode<"PPCISD::CALL_TLS", SDT_PPCCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
-def PPCcall_nop_tls : SDNode<"PPCISD::CALL_NOP_TLS", SDT_PPCCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone,
def : Pat<(PPCcall (i32 texternalsym:$dst)),
(BL texternalsym:$dst)>;
-def : Pat<(PPCcall_tls texternalsym:$func, tglobaltlsaddr:$sym),
- (BL_TLS texternalsym:$func, tglobaltlsaddr:$sym)>;
-
def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
(TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
"#ADDItlsgdL32",
[(set i32:$rD,
(PPCaddiTlsgdL i32:$reg, tglobaltlsaddr:$disp))]>;
+// General-dynamic call to __tls_get_addr (32-bit).  The PPCTLSDynamicCall
+// pass creates this op with R3 as both $rD and $reg; the asm printer emits
+// it as BL_TLS __tls_get_addr(sym@tlsgd), adding @PLT to the callee for
+// non-Darwin PIC code.
+// LR is a true define, while the rest of the Defs are clobbers. R3 is
+// explicitly defined when this op is created, so not mentioned here.
+// The asm string carries the leading '#' used by the other TLS pseudos.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [R0,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
+def GETtlsADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
+ "#GETtlsADDR32",
+ [(set i32:$rD,
+ (PPCgetTlsAddr i32:$reg, tglobaltlsaddr:$sym))]>;
+// Combined op for ADDItlsgdL32 and GETtlsADDR32, late expanded. R3 and LR
+// are true defines while the rest of the Defs are clobbers.
+// The PPCTLSDynamicCall pass replaces it with ADDItlsgdL32 (using $reg and
+// $disp), GETtlsADDR32 (using $sym), and a COPY from R3 into $rD.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
+def ADDItlsgdLADDR32 : Pseudo<(outs gprc:$rD),
+ (ins gprc_nor0:$reg, s16imm:$disp, tlsgd32:$sym),
+ "#ADDItlsgdLADDR32",
+ [(set i32:$rD,
+ (PPCaddiTlsgdLAddr i32:$reg,
+ tglobaltlsaddr:$disp,
+ tglobaltlsaddr:$sym))]>;
def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
"#ADDItlsldL32",
[(set i32:$rD,
(PPCaddiTlsldL i32:$reg, tglobaltlsaddr:$disp))]>;
+// Local-dynamic call to __tls_get_addr (32-bit).  The PPCTLSDynamicCall
+// pass creates this op with R3 as both $rD and $reg; the asm printer emits
+// it as BL_TLS __tls_get_addr(sym@tlsld), adding @PLT to the callee for
+// non-Darwin PIC code.
+// LR is a true define, while the rest of the Defs are clobbers. R3 is
+// explicitly defined when this op is created, so not mentioned here.
+// The asm string carries the leading '#' used by the other TLS pseudos.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [R0,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
+def GETtlsldADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
+ "#GETtlsldADDR32",
+ [(set i32:$rD,
+ (PPCgetTlsldAddr i32:$reg,
+ tglobaltlsaddr:$sym))]>;
+// Combined op for ADDItlsldL32 and GETtlsldADDR32, late expanded. R3 and LR
+// are true defines while the rest of the Defs are clobbers.
+// The PPCTLSDynamicCall pass replaces it with ADDItlsldL32 (using $reg and
+// $disp), GETtlsldADDR32 (using $sym), and a COPY from R3 into $rD.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
+def ADDItlsldLADDR32 : Pseudo<(outs gprc:$rD),
+ (ins gprc_nor0:$reg, s16imm:$disp, tlsgd32:$sym),
+ "#ADDItlsldLADDR32",
+ [(set i32:$rD,
+ (PPCaddiTlsldLAddr i32:$reg,
+ tglobaltlsaddr:$disp,
+ tglobaltlsaddr:$sym))]>;
def ADDIdtprelL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
"#ADDIdtprelL32",
[(set i32:$rD,
case PPCII::MO_TLS:
RefKind = MCSymbolRefExpr::VK_PPC_TLS;
break;
- case PPCII::MO_TLSGD:
- RefKind = MCSymbolRefExpr::VK_PPC_TLSGD;
- break;
- case PPCII::MO_TLSLD:
- RefKind = MCSymbolRefExpr::VK_PPC_TLSLD;
- break;
}
if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && !isDarwin)
--- /dev/null
+//===---------- PPCTLSDynamicCall.cpp - TLS Dynamic Call Fixup ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass expands ADDItls{ld,gd}LADDR[32] machine instructions into
+// separate ADDItls{ld,gd}L[32] and GETtls[ld]ADDR[32] instructions, both
+// of which define GPR3. A copy is added from GPR3 to the target virtual
+// register of the original instruction. The GETtls[ld]ADDR[32] is really
+// a call instruction, so its target register is constrained to be GPR3.
+// This is not true of ADDItls{ld,gd}L[32], but there is a legacy linker
+// optimization bug that requires the target register of the addi of
+// a local- or general-dynamic TLS access sequence to be GPR3.
+//
+// This is done in a late pass so that TLS variable accesses can be
+// fully commoned by MachineCSE.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCTargetMachine.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-tls-dynamic-call"
+
+namespace llvm {
+ void initializePPCTLSDynamicCallPass(PassRegistry&);
+}
+
+namespace {
+ /// Machine function pass that splits each combined
+ /// ADDItls{gd,ld}LADDR[32] pseudo into its ADDItls{gd,ld}L[32] and
+ /// GETtls[ld]ADDR[32] halves, both writing GPR3, followed by a COPY from
+ /// GPR3 into the pseudo's original destination register.  Live intervals
+ /// are repaired after every expansion.
+ struct PPCTLSDynamicCall : public MachineFunctionPass {
+   static char ID;
+   PPCTLSDynamicCall() : MachineFunctionPass(ID) {
+     initializePPCTLSDynamicCallPass(*PassRegistry::getPassRegistry());
+   }
+
+   // Per-function state, assigned at the top of runOnMachineFunction.
+   const PPCTargetMachine *TM;
+   const PPCInstrInfo *TII;
+   LiveIntervals *LIS;
+
+protected:
+   /// Expand every ADDItls{gd,ld}LADDR[32] instruction found in \p MBB.
+   /// \returns true if at least one instruction was expanded.
+   bool processBlock(MachineBasicBlock &MBB) {
+     bool Changed = false;
+     bool Is64Bit = TM->getSubtargetImpl()->isPPC64();
+
+     // The iterator is advanced inside the body, exactly once per visited
+     // instruction.  Stepping it in the loop header as well would skip the
+     // instruction that follows each expanded pseudo and, when the pseudo
+     // is the last instruction of the block, would step past MBB.end().
+     for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+          I != IE;) {
+       MachineInstr *MI = I;
+
+       if (MI->getOpcode() != PPC::ADDItlsgdLADDR &&
+           MI->getOpcode() != PPC::ADDItlsldLADDR &&
+           MI->getOpcode() != PPC::ADDItlsgdLADDR32 &&
+           MI->getOpcode() != PPC::ADDItlsldLADDR32) {
+         ++I;
+         continue;
+       }
+
+       DEBUG(dbgs() << "TLS Dynamic Call Fixup:\n    " << *MI;);
+
+       unsigned OutReg = MI->getOperand(0).getReg();
+       unsigned InReg  = MI->getOperand(1).getReg();
+       DebugLoc DL = MI->getDebugLoc();
+       unsigned GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
+       unsigned Opc1, Opc2;
+       // Registers handed to the live-interval repair below.
+       SmallVector<unsigned, 4> OrigRegs;
+       OrigRegs.push_back(OutReg);
+       OrigRegs.push_back(InReg);
+       OrigRegs.push_back(GPR3);
+
+       // Opc1 is the addi half, Opc2 the __tls_get_addr call half.
+       switch (MI->getOpcode()) {
+       default:
+         llvm_unreachable("Opcode inconsistency error");
+       case PPC::ADDItlsgdLADDR:
+         Opc1 = PPC::ADDItlsgdL;
+         Opc2 = PPC::GETtlsADDR;
+         break;
+       case PPC::ADDItlsldLADDR:
+         Opc1 = PPC::ADDItlsldL;
+         Opc2 = PPC::GETtlsldADDR;
+         break;
+       case PPC::ADDItlsgdLADDR32:
+         Opc1 = PPC::ADDItlsgdL32;
+         Opc2 = PPC::GETtlsADDR32;
+         break;
+       case PPC::ADDItlsldLADDR32:
+         Opc1 = PPC::ADDItlsldL32;
+         Opc2 = PPC::GETtlsldADDR32;
+         break;
+       }
+
+       // Expand into two ops built prior to the existing instruction.
+       MachineInstr *Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3)
+         .addReg(InReg);
+       Addi->addOperand(MI->getOperand(2));
+
+       // The ADDItls* instruction is the first instruction in the
+       // repair range.
+       MachineBasicBlock::iterator First = I;
+       --First;
+
+       MachineInstr *Call = (BuildMI(MBB, I, DL, TII->get(Opc2), GPR3)
+                             .addReg(GPR3));
+       Call->addOperand(MI->getOperand(3));
+
+       BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), OutReg)
+         .addReg(GPR3);
+
+       // The COPY is the last instruction in the repair range.
+       MachineBasicBlock::iterator Last = I;
+       --Last;
+
+       // Move past the original instruction and remove it.  This is the
+       // only advance of I on this path.
+       ++I;
+       MI->removeFromParent();
+
+       // Repair the live intervals.
+       LIS->repairIntervalsInRange(&MBB, First, Last, OrigRegs);
+       Changed = true;
+     }
+
+     return Changed;
+   }
+
+public:
+   /// Cache the target machine, instruction info and live intervals, then
+   /// run processBlock over every basic block of \p MF.
+   /// \returns true if any block was changed.
+   bool runOnMachineFunction(MachineFunction &MF) override {
+     TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
+     TII = TM->getSubtargetImpl()->getInstrInfo();
+     LIS = &getAnalysis<LiveIntervals>();
+
+     bool Changed = false;
+
+     for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+       MachineBasicBlock &B = *I++;
+       if (processBlock(B))
+         Changed = true;
+     }
+
+     return Changed;
+   }
+
+   /// LiveIntervals and SlotIndexes are both required and preserved.
+   void getAnalysisUsage(AnalysisUsage &AU) const override {
+     AU.addRequired<LiveIntervals>();
+     AU.addPreserved<LiveIntervals>();
+     AU.addRequired<SlotIndexes>();
+     AU.addPreserved<SlotIndexes>();
+     MachineFunctionPass::getAnalysisUsage(AU);
+   }
+ };
+}
+
+INITIALIZE_PASS_BEGIN(PPCTLSDynamicCall, DEBUG_TYPE,
+ "PowerPC TLS Dynamic Call Fixup", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(PPCTLSDynamicCall, DEBUG_TYPE,
+ "PowerPC TLS Dynamic Call Fixup", false, false)
+
+char PPCTLSDynamicCall::ID = 0;
+FunctionPass*
+llvm::createPPCTLSDynamicCallPass() { return new PPCTLSDynamicCall(); }
initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID,
&PPCVSXFMAMutateID);
+ if (getPPCTargetMachine().getRelocationModel() == Reloc::PIC_)
+ addPass(createPPCTLSDynamicCallPass());
}
void PPCPassConfig::addPreSched2() {
; CHECK-LABEL: @test1
; CHECK: mflr 0
; CHECK: std 0, 16(1)
-; FIXME: These next two lines don't both need to load the same value.
-; CHECK-DAG: ld 3, 16(1)
+; CHECK-DAG: ld 3, 64(1)
; CHECK-DAG: ld 0, 16(1)
; CHECK: mtlr 0
; CHECK: blr
--- /dev/null
+; RUN: llc -march=ppc64 -mcpu=pwr7 -O2 -relocation-model=pic < %s | FileCheck %s
+; RUN: llc -march=ppc64 -mcpu=pwr7 -O2 -relocation-model=pic < %s | grep "__tls_get_addr" | count 1
+
+; This test was derived from LLVM's own
+; PrettyStackTraceEntry::~PrettyStackTraceEntry(). It demonstrates an
+; opportunity for CSE of calls to __tls_get_addr().
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+%"class.llvm::PrettyStackTraceEntry" = type { i32 (...)**, %"class.llvm::PrettyStackTraceEntry"* }
+
+@_ZTVN4llvm21PrettyStackTraceEntryE = unnamed_addr constant [5 x i8*] [i8* null, i8* null, i8* bitcast (void (%"class.llvm::PrettyStackTraceEntry"*)* @_ZN4llvm21PrettyStackTraceEntryD2Ev to i8*), i8* bitcast (void (%"class.llvm::PrettyStackTraceEntry"*)* @_ZN4llvm21PrettyStackTraceEntryD0Ev to i8*), i8* bitcast (void ()* @__cxa_pure_virtual to i8*)], align 8
+@_ZL20PrettyStackTraceHead = internal thread_local unnamed_addr global %"class.llvm::PrettyStackTraceEntry"* null, align 8
+@.str = private unnamed_addr constant [87 x i8] c"PrettyStackTraceHead == this && \22Pretty stack trace entry destruction is out of order\22\00", align 1
+@.str1 = private unnamed_addr constant [64 x i8] c"/home/wschmidt/llvm/llvm-test2/lib/Support/PrettyStackTrace.cpp\00", align 1
+@__PRETTY_FUNCTION__._ZN4llvm21PrettyStackTraceEntryD2Ev = private unnamed_addr constant [62 x i8] c"virtual llvm::PrettyStackTraceEntry::~PrettyStackTraceEntry()\00", align 1
+
+declare void @_ZN4llvm21PrettyStackTraceEntryD2Ev(%"class.llvm::PrettyStackTraceEntry"* %this) unnamed_addr
+declare void @__cxa_pure_virtual()
+declare void @__assert_fail(i8*, i8*, i32 zeroext, i8*)
+declare void @_ZdlPv(i8*)
+
+define void @_ZN4llvm21PrettyStackTraceEntryD0Ev(%"class.llvm::PrettyStackTraceEntry"* %this) unnamed_addr align 2 {
+entry:
+ %0 = getelementptr inbounds %"class.llvm::PrettyStackTraceEntry"* %this, i64 0, i32 0
+ store i32 (...)** bitcast (i8** getelementptr inbounds ([5 x i8*]* @_ZTVN4llvm21PrettyStackTraceEntryE, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
+ %1 = load %"class.llvm::PrettyStackTraceEntry"** @_ZL20PrettyStackTraceHead, align 8
+ %cmp.i = icmp eq %"class.llvm::PrettyStackTraceEntry"* %1, %this
+ br i1 %cmp.i, label %_ZN4llvm21PrettyStackTraceEntryD2Ev.exit, label %cond.false.i
+
+cond.false.i: ; preds = %entry
+ tail call void @__assert_fail(i8* getelementptr inbounds ([87 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([64 x i8]* @.str1, i64 0, i64 0), i32 zeroext 119, i8* getelementptr inbounds ([62 x i8]* @__PRETTY_FUNCTION__._ZN4llvm21PrettyStackTraceEntryD2Ev, i64 0, i64 0))
+ unreachable
+
+_ZN4llvm21PrettyStackTraceEntryD2Ev.exit: ; preds = %entry
+ %NextEntry.i.i = getelementptr inbounds %"class.llvm::PrettyStackTraceEntry"* %this, i64 0, i32 1
+ %2 = bitcast %"class.llvm::PrettyStackTraceEntry"** %NextEntry.i.i to i64*
+ %3 = load i64* %2, align 8
+ store i64 %3, i64* bitcast (%"class.llvm::PrettyStackTraceEntry"** @_ZL20PrettyStackTraceHead to i64*), align 8
+ %4 = bitcast %"class.llvm::PrettyStackTraceEntry"* %this to i8*
+ tail call void @_ZdlPv(i8* %4)
+ ret void
+}
+
+; CHECK-LABEL: _ZN4llvm21PrettyStackTraceEntryD0Ev:
+; CHECK: addis [[REG1:[0-9]+]], 2, _ZL20PrettyStackTraceHead@got@tlsld@ha
+; CHECK: addi 3, [[REG1]], _ZL20PrettyStackTraceHead@got@tlsld@l
+; CHECK: bl __tls_get_addr(_ZL20PrettyStackTraceHead@tlsld)
+; CHECK: addis 3, 3, _ZL20PrettyStackTraceHead@dtprel@ha
+; CHECK: ld {{[0-9]+}}, _ZL20PrettyStackTraceHead@dtprel@l(3)
+; CHECK: std {{[0-9]+}}, _ZL20PrettyStackTraceHead@dtprel@l(3)
}
; CHECK-LABEL: call_once:
-; CHECK: addis 3, 2, __once_callable@got@tlsgd@ha
-; CHECK: addi 3, 3, __once_callable@got@tlsgd@l
+; CHECK: addi 3, {{[0-9]+}}, __once_callable@got@tlsgd@l
; CHECK: bl __tls_get_addr(__once_callable@tlsgd)
; CHECK-NEXT: nop
; CHECK: std {{[0-9]+}}, 0(3)
-; CHECK: addis 3, 2, __once_call@got@tlsgd@ha
-; CHECK: addi 3, 3, __once_call@got@tlsgd@l
+; CHECK: addi 3, {{[0-9]+}}, __once_call@got@tlsgd@l
; CHECK: bl __tls_get_addr(__once_call@tlsgd)
; CHECK-NEXT: nop
; CHECK: std {{[0-9]+}}, 0(3)