/// SYMBOL_LABEL @TLSGD
MO_TLSGD,
+ /// MO_TLSLD - On a symbol operand this indicates that the immediate is
+ /// the offset of the GOT entry with the TLS index for the module that
+ /// contains the symbol. When this index is passed to a call to
+ /// __tls_get_addr, the function will return the base address of the TLS
+ /// block for the symbol.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @TLSLD
+ MO_TLSLD,
+
+ /// MO_TLSLDM - On a symbol operand this indicates that the immediate is
+ /// the offset of the GOT entry with the TLS index for the module that
+ /// contains the symbol. When this index is passed to a call to
+ /// ___tls_get_addr, the function will return the base address of the TLS
+ /// block for the symbol.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @TLSLDM
+ MO_TLSLDM,
+
/// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///
/// SYMBOL_LABEL @TPOFF
MO_TPOFF,
+ /// MO_DTPOFF - On a symbol operand this indicates that the immediate is
+ /// the offset of the GOT entry with the TLS offset of the symbol.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @DTPOFF
+ MO_DTPOFF,
+
/// MO_NTPOFF - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///
/// register for PIC on x86-32.
FunctionPass* createGlobalBaseRegPass();
+/// createCleanupLocalDynamicTLSPass() - This pass combines multiple accesses
+/// to local-dynamic TLS variables so that the TLS base address for the module
+/// is only fetched once per execution path through the function.
+FunctionPass *createCleanupLocalDynamicTLSPass();
+
/// createX86FloatingPointStackifierPass - This function returns a pass which
/// converts floating point register references and pseudo instructions into
/// floating point stack references and physical instructions.
O << '-' << *MF->getPICBaseSymbol();
break;
case X86II::MO_TLSGD: O << "@TLSGD"; break;
+ case X86II::MO_TLSLD: O << "@TLSLD"; break;
+ case X86II::MO_TLSLDM: O << "@TLSLDM"; break;
case X86II::MO_GOTTPOFF: O << "@GOTTPOFF"; break;
case X86II::MO_INDNTPOFF: O << "@INDNTPOFF"; break;
case X86II::MO_TPOFF: O << "@TPOFF"; break;
+ case X86II::MO_DTPOFF: O << "@DTPOFF"; break;
case X86II::MO_NTPOFF: O << "@NTPOFF"; break;
case X86II::MO_GOTNTPOFF: O << "@GOTNTPOFF"; break;
case X86II::MO_GOTPCREL: O << "@GOTPCREL"; break;
static SDValue
GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg,
- unsigned char OperandFlags) {
+ unsigned char OperandFlags, bool LocalDynamic = false) {
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
DebugLoc dl = GA->getDebugLoc();
GA->getValueType(0),
GA->getOffset(),
OperandFlags);
+
+ X86ISD::NodeType CallType = LocalDynamic ? X86ISD::TLSBASEADDR
+ : X86ISD::TLSADDR;
+
if (InFlag) {
SDValue Ops[] = { Chain, TGA, *InFlag };
- Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 3);
+ Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 3);
} else {
SDValue Ops[] = { Chain, TGA };
- Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 2);
+ Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 2);
}
// TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
X86::RAX, X86II::MO_TLSGD);
}
+// Lower ISD::GlobalTLSAddress using the "local dynamic" model: obtain the
+// start address of the module's TLS block through a TLSBASEADDR call node
+// and add the symbol's x@dtpoff to it.
+static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
+                                           SelectionDAG &DAG,
+                                           const EVT PtrVT,
+                                           bool is64Bit) {
+  DebugLoc Loc = GA->getDebugLoc();
+
+  // Count this local-dynamic access in the per-function info.
+  DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>()
+      ->incNumLocalDynamicTLSAccesses();
+
+  // Start address of the TLS block for this module.
+  SDValue ModuleBase;
+  if (!is64Bit) {
+    // 32-bit: copy the global base register into EBX first and glue that
+    // copy to the call node.
+    SDValue Glue;
+    SDValue PICBase = DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), PtrVT);
+    SDValue Chain =
+        DAG.getCopyToReg(DAG.getEntryNode(), Loc, X86::EBX, PICBase, Glue);
+    Glue = Chain.getValue(1);
+    ModuleBase = GetTLSADDR(DAG, Chain, GA, &Glue, PtrVT, X86::EAX,
+                            X86II::MO_TLSLDM, /*LocalDynamic=*/true);
+  } else {
+    ModuleBase = GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT,
+                            X86::RAX, X86II::MO_TLSLD, /*LocalDynamic=*/true);
+  }
+
+  // Redundant computations of ModuleBase are left for the
+  // CleanupLocalDynamicTLSPass to remove.
+
+  // x@dtpoff, wrapped in an X86ISD::Wrapper node.
+  SDValue DTPOff = DAG.getNode(
+      X86ISD::Wrapper, Loc, PtrVT,
+      DAG.getTargetGlobalAddress(GA->getGlobal(), Loc, GA->getValueType(0),
+                                 GA->getOffset(), X86II::MO_DTPOFF));
+
+  // Result: x@dtpoff + module base.
+  return DAG.getNode(ISD::ADD, Loc, PtrVT, DTPOff, ModuleBase);
+}
+
// Lower ISD::GlobalTLSAddress using the "initial exec" or "local exec" model.
static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const EVT PtrVT, TLSModel::Model model,
const GlobalValue *GV = GA->getGlobal();
if (Subtarget->isTargetELF()) {
- // TODO: implement the "local dynamic" model
-
// If GV is an alias then use the aliasee for determining
// thread-localness.
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
switch (model) {
case TLSModel::GeneralDynamic:
- case TLSModel::LocalDynamic: // not implemented
if (Subtarget->is64Bit())
return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
-
+ case TLSModel::LocalDynamic:
+ return LowerToTLSLocalDynamicModel(GA, DAG, getPointerTy(),
+ Subtarget->is64Bit());
case TLSModel::InitialExec:
case TLSModel::LocalExec:
return LowerToTLSExecModel(GA, DAG, getPointerTy(), model,
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
case X86ISD::FRCP: return "X86ISD::FRCP";
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
+ case X86ISD::TLSBASEADDR: return "X86ISD::TLSBASEADDR";
case X86ISD::TLSCALL: return "X86ISD::TLSCALL";
case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
// TLSADDR - Thread Local Storage.
TLSADDR,
+ // TLSBASEADDR - Thread Local Storage. A call to get the start address
+ // of the TLS block for the current module.
+ TLSBASEADDR,
+
// TLSCALL - Thread Local Storage. When calling to an OS provided
// thunk at the address from an earlier relocation.
TLSCALL,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
- Uses = [ESP] in
+ Uses = [ESP] in {
def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
"# TLS_addr32",
[(X86tlsaddr tls32addr:$sym)]>,
Requires<[In32BitMode]>;
+// TLS_base_addr32 - Pseudo-instruction selected from an X86tlsbaseaddr node
+// whose operand matches tls32baseaddr. Only available in 32-bit mode; it sits
+// in the same let-block (Uses = [ESP]) as TLS_addr32.
+def TLS_base_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
+ "# TLS_base_addr32",
+ [(X86tlsbaseaddr tls32baseaddr:$sym)]>,
+ Requires<[In32BitMode]>;
+}
// All calls clobber the non-callee saved registers. RSP is marked as
// a use to prevent stack-pointer assignments that appear immediately
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
- Uses = [RSP] in
+ Uses = [RSP] in {
def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
"# TLS_addr64",
[(X86tlsaddr tls64addr:$sym)]>,
Requires<[In64BitMode]>;
+// TLS_base_addr64 - Pseudo-instruction selected from an X86tlsbaseaddr node
+// whose operand matches tls64baseaddr. Only available in 64-bit mode; it sits
+// in the same let-block (Uses = [RSP]) as TLS_addr64.
+def TLS_base_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
+ "# TLS_base_addr64",
+ [(X86tlsbaseaddr tls64baseaddr:$sym)]>,
+ Requires<[In64BitMode]>;
+}
// Darwin TLS Support
// For i386, the address of the thunk is passed on the stack, on return the
#include "llvm/LLVMContext.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
+
+ private:
+ unsigned BaseReg;
};
}
char CGBR::ID = 0;
FunctionPass*
llvm::createGlobalBaseRegPass() { return new CGBR(); }
+
+namespace {
+ // LDTLSCleanup - Machine function pass that walks the machine dominator
+ // tree and, along each path from the root, keeps only the first
+ // TLS_base_addr32/64 instruction. The value it leaves in RAX/EAX is saved
+ // in a virtual register, and every later TLS_base_addr instruction in a
+ // dominated block is replaced by a copy from that register.
+ struct LDTLSCleanup : public MachineFunctionPass {
+ static char ID;
+ LDTLSCleanup() : MachineFunctionPass(ID) {}
+
+ // Entry point. Returns true if any instruction was inserted or replaced.
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ X86MachineFunctionInfo* MFI = MF.getInfo<X86MachineFunctionInfo>();
+ if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
+ // No point folding accesses if there aren't at least two.
+ return false;
+ }
+
+ MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
+ // Start at the root with no base-address register chosen yet (0).
+ return VisitNode(DT->getRootNode(), 0);
+ }
+
+ // Visit the dominator subtree rooted at Node in pre-order.
+ // If TLSBaseAddrReg is non-zero, then use that to replace any
+ // TLS_base_addr instructions. Otherwise, create the register
+ // when the first such instruction is seen, and then use it
+ // as we encounter more instructions.
+ // TLSBaseAddrReg is passed by value, so a register created while visiting
+ // this node is handed down to its children but is not seen by the caller
+ // (and therefore not by sibling subtrees).
+ // Returns true if this subtree was modified.
+ bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
+ MachineBasicBlock *BB = Node->getBlock();
+ bool Changed = false;
+
+ // Traverse the current block.
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
+ ++I) {
+ switch (I->getOpcode()) {
+ case X86::TLS_base_addr32:
+ case X86::TLS_base_addr64:
+ // Both helpers return the copy they inserted; I is re-seated on it so
+ // that the loop's ++I resumes with the instruction following the copy.
+ if (TLSBaseAddrReg)
+ I = ReplaceTLSBaseAddrCall(I, TLSBaseAddrReg);
+ else
+ I = SetRegister(I, &TLSBaseAddrReg);
+ Changed = true;
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Visit the children of this block in the dominator tree.
+ for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end();
+ I != E; ++I) {
+ Changed |= VisitNode(*I, TLSBaseAddrReg);
+ }
+
+ return Changed;
+ }
+
+ // Replace the TLS_base_addr instruction I with a copy from
+ // TLSBaseAddrReg, returning the new instruction.
+ MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I,
+ unsigned TLSBaseAddrReg) {
+ MachineFunction *MF = I->getParent()->getParent();
+ const X86TargetMachine *TM =
+ static_cast<const X86TargetMachine *>(&MF->getTarget());
+ const bool is64Bit = TM->getSubtarget<X86Subtarget>().is64Bit();
+ const X86InstrInfo *TII = TM->getInstrInfo();
+
+ // Insert a Copy from TLSBaseAddrReg to RAX/EAX.
+ MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ is64Bit ? X86::RAX : X86::EAX)
+ .addReg(TLSBaseAddrReg);
+
+ // Erase the TLS_base_addr instruction.
+ I->eraseFromParent();
+
+ return Copy;
+ }
+
+ // Create a virtual register in *TLSBaseAddrReg, and populate it by
+ // inserting a copy instruction after I. Returns the new instruction.
+ MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
+ MachineFunction *MF = I->getParent()->getParent();
+ const X86TargetMachine *TM =
+ static_cast<const X86TargetMachine *>(&MF->getTarget());
+ const bool is64Bit = TM->getSubtarget<X86Subtarget>().is64Bit();
+ const X86InstrInfo *TII = TM->getInstrInfo();
+
+ // Create a virtual register for the TLS base address.
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ *TLSBaseAddrReg = RegInfo.createVirtualRegister(is64Bit
+ ? &X86::GR64RegClass
+ : &X86::GR32RegClass);
+
+ // Insert a copy from RAX/EAX to TLSBaseAddrReg.
+ // NOTE(review): Next is used as the insertion point; presumably I is never
+ // the last instruction of its block -- confirm how a null Next is handled.
+ MachineInstr *Next = I->getNextNode();
+ MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ *TLSBaseAddrReg)
+ .addReg(is64Bit ? X86::RAX : X86::EAX);
+
+ return Copy;
+ }
+
+ // Human-readable name of this pass.
+ virtual const char *getPassName() const {
+ return "Local Dynamic TLS Access Clean-up";
+ }
+
+ // Declares that the pass preserves the CFG and requires the machine
+ // dominator tree used by runOnMachineFunction.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+// Definition of the pass ID member that LDTLSCleanup's constructor hands to
+// MachineFunctionPass.
+char LDTLSCleanup::ID = 0;
+// Factory function: returns a newly allocated LDTLSCleanup pass.
+FunctionPass*
+llvm::createCleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }
def SDT_X86TLSADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+// Type profile used by X86tlsbaseaddr; identical in shape to SDT_X86TLSADDR
+// (SDTypeProfile<0, 1> with operand 0 constrained by SDTCisInt).
+def SDT_X86TLSBASEADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+// SDNode for X86ISD::TLSBASEADDR, declared with the same properties as
+// X86tlsaddr: it has a chain, an optional incoming glue and an outgoing glue.
+def X86tlsbaseaddr : SDNode<"X86ISD::TLSBASEADDR", SDT_X86TLSBASEADDR,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
[SDNPHasChain]>;
def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
[tglobaltlsaddr], []>;
+// Address pattern for the 32-bit TLS base-address pseudo. It reuses the
+// "SelectTLSADDRAddr" selector and the tglobaltlsaddr root of tls32addr.
+def tls32baseaddr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
+ [tglobaltlsaddr], []>;
+
def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr",
[add, sub, mul, X86mul_imm, shl, or, frameindex,
X86WrapperRIP], []>;
def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
[tglobaltlsaddr], []>;
+// Address pattern for the 64-bit TLS base-address pseudo. It reuses the
+// "SelectTLSADDRAddr" selector and the tglobaltlsaddr root of tls64addr.
+def tls64baseaddr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
+ [tglobaltlsaddr], []>;
+
//===----------------------------------------------------------------------===//
// X86 Instruction Predicate Definitions.
def HasCMov : Predicate<"Subtarget->hasCMov()">;
break;
case X86II::MO_SECREL: RefKind = MCSymbolRefExpr::VK_SECREL; break;
case X86II::MO_TLSGD: RefKind = MCSymbolRefExpr::VK_TLSGD; break;
+ case X86II::MO_TLSLD: RefKind = MCSymbolRefExpr::VK_TLSLD; break;
+ case X86II::MO_TLSLDM: RefKind = MCSymbolRefExpr::VK_TLSLDM; break;
case X86II::MO_GOTTPOFF: RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break;
case X86II::MO_INDNTPOFF: RefKind = MCSymbolRefExpr::VK_INDNTPOFF; break;
case X86II::MO_TPOFF: RefKind = MCSymbolRefExpr::VK_TPOFF; break;
+ case X86II::MO_DTPOFF: RefKind = MCSymbolRefExpr::VK_DTPOFF; break;
case X86II::MO_NTPOFF: RefKind = MCSymbolRefExpr::VK_NTPOFF; break;
case X86II::MO_GOTNTPOFF: RefKind = MCSymbolRefExpr::VK_GOTNTPOFF; break;
case X86II::MO_GOTPCREL: RefKind = MCSymbolRefExpr::VK_GOTPCREL; break;
static void LowerTlsAddr(MCStreamer &OutStreamer,
X86MCInstLower &MCInstLowering,
const MachineInstr &MI) {
- bool is64Bits = MI.getOpcode() == X86::TLS_addr64;
+
+ bool is64Bits = MI.getOpcode() == X86::TLS_addr64 ||
+ MI.getOpcode() == X86::TLS_base_addr64;
+
+ bool needsPadding = MI.getOpcode() == X86::TLS_addr64;
+
MCContext &context = OutStreamer.getContext();
- if (is64Bits) {
+ if (needsPadding) {
MCInst prefix;
prefix.setOpcode(X86::DATA16_PREFIX);
OutStreamer.EmitInstruction(prefix);
}
+
+ MCSymbolRefExpr::VariantKind SRVK;
+ switch (MI.getOpcode()) {
+ case X86::TLS_addr32:
+ case X86::TLS_addr64:
+ SRVK = MCSymbolRefExpr::VK_TLSGD;
+ break;
+ case X86::TLS_base_addr32:
+ SRVK = MCSymbolRefExpr::VK_TLSLDM;
+ break;
+ case X86::TLS_base_addr64:
+ SRVK = MCSymbolRefExpr::VK_TLSLD;
+ break;
+ default:
+ llvm_unreachable("unexpected opcode");
+ }
+
MCSymbol *sym = MCInstLowering.GetSymbolFromOperand(MI.getOperand(3));
- const MCSymbolRefExpr *symRef =
- MCSymbolRefExpr::Create(sym, MCSymbolRefExpr::VK_TLSGD, context);
+ const MCSymbolRefExpr *symRef = MCSymbolRefExpr::Create(sym, SRVK, context);
MCInst LEA;
if (is64Bits) {
}
OutStreamer.EmitInstruction(LEA);
- if (is64Bits) {
+ if (needsPadding) {
MCInst prefix;
prefix.setOpcode(X86::DATA16_PREFIX);
OutStreamer.EmitInstruction(prefix);
case X86::TLS_addr32:
case X86::TLS_addr64:
+ case X86::TLS_base_addr32:
+ case X86::TLS_base_addr64:
return LowerTlsAddr(OutStreamer, MCInstLowering, *MI);
case X86::MOVPC32r: {
MCInstLowering.Lower(MI, TmpInst);
OutStreamer.EmitInstruction(TmpInst);
}
-
/// ArgumentStackSize - The number of bytes on stack consumed by the arguments
/// being passed on the stack.
unsigned ArgumentStackSize;
+ /// NumLocalDynamics - Number of local-dynamic TLS accesses.
+ unsigned NumLocalDynamics;
public:
X86MachineFunctionInfo() : ForceFramePointer(false),
RegSaveFrameIndex(0),
VarArgsGPOffset(0),
VarArgsFPOffset(0),
- ArgumentStackSize(0) {}
+ ArgumentStackSize(0),
+ NumLocalDynamics(0) {}
explicit X86MachineFunctionInfo(MachineFunction &MF)
: ForceFramePointer(false),
RegSaveFrameIndex(0),
VarArgsGPOffset(0),
VarArgsFPOffset(0),
- ArgumentStackSize(0) {}
+ ArgumentStackSize(0),
+ NumLocalDynamics(0) {}
bool getForceFramePointer() const { return ForceFramePointer;}
void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
unsigned getArgumentStackSize() const { return ArgumentStackSize; }
void setArgumentStackSize(unsigned size) { ArgumentStackSize = size; }
+
+ /// Returns NumLocalDynamics, the number of local-dynamic TLS accesses
+ /// recorded so far.
+ unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; }
+ /// Records one more local-dynamic TLS access by incrementing
+ /// NumLocalDynamics.
+ void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; }
};
} // End llvm namespace
// Install an instruction selector.
PM->add(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
+ // For ELF, cleanup any local-dynamic TLS accesses.
+ if (getX86Subtarget().isTargetELF() && getOptLevel() != CodeGenOpt::None)
+ PM->add(createCleanupLocalDynamicTLSPass());
+
// For 32-bit, prepend instructions to set the "global base reg" for PIC.
if (!getX86Subtarget().is64Bit())
PM->add(createGlobalBaseRegPass());
--- /dev/null
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic | FileCheck %s
+
+@x = internal thread_local global i32 0, align 4
+@y = internal thread_local global i32 0, align 4
+
+; get_x and get_y are here to prevent x and y from being optimized away as 0
+
+define i32* @get_x() {
+entry:
+ ret i32* @x
+; FIXME: This function uses a single thread-local variable,
+; so we might want to fall back to general-dynamic here.
+; CHECK: get_x:
+; CHECK: leaq x@TLSLD(%rip), %rdi
+; CHECK-NEXT: callq __tls_get_addr@PLT
+; CHECK: x@DTPOFF
+}
+
+define i32* @get_y() {
+entry:
+ ret i32* @y
+}
+
+define i32 @f(i32 %i) {
+entry:
+ %cmp = icmp eq i32 %i, 1
+ br i1 %cmp, label %return, label %if.else
+; This bb does not access TLS, so should not call __tls_get_addr.
+; CHECK: f:
+; CHECK-NOT: __tls_get_addr
+; CHECK: je
+
+
+if.else:
+ %0 = load i32* @x, align 4
+ %cmp1 = icmp eq i32 %i, 2
+ br i1 %cmp1, label %if.then2, label %return
+; Now we call __tls_get_addr.
+; CHECK: # %if.else
+; CHECK: leaq x@TLSLD(%rip), %rdi
+; CHECK-NEXT: callq __tls_get_addr@PLT
+; CHECK: x@DTPOFF
+
+
+if.then2:
+ %1 = load i32* @y, align 4
+ %add = add nsw i32 %1, %0
+ br label %return
+; This accesses TLS, but is dominated by the previous block,
+; so should not have to call __tls_get_addr again.
+; CHECK: # %if.then2
+; CHECK-NOT: __tls_get_addr
+; CHECK: y@DTPOFF
+
+
+return:
+ %retval.0 = phi i32 [ %add, %if.then2 ], [ 5, %entry ], [ %0, %if.else ]
+ ret i32 %retval.0
+}
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic | FileCheck -check-prefix=X64 %s
@i = thread_local global i32 15
+@j = internal thread_local global i32 42
+@k = internal thread_local global i32 42
define i32 @f1() {
entry:
; X64: callq __tls_get_addr@PLT
+define i32 @f5() nounwind {
+entry:
+ %0 = load i32* @j, align 4
+ %1 = load i32* @k, align 4
+ %add = add nsw i32 %0, %1
+ ret i32 %add
+}
+; X32: f5:
+; X32: leal {{[jk]}}@TLSLDM
+; X32-NEXT: calll ___tls_get_addr@PLT
+; X32-NEXT: movl {{[jk]}}@DTPOFF(%eax)
+; X32-NEXT: addl {{[jk]}}@DTPOFF(%eax)
+
+; X64: f5:
+; X64: leaq {{[jk]}}@TLSLD(%rip), %rdi
+; X64-NEXT: callq __tls_get_addr@PLT
+; X64-NEXT: movl {{[jk]}}@DTPOFF(%rax)
+; X64-NEXT: addl {{[jk]}}@DTPOFF(%rax)