return CSR_NoRegs_RegMask;
}
+const uint32_t *
+ARMBaseRegisterInfo::getTLSCallPreservedMask(const MachineFunction &MF) const {
+ assert(MF.getSubtarget<ARMSubtarget>().isTargetDarwin() &&
+ "only know about special TLS call on Darwin");
+ return CSR_iOS_TLSCall_RegMask;
+}
+
+
const uint32_t *
ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID) const override;
const uint32_t *getNoPreservedMask() const override;
+ const uint32_t *getTLSCallPreservedMask(const MachineFunction &MF) const;
/// getThisReturnPreservedMask - Returns a call preserved mask specific to the
/// case that 'returned' is on an i32 first argument if the calling convention
def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
(sub CSR_AAPCS_ThisReturn, R9))>;
+def CSR_iOS_TLSCall : CalleeSavedRegs<(add LR, SP,
+ (sequence "R%u", 12, 1),
+ (sequence "D%u", 31, 0))>;
+
// The "interrupt" attribute is used to generate code that is acceptable in
// exception-handlers of various kinds. It makes us use a different return
// instruction (handled elsewhere) and affects which registers we must return to
unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
// For now 32-bit only.
- if (VT != MVT::i32) return 0;
+ if (VT != MVT::i32 || GV->isThreadLocal()) return 0;
Reloc::Model RelocM = TM.getRelocationModel();
bool IsIndirect = Subtarget->GVIsIndirectSymbol(GV, RelocM);
}
if (N.getOpcode() == ARMISD::Wrapper &&
- N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
} else
Base = N;
Base = CurDAG->getTargetFrameIndex(
FI, TLI->getPointerTy(CurDAG->getDataLayout()));
} else if (N.getOpcode() == ARMISD::Wrapper &&
- N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
}
Offset = CurDAG->getRegister(0, MVT::i32);
Base = CurDAG->getTargetFrameIndex(
FI, TLI->getPointerTy(CurDAG->getDataLayout()));
} else if (N.getOpcode() == ARMISD::Wrapper &&
- N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
}
Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
if (N.getOpcode() == ISD::ADD) {
return false; // We want to select register offset instead
} else if (N.getOpcode() == ARMISD::Wrapper &&
- N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
} else {
Base = N;
}
if (N.getOpcode() == ARMISD::Wrapper &&
- N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::TargetConstantPool)
return false; // We want to select t2LDRpci instead.
return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
}
+/// \brief Convert a TLS address reference into the correct sequence of loads
+/// and calls to compute the variable's address for Darwin, and return an
+/// SDValue containing the final node.
+
+/// Darwin only has one TLS scheme which must be capable of dealing with the
+/// fully general situation, in the worst case. This means:
+/// + "extern __thread" declaration.
+/// + Defined in a possibly unknown dynamic library.
+///
+/// The general system is that each __thread variable has a [3 x i32] descriptor
+/// which contains information used by the runtime to calculate the address. The
+/// only part of this the compiler needs to know about is the first word, which
+/// contains a function pointer that must be called with the address of the
+/// entire descriptor in "r0".
+///
+/// Since this descriptor may be in a different unit, in general access must
+/// proceed along the usual ARM rules. A common sequence to produce is:
+///
+/// movw rT1, :lower16:_var$non_lazy_ptr
+/// movt rT1, :upper16:_var$non_lazy_ptr
+/// ldr r0, [rT1]
+/// ldr rT2, [r0]
+/// blx rT2
+/// [...address now in r0...]
+SDValue
+ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
+ SDLoc DL(Op);
+
+ // First step is to get the address of the actua global symbol. This is where
+ // the TLS descriptor lives.
+ SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
+
+ // The first entry in the descriptor is a function pointer that we must call
+ // to obtain the address of the variable.
+ SDValue Chain = DAG.getEntryNode();
+ SDValue FuncTLVGet =
+ DAG.getLoad(MVT::i32, DL, Chain, DescAddr,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, true, true, 4);
+ Chain = FuncTLVGet.getValue(1);
+
+ MachineFunction &F = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = F.getFrameInfo();
+ MFI->setAdjustsStack(true);
+
+ // TLS calls preserve all registers except those that absolutely must be
+ // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
+ // silly).
+ auto TRI =
+ getTargetMachine().getSubtargetImpl(*F.getFunction())->getRegisterInfo();
+ auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
+ const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
+
+ // Finally, we can make the call. This is just a degenerate version of a
+ // normal AArch64 call node: r0 takes the address of the descriptor, and
+ // returns the address of the variable in this thread.
+ Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
+ Chain =
+ DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
+ Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
+ DAG.getRegisterMask(Mask), Chain.getValue(1));
+ return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
+}
+
// Lower ISD::GlobalTLSAddress using the "general dynamic" model
SDValue
ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
SDValue
ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
+ if (Subtarget->isTargetDarwin())
+ return LowerGlobalTLSAddressDarwin(Op, DAG);
+
// TODO: implement the "local dynamic" model
- assert(Subtarget->isTargetELF() &&
- "TLS not implemented for non-ELF targets");
+ assert(Subtarget->isTargetELF() && "Only ELF implemented here");
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
if (DAG.getTarget().Options.EmulatedTLS)
return LowerToTLSEmulatedModel(GA, DAG);
SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA,
SelectionDAG &DAG,
TLSModel::Model model) const;
+ SDValue LowerGlobalTLSAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
Requires<[IsARM, UseMovt]>;
} // isReMaterializable
+// The many different faces of TLS access.
+def : ARMPat<(ARMWrapper tglobaltlsaddr :$dst),
+ (MOVi32imm tglobaltlsaddr :$dst)>,
+ Requires<[IsARM, UseMovt]>;
+
+def : Pat<(ARMWrapper tglobaltlsaddr:$src),
+ (LDRLIT_ga_abs tglobaltlsaddr:$src)>,
+ Requires<[IsARM, DontUseMovt]>;
+
+def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr),
+ (MOV_ga_pcrel tglobaltlsaddr:$addr)>, Requires<[IsARM, UseMovt]>;
+
+def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr),
+ (LDRLIT_ga_pcrel tglobaltlsaddr:$addr)>,
+ Requires<[IsARM, DontUseMovt]>;
+let AddedComplexity = 10 in
+def : Pat<(load (ARMWrapperPIC tglobaltlsaddr:$addr)),
+ (MOV_ga_pcrel_ldr tglobaltlsaddr:$addr)>,
+ Requires<[IsARM, UseMovt]>;
+
+
// ConstantPool, GlobalAddress, and JumpTable
def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>;
def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>,
(ARMWrapper tglobaladdr:$src))]>,
Requires<[IsThumb, DontUseMovt]>;
+// TLS globals
+def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr),
+ (tLDRLIT_ga_pcrel tglobaltlsaddr:$addr)>,
+ Requires<[IsThumb, DontUseMovt]>;
+def : Pat<(ARMWrapper tglobaltlsaddr:$addr),
+ (tLDRLIT_ga_abs tglobaltlsaddr:$addr)>,
+ Requires<[IsThumb, DontUseMovt]>;
+
// JumpTable
def : T1Pat<(ARMWrapperJT tjumptable:$dst),
}
+def : T2Pat<(ARMWrapperPIC tglobaltlsaddr :$dst),
+ (t2MOV_ga_pcrel tglobaltlsaddr:$dst)>,
+ Requires<[IsThumb2, UseMovt]>;
+def : T2Pat<(ARMWrapper tglobaltlsaddr:$dst),
+ (t2MOVi32imm tglobaltlsaddr:$dst)>,
+ Requires<[IsThumb2, UseMovt]>;
+
// ConstantPool, GlobalAddress, and JumpTable
def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>;
def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>,
--- /dev/null
+; RUN: llc -mtriple=thumbv7s-apple-ios7.0 -o - -fast-isel %s | FileCheck %s --check-prefix=T2-MOVT-PIC
+; RUN: llc -mtriple=thumbv7s-apple-ios7.0 -o - %s -mattr=+no-movt | FileCheck %s --check-prefix=T2-LIT-PIC
+; RUN: llc -mtriple=thumbv7s-apple-ios7.0 -o - %s -relocation-model=static | FileCheck %s --check-prefix=T2-MOVT-STATIC
+; RUN: llc -mtriple=thumbv7s-apple-ios7.0 -o - %s -mattr=+no-movt -relocation-model=static | FileCheck %s --check-prefix=T2-LIT-STATIC
+; RUN: llc -mtriple=armv7s-apple-ios7.0 -o - %s | FileCheck %s --check-prefix=ARM-MOVT-PIC
+; RUN: llc -mtriple=armv7s-apple-ios7.0 -o - %s -mattr=+no-movt | FileCheck %s --check-prefix=ARM-LIT-PIC
+; RUN: llc -mtriple=armv7s-apple-ios7.0 -o - %s -relocation-model=static | FileCheck %s --check-prefix=ARM-MOVT-STATIC
+; RUN: llc -mtriple=armv7s-apple-ios7.0 -o - %s -mattr=+no-movt -relocation-model=static | FileCheck %s --check-prefix=ARM-LIT-STATIC
+
+
+@local_tls_var = thread_local global i32 0
+@external_tls_var = external thread_local global i32
+
+define i32 @test_local_tls() {
+; T2-MOVT-PIC-LABEL: test_local_tls:
+; T2-MOVT-PIC: movw r0, :lower16:(_local_tls_var-([[PCREL_LOC:LPC[0-9]+_[0-9]+]]+4))
+; T2-MOVT-PIC: movt r0, :upper16:(_local_tls_var-([[PCREL_LOC]]+4))
+; T2-MOVT-PIC: [[PCREL_LOC]]:
+; T2-MOVT-PIC-NEXT: add r0, pc
+; T2-MOVT-PIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
+; T2-MOVT-PIC: blx [[TLV_GET_ADDR]]
+; T2-MOVT-PIC: ldr r0, [r0]
+
+; T2-LIT-PIC-LABEL: test_local_tls:
+; T2-LIT-PIC: ldr r0, [[LOCAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]]
+; T2-LIT-PIC: [[PCREL_LOC:LPC[0-9]+_[0-9]+]]:
+; T2-LIT-PIC-NEXT: add r0, pc
+; T2-LIT-PIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
+; T2-LIT-PIC: blx [[TLV_GET_ADDR]]
+; T2-LIT-PIC: ldr r0, [r0]
+; T2-LIT-PIC: [[LOCAL_VAR_ADDR]]:
+; T2-LIT-PIC-NEXT: .long _local_tls_var-([[PCREL_LOC]]+4)
+
+; T2-MOVT-STATIC-LABEL: test_local_tls:
+; T2-MOVT-STATIC: movw r0, :lower16:_local_tls_var
+; T2-MOVT-STATIC: movt r0, :upper16:_local_tls_var
+; T2-MOVT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
+; T2-MOVT-STATIC: blx [[TLV_GET_ADDR]]
+; T2-MOVT-STATIC: ldr r0, [r0]
+
+; T2-LIT-STATIC-LABEL: test_local_tls:
+; T2-LIT-STATIC: ldr r0, [[LOCAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]]
+; T2-LIT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
+; T2-LIT-STATIC: blx [[TLV_GET_ADDR]]
+; T2-LIT-STATIC: ldr r0, [r0]
+; T2-LIT-STATIC: [[LOCAL_VAR_ADDR]]:
+; T2-LIT-STATIC-NEXT: .long _local_tls_var
+
+; ARM-MOVT-PIC-LABEL: test_local_tls:
+; ARM-MOVT-PIC: movw [[VARPC1:r[0-9]+]], :lower16:(_local_tls_var-([[PCREL_LOC1:LPC[0-9]+_[0-9]+]]+8))
+; ARM-MOVT-PIC: movt [[VARPC1]], :upper16:(_local_tls_var-([[PCREL_LOC1]]+8))
+; ARM-MOVT-PIC: [[PCREL_LOC1]]:
+; ARM-MOVT-PIC: add r0, pc, [[VARPC1]]
+; ARM-MOVT-PIC: movw [[VARPC2:r[0-9]+]], :lower16:(_local_tls_var-([[PCREL_LOC2:LPC[0-9]+_[0-9]+]]+8))
+; ARM-MOVT-PIC: movt [[VARPC2]], :upper16:(_local_tls_var-([[PCREL_LOC2]]+8))
+; ARM-MOVT-PIC: [[PCREL_LOC2]]:
+; ARM-MOVT-PIC-NEXT: ldr [[TLV_GET_ADDR:r[0-9]+]], [pc, [[VARPC2]]]
+; ARM-MOVT-PIC: blx [[TLV_GET_ADDR]]
+; ARM-MOVT-PIC: ldr r0, [r0]
+
+; ARM-LIT-PIC-LABEL: test_local_tls:
+; ARM-LIT-PIC: ldr r0, [[LOCAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]]
+; ARM-LIT-PIC: [[PCREL_LOC:LPC[0-9]+_[0-9]+]]:
+; ARM-LIT-PIC-NEXT: add r0, pc
+; ARM-LIT-PIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
+; ARM-LIT-PIC: blx [[TLV_GET_ADDR]]
+; ARM-LIT-PIC: ldr r0, [r0]
+; ARM-LIT-PIC: [[LOCAL_VAR_ADDR]]:
+; ARM-LIT-PIC-NEXT: .long _local_tls_var-([[PCREL_LOC]]+8)
+
+; ARM-MOVT-STATIC-LABEL: test_local_tls:
+; ARM-MOVT-STATIC: movw r0, :lower16:_local_tls_var
+; ARM-MOVT-STATIC: movt r0, :upper16:_local_tls_var
+; ARM-MOVT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
+; ARM-MOVT-STATIC: blx [[TLV_GET_ADDR]]
+; ARM-MOVT-STATIC: ldr r0, [r0]
+
+; ARM-LIT-STATIC-LABEL: test_local_tls:
+; ARM-LIT-STATIC: ldr r0, [[LOCAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]]
+; ARM-LIT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
+; ARM-LIT-STATIC: blx [[TLV_GET_ADDR]]
+; ARM-LIT-STATIC: ldr r0, [r0]
+; ARM-LIT-STATIC: [[LOCAL_VAR_ADDR]]:
+; ARM-LIT-STATIC-NEXT: .long _local_tls_var
+
+
+ %val = load i32, i32* @local_tls_var, align 4
+ ret i32 %val
+}
+
+define i32 @test_external_tls() {
+; T2-MOVT-PIC-LABEL: test_external_tls:
+; T2-MOVT-PIC: movw r[[EXTGOT:[0-9]+]], :lower16:(L_external_tls_var$non_lazy_ptr-([[PCREL_LOC:LPC[0-9]+_[0-9]+]]+4))
+; T2-MOVT-PIC: movt r[[EXTGOT]], :upper16:(L_external_tls_var$non_lazy_ptr-([[PCREL_LOC]]+4))
+; T2-MOVT-PIC: [[PCREL_LOC]]:
+; T2-MOVT-PIC-NEXT: add r[[EXTGOT]], pc
+; T2-MOVT-PIC: ldr r0, [r[[EXTGOT]]]
+; T2-MOVT-PIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
+; T2-MOVT-PIC: blx [[TLV_GET_ADDR]]
+; T2-MOVT-PIC: ldr r0, [r0]
+
+; T2-LIT-PIC-LABEL: test_external_tls:
+; T2-LIT-PIC: ldr r[[EXTGOT:[0-9]+]], [[EXTERNAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]]
+; T2-LIT-PIC: [[PCREL_LOC:LPC[0-9]+_[0-9]+]]:
+; T2-LIT-PIC-NEXT: add r[[EXTGOT]], pc
+; T2-LIT-PIC: ldr r0, [r[[EXTGOT]]]
+; T2-LIT-PIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
+; T2-LIT-PIC: blx [[TLV_GET_ADDR]]
+; T2-LIT-PIC: ldr r0, [r0]
+; T2-LIT-PIC: [[EXTERNAL_VAR_ADDR]]:
+; T2-LIT-PIC-NEXT: .long L_external_tls_var$non_lazy_ptr-([[PCREL_LOC]]+4)
+
+; T2-MOVT-STATIC-LABEL: test_external_tls:
+; T2-MOVT-STATIC: movw r0, :lower16:_external_tls_var
+; T2-MOVT-STATIC: movt r0, :upper16:_external_tls_var
+; T2-MOVT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
+; T2-MOVT-STATIC: blx [[TLV_GET_ADDR]]
+; T2-MOVT-STATIC: ldr r0, [r0]
+
+; T2-LIT-STATIC-LABEL: test_external_tls:
+; T2-LIT-STATIC: ldr r0, [[EXTERNAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]]
+; T2-LIT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
+; T2-LIT-STATIC: blx [[TLV_GET_ADDR]]
+; T2-LIT-STATIC: ldr r0, [r0]
+; T2-LIT-STATIC: [[EXTERNAL_VAR_ADDR]]:
+; T2-LIT-STATIC-NEXT: .long _external_tls_var
+
+; ARM-MOVT-PIC-LABEL: test_external_tls:
+; ARM-MOVT-PIC: movw r[[EXTGOT:[0-9]+]], :lower16:(L_external_tls_var$non_lazy_ptr-([[PCREL_LOC:LPC[0-9]+_[0-9]+]]+8))
+; ARM-MOVT-PIC: movt r[[EXTGOT]], :upper16:(L_external_tls_var$non_lazy_ptr-([[PCREL_LOC]]+8))
+; ARM-MOVT-PIC: [[PCREL_LOC]]:
+; ARM-MOVT-PIC-NEXT: ldr r0, [pc, r[[EXTGOT]]]
+; ARM-MOVT-PIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
+; ARM-MOVT-PIC: blx [[TLV_GET_ADDR]]
+; ARM-MOVT-PIC: ldr r0, [r0]
+
+; ARM-LIT-PIC-LABEL: test_external_tls:
+; ARM-LIT-PIC: ldr r[[EXTGOT:[0-9]+]], [[EXTERNAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]]
+; ARM-LIT-PIC: [[PCREL_LOC:LPC[0-9]+_[0-9]+]]:
+; ARM-LIT-PIC-NEXT: add r[[EXTGOT]], pc
+; ARM-LIT-PIC: ldr r0, [r[[EXTGOT]]]
+; ARM-LIT-PIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
+; ARM-LIT-PIC: blx [[TLV_GET_ADDR]]
+; ARM-LIT-PIC: ldr r0, [r0]
+; ARM-LIT-PIC: [[EXTERNAL_VAR_ADDR]]:
+; ARM-LIT-PIC-NEXT: .long L_external_tls_var$non_lazy_ptr-([[PCREL_LOC]]+8)
+
+; ARM-MOVT-STATIC-LABEL: test_external_tls:
+; ARM-MOVT-STATIC: movw r0, :lower16:_external_tls_var
+; ARM-MOVT-STATIC: movt r0, :upper16:_external_tls_var
+; ARM-MOVT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
+; ARM-MOVT-STATIC: blx [[TLV_GET_ADDR]]
+; ARM-MOVT-STATIC: ldr r0, [r0]
+
+; ARM-LIT-STATIC-LABEL: test_external_tls:
+; ARM-LIT-STATIC: ldr r0, [[EXTERNAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]]
+; ARM-LIT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
+; ARM-LIT-STATIC: blx [[TLV_GET_ADDR]]
+; ARM-LIT-STATIC: ldr r0, [r0]
+; ARM-LIT-STATIC: [[EXTERNAL_VAR_ADDR]]:
+; ARM-LIT-STATIC-NEXT: .long _external_tls_var
+
+ %val = load i32, i32* @external_tls_var, align 4
+ ret i32 %val
+}