This commit adds a new target-independent calling convention for C++ TLS
access functions. It aims to minimize overhead in the caller by preserving as
many registers as possible.
The target-specific implementation for X86-64 is defined as follows:
Arguments are passed as for the default C calling convention
The same applies for the return value(s)
The callee preserves all GPRs - except RAX and RDI
The access function makes C-style TLS function calls in the entry and exit
blocks; C-style TLS functions save a lot more registers than normal calls.
The added calling convention ties into the existing implementation of the
C-style TLS functions, so we can't simply use existing calling conventions
such as preserve_mostcc.
rdar://9001553
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254737 91177308-0d34-0410-b5e6-96231b3b80d8
* ``anyregcc``: code 13
* ``preserve_mostcc``: code 14
* ``preserve_allcc``: code 15
+ * ``cxx_fast_tlscc``: code 17
* ``x86_stdcallcc``: code 64
* ``x86_fastcallcc``: code 65
* ``arm_apcscc``: code 66
This calling convention, like the `PreserveMost` calling convention, will be
used by a future version of the ObjectiveC runtime and should be considered
experimental at this time.
+"``cxx_fast_tlscc``" - The `CXX_FAST_TLS` calling convention for access functions
+ This calling convention aims to minimize overhead in the caller by
+ preserving as many registers as possible. This calling convention behaves
+ identically to the `C` calling convention on how arguments and return values
+ are passed, but it uses a different set of caller/callee-saved registers.
+ Given that C-style TLS on Darwin has its own special CSRs, we can't use the
+ existing `PreserveMost`.
+
+ - On X86-64 the callee preserves all general purpose registers, except for
+ RDI and RAX.
"``cc <n>``" - Numbered convention
Any calling convention may be specified by number, allowing
target-specific calling conventions to be used. Target specific
// Swift - Calling convention for Swift.
Swift = 16,
+ // CXX_FAST_TLS - Calling convention for access functions.
+ CXX_FAST_TLS = 17,
+
// Target - This is the start of the target-specific calling conventions,
// e.g. fastcall and thiscall on X86.
FirstTargetCC = 64,
KEYWORD(ghccc);
KEYWORD(hhvmcc);
KEYWORD(hhvm_ccc);
+ KEYWORD(cxx_fast_tlscc);
KEYWORD(cc);
KEYWORD(c);
/// ::= 'ghccc'
/// ::= 'hhvmcc'
/// ::= 'hhvm_ccc'
+/// ::= 'cxx_fast_tlscc'
/// ::= 'cc' UINT
///
bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
case lltok::kw_ghccc: CC = CallingConv::GHC; break;
case lltok::kw_hhvmcc: CC = CallingConv::HHVM; break;
case lltok::kw_hhvm_ccc: CC = CallingConv::HHVM_C; break;
+ case lltok::kw_cxx_fast_tlscc: CC = CallingConv::CXX_FAST_TLS; break;
case lltok::kw_cc: {
Lex.Lex();
return ParseUInt32(CC);
kw_preserve_mostcc, kw_preserve_allcc,
kw_ghccc,
kw_hhvmcc, kw_hhvm_ccc,
+ kw_cxx_fast_tlscc,
// Attributes:
kw_attributes,
case CallingConv::AnyReg: Out << "anyregcc"; break;
case CallingConv::PreserveMost: Out << "preserve_mostcc"; break;
case CallingConv::PreserveAll: Out << "preserve_allcc"; break;
+ case CallingConv::CXX_FAST_TLS: Out << "cxx_fast_tlscc"; break;
case CallingConv::GHC: Out << "ghccc"; break;
case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break;
case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break;
if (HasAVX)
return CSR_64_RT_AllRegs_AVX_SaveList;
return CSR_64_RT_AllRegs_SaveList;
+ case CallingConv::CXX_FAST_TLS:
+ if (Is64Bit)
+ return CSR_64_TLS_Darwin_SaveList;
+ break;
case CallingConv::Intel_OCL_BI: {
if (HasAVX512 && IsWin64)
return CSR_Win64_Intel_OCL_BI_AVX512_SaveList;
if (HasAVX)
return CSR_64_RT_AllRegs_AVX_RegMask;
return CSR_64_RT_AllRegs_RegMask;
+ case CallingConv::CXX_FAST_TLS:
+ if (Is64Bit)
+ return CSR_64_TLS_Darwin_RegMask;
+ break;
case CallingConv::Intel_OCL_BI: {
if (HasAVX512 && IsWin64)
return CSR_Win64_Intel_OCL_BI_AVX512_RegMask;
--- /dev/null
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -enable-shrink-wrap=true | FileCheck --check-prefix=SHRINK %s
+%struct.S = type { i8 }
+
+@sg = internal thread_local global %struct.S zeroinitializer, align 1
+@__dso_handle = external global i8
+@__tls_guard = internal thread_local unnamed_addr global i1 false
+
+declare void @_ZN1SC1Ev(%struct.S*)
+declare void @_ZN1SD1Ev(%struct.S*)
+declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*)
+
+; Every GPR should be saved - except rdi, rax, and rsp
+; CHECK-LABEL: _ZTW2sg
+; CHECK: pushq %r11
+; CHECK: pushq %r10
+; CHECK: pushq %r9
+; CHECK: pushq %r8
+; CHECK: pushq %rsi
+; CHECK: pushq %rdx
+; CHECK: pushq %rcx
+; CHECK: pushq %rbx
+; CHECK: callq
+; CHECK: jne
+; CHECK: callq
+; CHECK: tlv_atexit
+; CHECK: callq
+; CHECK: popq %rbx
+; CHECK: popq %rcx
+; CHECK: popq %rdx
+; CHECK: popq %rsi
+; CHECK: popq %r8
+; CHECK: popq %r9
+; CHECK: popq %r10
+; CHECK: popq %r11
+; SHRINK-LABEL: _ZTW2sg
+; SHRINK: callq
+; SHRINK: jne
+; SHRINK: pushq %r11
+; SHRINK: pushq %r10
+; SHRINK: pushq %r9
+; SHRINK: pushq %r8
+; SHRINK: pushq %rsi
+; SHRINK: pushq %rdx
+; SHRINK: pushq %rcx
+; SHRINK: pushq %rbx
+; SHRINK: callq
+; SHRINK: tlv_atexit
+; SHRINK: popq %rbx
+; SHRINK: popq %rcx
+; SHRINK: popq %rdx
+; SHRINK: popq %rsi
+; SHRINK: popq %r8
+; SHRINK: popq %r9
+; SHRINK: popq %r10
+; SHRINK: popq %r11
+; SHRINK: LBB{{.*}}:
+; SHRINK: callq
+; Function under test: declared with the cxx_fast_tlscc calling convention and
+; returns the address of the thread-local @sg. When @__tls_guard is still
+; false it first runs the one-time initialization in init.i.
+define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() {
+ %.b.i = load i1, i1* @__tls_guard, align 1
+ br i1 %.b.i, label %__tls_init.exit, label %init.i
+
+; Initialization path: set the guard, call @_ZN1SC1Ev on @sg, then pass
+; @_ZN1SD1Ev, @sg and @__dso_handle to @_tlv_atexit. All calls written in this
+; IR function are in this block.
+init.i:
+ store i1 true, i1* @__tls_guard, align 1
+ tail call void @_ZN1SC1Ev(%struct.S* nonnull @sg) #2
+ %1 = tail call i32 @_tlv_atexit(void (i8*)* nonnull bitcast (void (%struct.S*)* @_ZN1SD1Ev to void (i8*)*), i8* nonnull getelementptr inbounds (%struct.S, %struct.S* @sg, i64 0, i32 0), i8* nonnull @__dso_handle) #2
+ br label %__tls_init.exit
+
+; Common exit: reached directly when the guard was already set, or after
+; init.i has run.
+__tls_init.exit:
+ ret %struct.S* @sg
+}