From cd2103de5a0dcbe429010509d91d7648aaca6f59 Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Fri, 4 Dec 2015 17:40:13 +0000 Subject: [PATCH] [CXX TLS calling convention] Add CXX TLS calling convention. This commit adds a new target-independent calling convention for C++ TLS access functions. It aims to minimize overhead in the caller by preserving as many registers as possible. The target-specific implementation for X86-64 is defined as follows: Arguments are passed as for the default C calling convention. The same applies for the return value(s). The callee preserves all GPRs - except RAX and RDI. The access function makes C-style TLS function calls in the entry and exit block; C-style TLS functions save a lot more registers than normal calls. The added calling convention ties into the existing implementation of the C-style TLS functions, so we can't simply use existing calling conventions such as preserve_mostcc. rdar://9001553 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254737 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/BitCodeFormat.rst | 1 + docs/LangRef.rst | 10 +++++ include/llvm/IR/CallingConv.h | 3 ++ lib/AsmParser/LLLexer.cpp | 1 + lib/AsmParser/LLParser.cpp | 2 + lib/AsmParser/LLToken.h | 1 + lib/IR/AsmWriter.cpp | 1 + lib/Target/X86/X86RegisterInfo.cpp | 8 ++++ test/CodeGen/X86/cxx_tlscc64.ll | 71 ++++++++++++++++++++++++++++++ 9 files changed, 98 insertions(+) create mode 100644 test/CodeGen/X86/cxx_tlscc64.ll diff --git a/docs/BitCodeFormat.rst b/docs/BitCodeFormat.rst index 62d66f85d55..d6e3099bdb6 100644 --- a/docs/BitCodeFormat.rst +++ b/docs/BitCodeFormat.rst @@ -756,6 +756,7 @@ function. 
The operand fields are: * ``anyregcc``: code 13 * ``preserve_mostcc``: code 14 * ``preserve_allcc``: code 15 + * ``cxx_fast_tlscc``: code 17 * ``x86_stdcallcc``: code 64 * ``x86_fastcallcc``: code 65 * ``arm_apcscc``: code 66 diff --git a/docs/LangRef.rst b/docs/LangRef.rst index cf1ceab1f1c..82b33557c12 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -406,6 +406,16 @@ added in the future: This calling convention, like the `PreserveMost` calling convention, will be used by a future version of the ObjectiveC runtime and should be considered experimental at this time. +"``cxx_fast_tlscc``" - The `CXX_FAST_TLS` calling convention for access functions + This calling convention aims to minimize overhead in the caller by + preserving as many registers as possible. This calling convention behaves + identical to the `C` calling convention on how arguments and return values + are passed, but it uses a different set of caller/callee-saved registers. + Given that C-style TLS on Darwin has its own special CSRs, we can't use the + existing `PreserveMost`. + + - On X86-64 the callee preserves all general purpose registers, except for + RDI and RAX. "``cc ``" - Numbered convention Any calling convention may be specified by number, allowing target-specific calling conventions to be used. Target specific diff --git a/include/llvm/IR/CallingConv.h b/include/llvm/IR/CallingConv.h index ac7cc9b74ab..8204d3e2e81 100644 --- a/include/llvm/IR/CallingConv.h +++ b/include/llvm/IR/CallingConv.h @@ -72,6 +72,9 @@ namespace CallingConv { // Swift - Calling convention for Swift. Swift = 16, + // CXX_FAST_TLS - Calling convention for access functions. + CXX_FAST_TLS = 17, + // Target - This is the start of the target-specific calling conventions, // e.g. fastcall and thiscall on X86. 
FirstTargetCC = 64, diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 372c5658886..f95a763e3da 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -591,6 +591,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(ghccc); KEYWORD(hhvmcc); KEYWORD(hhvm_ccc); + KEYWORD(cxx_fast_tlscc); KEYWORD(cc); KEYWORD(c); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 307ed397834..145b5eaacec 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -1544,6 +1544,7 @@ bool LLParser::ParseOptionalDLLStorageClass(unsigned &Res) { /// ::= 'ghccc' /// ::= 'hhvmcc' /// ::= 'hhvm_ccc' +/// ::= 'cxx_fast_tlscc' /// ::= 'cc' UINT /// bool LLParser::ParseOptionalCallingConv(unsigned &CC) { @@ -1574,6 +1575,7 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) { case lltok::kw_ghccc: CC = CallingConv::GHC; break; case lltok::kw_hhvmcc: CC = CallingConv::HHVM; break; case lltok::kw_hhvm_ccc: CC = CallingConv::HHVM_C; break; + case lltok::kw_cxx_fast_tlscc: CC = CallingConv::CXX_FAST_TLS; break; case lltok::kw_cc: { Lex.Lex(); return ParseUInt32(CC); diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 6a9a1de0b85..48abeac9506 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -99,6 +99,7 @@ namespace lltok { kw_preserve_mostcc, kw_preserve_allcc, kw_ghccc, kw_hhvmcc, kw_hhvm_ccc, + kw_cxx_fast_tlscc, // Attributes: kw_attributes, diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index fae1ebee5f2..f8040a7b5f8 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -304,6 +304,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) { case CallingConv::AnyReg: Out << "anyregcc"; break; case CallingConv::PreserveMost: Out << "preserve_mostcc"; break; case CallingConv::PreserveAll: Out << "preserve_allcc"; break; + case CallingConv::CXX_FAST_TLS: Out << "cxx_fast_tlscc"; break; case CallingConv::GHC: Out << "ghccc"; break; case 
CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break; case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break; diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 39de5004143..88843763478 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -248,6 +248,10 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { if (HasAVX) return CSR_64_RT_AllRegs_AVX_SaveList; return CSR_64_RT_AllRegs_SaveList; + case CallingConv::CXX_FAST_TLS: + if (Is64Bit) + return CSR_64_TLS_Darwin_SaveList; + break; case CallingConv::Intel_OCL_BI: { if (HasAVX512 && IsWin64) return CSR_Win64_Intel_OCL_BI_AVX512_SaveList; @@ -310,6 +314,10 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF, if (HasAVX) return CSR_64_RT_AllRegs_AVX_RegMask; return CSR_64_RT_AllRegs_RegMask; + case CallingConv::CXX_FAST_TLS: + if (Is64Bit) + return CSR_64_TLS_Darwin_RegMask; + break; case CallingConv::Intel_OCL_BI: { if (HasAVX512 && IsWin64) return CSR_Win64_Intel_OCL_BI_AVX512_RegMask; diff --git a/test/CodeGen/X86/cxx_tlscc64.ll b/test/CodeGen/X86/cxx_tlscc64.ll new file mode 100644 index 00000000000..c229521cc9a --- /dev/null +++ b/test/CodeGen/X86/cxx_tlscc64.ll @@ -0,0 +1,71 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -enable-shrink-wrap=true | FileCheck --check-prefix=SHRINK %s +%struct.S = type { i8 } + +@sg = internal thread_local global %struct.S zeroinitializer, align 1 +@__dso_handle = external global i8 +@__tls_guard = internal thread_local unnamed_addr global i1 false + +declare void @_ZN1SC1Ev(%struct.S*) +declare void @_ZN1SD1Ev(%struct.S*) +declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*) + +; Every GPR should be saved - except rdi, rax, and rsp +; CHECK-LABEL: _ZTW2sg +; CHECK: pushq %r11 +; CHECK: pushq %r10 +; CHECK: pushq %r9 +; CHECK: pushq %r8 +; CHECK: pushq %rsi +; CHECK: pushq %rdx +; CHECK: pushq %rcx 
+; CHECK: pushq %rbx +; CHECK: callq +; CHECK: jne +; CHECK: callq +; CHECK: tlv_atexit +; CHECK: callq +; CHECK: popq %rbx +; CHECK: popq %rcx +; CHECK: popq %rdx +; CHECK: popq %rsi +; CHECK: popq %r8 +; CHECK: popq %r9 +; CHECK: popq %r10 +; CHECK: popq %r11 +; SHRINK-LABEL: _ZTW2sg +; SHRINK: callq +; SHRINK: jne +; SHRINK: pushq %r11 +; SHRINK: pushq %r10 +; SHRINK: pushq %r9 +; SHRINK: pushq %r8 +; SHRINK: pushq %rsi +; SHRINK: pushq %rdx +; SHRINK: pushq %rcx +; SHRINK: pushq %rbx +; SHRINK: callq +; SHRINK: tlv_atexit +; SHRINK: popq %rbx +; SHRINK: popq %rcx +; SHRINK: popq %rdx +; SHRINK: popq %rsi +; SHRINK: popq %r8 +; SHRINK: popq %r9 +; SHRINK: popq %r10 +; SHRINK: popq %r11 +; SHRINK: LBB{{.*}}: +; SHRINK: callq +define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() { + %.b.i = load i1, i1* @__tls_guard, align 1 + br i1 %.b.i, label %__tls_init.exit, label %init.i + +init.i: + store i1 true, i1* @__tls_guard, align 1 + tail call void @_ZN1SC1Ev(%struct.S* nonnull @sg) #2 + %1 = tail call i32 @_tlv_atexit(void (i8*)* nonnull bitcast (void (%struct.S*)* @_ZN1SD1Ev to void (i8*)*), i8* nonnull getelementptr inbounds (%struct.S, %struct.S* @sg, i64 0, i32 0), i8* nonnull @__dso_handle) #2 + br label %__tls_init.exit + +__tls_init.exit: + ret %struct.S* @sg +} -- 2.34.1