From c39f5dd0e2d689a10d1e7de3da07f1975c0aa8f4 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 3 Apr 2015 01:46:11 +0000 Subject: [PATCH] MC: For variable symbols, maintain MCSymbol::Section as a cache. Fixes PR19582. Previously, when an asm assignment (.set or =) was created, we would look up the section immediately in MCSymbol::setVariableValue. This caused symbols to receive the wrong section if the RHS of the assignment had not been seen yet. This had a knock-on effect in the object file emitters, causing them to emit extra symbols, or to give symbols the wrong visibility or the wrong section. For example, in the following asm: .data .Llocal: .text leaq .Llocal1(%rip), %rdi .Llocal1 = .Llocal2 .Llocal2 = .Llocal the first assignment would give .Llocal1 a null section, which would never get fixed up by the second assignment. This would cause the ELF object file emitter to consider .Llocal1 to be an undefined symbol and give it external linkage, even though .Llocal1 should not have been emitted at all in the object file. Or in the following asm: alias_to_local = Ltmp0 Ltmp0: the Mach-O object file emitter would give the alias_to_local symbol a n_type of N_SECT and a n_sect of 0. This is invalid under the Mach-O specification, which requires N_SECT symbols to receive a non-zero section number if the symbol is defined in a section in the object file. https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/MachORuntime/#//apple_ref/c/tag/nlist After this change we do not look up the section when the assignment is created, but instead look it up on demand and store it in Section, which is treated as a cache if the symbol is a variable symbol. This change also fixes a bug in MCExpr::FindAssociatedSection. Previously, if we saw a subtraction, we would return the first referenced section, even in cases where we should have been returning the absolute pseudo-section. Now we always return the absolute pseudo-section for expressions that subtract two section-derived expressions. This isn't always correct (e.g. if one of the sections ends up being laid out at an absolute address), but it's probably the best we can do without more context. This allows us to remove code in two places where we appear to have been working around this bug, in MachObjectWriter::markAbsoluteVariableSymbols and in X86AsmPrinter::EmitStartOfAsmFile. Re-applies r233595 (aka D8586), which was reverted in r233898. Differential Revision: http://reviews.llvm.org/D8798 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@233995 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/MC/MCMachObjectWriter.h | 2 -- include/llvm/MC/MCSymbol.h | 26 +++++++++++++++++--------- lib/MC/MCExpr.cpp | 4 ++++ lib/MC/MCSymbol.cpp | 8 +------- lib/MC/MachObjectWriter.cpp | 21 --------------------- lib/Target/X86/X86AsmPrinter.cpp | 1 - test/MC/ELF/alias.s | 9 +++++++++ test/MC/ELF/pr19582.s | 8 ++++++++ test/MC/MachO/ARM/aliased-symbols.s | 10 +++++----- 9 files changed, 44 insertions(+), 45 deletions(-) create mode 100644 test/MC/ELF/pr19582.s diff --git a/include/llvm/MC/MCMachObjectWriter.h b/include/llvm/MC/MCMachObjectWriter.h index 514700bce35..3d270ce7e0d 100644 --- a/include/llvm/MC/MCMachObjectWriter.h +++ b/include/llvm/MC/MCMachObjectWriter.h @@ -257,8 +257,6 @@ public: void computeSectionAddresses(const MCAssembler &Asm, const MCAsmLayout &Layout); - void markAbsoluteVariableSymbols(MCAssembler &Asm, - const MCAsmLayout &Layout); void ExecutePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) override; diff --git a/include/llvm/MC/MCSymbol.h b/include/llvm/MC/MCSymbol.h index 53443b01d96..e4bdfda55d6 100644 --- a/include/llvm/MC/MCSymbol.h +++ b/include/llvm/MC/MCSymbol.h @@ -15,6 +15,7 @@ #define LLVM_MC_MCSYMBOL_H #include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCExpr.h" #include "llvm/Support/Compiler.h" namespace llvm { @@ -42,8 +43,9 @@ namespace llvm { /// Section - The section the symbol is defined in. This is null for /// undefined symbols, and the special AbsolutePseudoSection value for - /// absolute symbols. - const MCSection *Section; + /// absolute symbols. If this is a variable symbol, this caches the + /// variable value's section. + mutable const MCSection *Section; /// Value - If non-null, the value for a variable symbol. const MCExpr *Value; @@ -68,6 +70,12 @@ namespace llvm { MCSymbol(const MCSymbol&) = delete; void operator=(const MCSymbol&) = delete; + const MCSection *getSectionPtr() const { + if (Section || !Value) + return Section; + return Section = Value->FindAssociatedSection(); + } + public: /// getName - Get the symbol name. StringRef getName() const { return Name; } @@ -103,7 +111,7 @@ namespace llvm { /// /// Defined symbols are either absolute or in some section. bool isDefined() const { - return Section != nullptr; + return getSectionPtr() != nullptr; } /// isInSection - Check if this symbol is defined in some section (i.e., it @@ -119,27 +127,27 @@ namespace llvm { /// isAbsolute - Check if this is an absolute symbol. bool isAbsolute() const { - return Section == AbsolutePseudoSection; + return getSectionPtr() == AbsolutePseudoSection; } /// getSection - Get the section associated with a defined, non-absolute /// symbol. const MCSection &getSection() const { assert(isInSection() && "Invalid accessor!"); - return *Section; + return *getSectionPtr(); } /// setSection - Mark the symbol as defined in the section \p S. - void setSection(const MCSection &S) { Section = &S; } + void setSection(const MCSection &S) { + assert(!isVariable() && "Cannot set section of variable"); + Section = &S; + } /// setUndefined - Mark the symbol as undefined. void setUndefined() { Section = nullptr; } - /// setAbsolute - Mark the symbol as absolute. - void setAbsolute() { Section = AbsolutePseudoSection; } - /// @} /// @name Variable Symbols /// @{ diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index 8a64403362c..0702539218c 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -775,6 +775,10 @@ const MCSection *MCExpr::FindAssociatedSection() const { if (RHS_S == MCSymbol::AbsolutePseudoSection) return LHS_S; + // Not always correct, but probably the best we can do without more context. + if (BE->getOpcode() == MCBinaryExpr::Sub) + return MCSymbol::AbsolutePseudoSection; + // Otherwise, return the first non-null section. return LHS_S ? LHS_S : RHS_S; } diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp index 24165254e56..6582574ae94 100644 --- a/lib/MC/MCSymbol.cpp +++ b/lib/MC/MCSymbol.cpp @@ -55,13 +55,7 @@ void MCSymbol::setVariableValue(const MCExpr *Value) { assert(!IsUsed && "Cannot set a variable that has already been used."); assert(Value && "Invalid variable value!"); this->Value = Value; - - // Variables should always be marked as in the same "section" as the value. - const MCSection *Section = Value->FindAssociatedSection(); - if (Section) - setSection(*Section); - else - setUndefined(); + this->Section = nullptr; } void MCSymbol::print(raw_ostream &OS) const { diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index 5e9e86f18a0..56cccab1d39 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -649,33 +649,12 @@ void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm, } } -void MachObjectWriter::markAbsoluteVariableSymbols(MCAssembler &Asm, - const MCAsmLayout &Layout) { - for (MCSymbolData &SD : Asm.symbols()) { - if (!SD.getSymbol().isVariable()) - continue; - - // Is the variable is a symbol difference (SA - SB + C) expression, - // and neither symbol is external, mark the variable as absolute. - const MCExpr *Expr = SD.getSymbol().getVariableValue(); - MCValue Value; - if (Expr->EvaluateAsRelocatable(Value, &Layout, nullptr)) { - if (Value.getSymA() && Value.getSymB()) - const_cast(&SD.getSymbol())->setAbsolute(); - } - } -} - void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) { computeSectionAddresses(Asm, Layout); // Create symbol data for any indirect symbols. BindIndirectSymbols(Asm); - - // Mark symbol difference expressions in variables (from .set or = directives) - // as absolute. - markAbsoluteVariableSymbols(Asm, Layout); } bool MachObjectWriter:: diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index f6033a7e157..a84f0585aea 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -523,7 +523,6 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) { // must be registered in .sxdata. Use of any unregistered handlers will // cause the process to terminate immediately. LLVM does not know how to // register any SEH handlers, so its object files should be safe. - S->setAbsolute(); OutStreamer.EmitSymbolAttribute(S, MCSA_Global); OutStreamer.EmitAssignment( S, MCConstantExpr::Create(int64_t(1), MMI->getContext())); diff --git a/test/MC/ELF/alias.s b/test/MC/ELF/alias.s index 78df7371d25..0ab6dd4b5b8 100644 --- a/test/MC/ELF/alias.s +++ b/test/MC/ELF/alias.s @@ -24,6 +24,15 @@ bar5 = bar4 bar6 = bar5 bar6: +// Test that indirect local aliases do not appear as symbols. +.data +.Llocal: + +.text +leaq .Llocal1(%rip), %rdi +.Llocal1 = .Llocal2 +.Llocal2 = .Llocal + // CHECK: Symbols [ // CHECK-NEXT: Symbol { // CHECK-NEXT: Name: (0) diff --git a/test/MC/ELF/pr19582.s b/test/MC/ELF/pr19582.s new file mode 100644 index 00000000000..304cacb2972 --- /dev/null +++ b/test/MC/ELF/pr19582.s @@ -0,0 +1,8 @@ +// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -r | FileCheck %s + +a: + .section foo + c = b +b: + // CHECK: 0x0 R_X86_64_PC32 .text 0x0 + .long a - c diff --git a/test/MC/MachO/ARM/aliased-symbols.s b/test/MC/MachO/ARM/aliased-symbols.s index e87b81c6a15..cc2e200ce8a 100644 --- a/test/MC/MachO/ARM/aliased-symbols.s +++ b/test/MC/MachO/ARM/aliased-symbols.s @@ -45,9 +45,9 @@ Ltmp0: // CHECK-NEXT: Value: 0x[[DEFINED_EARLY]] // CHECK-NEXT: } - // defined_late was defined. Just after defined_early. + // alias_to_late was an alias to defined_late. But we can resolve it. // CHECK: Symbol { -// CHECK-NEXT: Name: defined_late +// CHECK-NEXT: Name: alias_to_late // CHECK-NEXT: Type: Section (0xE) // CHECK-NEXT: Section: __data (0x2) // CHECK-NEXT: RefType: UndefinedNonLazy (0x0) @@ -56,9 +56,9 @@ Ltmp0: // CHECK-NEXT: Value: 0x[[DEFINED_LATE:[0-9A-F]+]] // CHECK-NEXT: } - // alias_to_late was an alias to defined_late. But we can resolve it. + // defined_late was defined. Just after defined_early. // CHECK: Symbol { -// CHECK-NEXT: Name: alias_to_late +// CHECK-NEXT: Name: defined_late // CHECK-NEXT: Type: Section (0xE) // CHECK-NEXT: Section: __data (0x2) // CHECK-NEXT: RefType: UndefinedNonLazy (0x0) @@ -72,7 +72,7 @@ Ltmp0: // CHECK: Symbol { // CHECK-NEXT: Name: alias_to_local (42) // CHECK-NEXT: Type: Section (0xE) -// CHECK-NEXT: Section: (0x0) +// CHECK-NEXT: Section: __data (0x2) // CHECK-NEXT: RefType: UndefinedNonLazy (0x0) // CHECK-NEXT: Flags [ (0x0) // CHECK-NEXT: ] -- 2.34.1