From 32e078b09cc544366720bd0e62119c8197893100 Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Thu, 29 Oct 2015 22:21:37 +0000 Subject: [PATCH] [LLVMSymbolize] Move ModuleInfo into a separate class (SymbolizableModule). Summary: This is mostly NFC. It is a first step in cleaning up LLVMSymbolize library. It removes "ModuleInfo" class which bundles together ObjectFile and its debug info context in favor of: * abstract SymbolizableModule in public headers; * SymbolizableObjectFile subclass in implementation. Additionally, SymbolizableObjectFile is now created via factory, so we can properly detect object parsing error at this stage instead of keeping the broken half-parsed object. As a next step, we would be able to propagate the error all the way back to the library user. Further improvements might include: * factoring out the logic of finding appropriate file with debug info for a given object file, and caching all parsed object files into a separate class [A]. * factoring out DILineInfo rendering [B]. This would make what is now a heavyweight "LLVMSymbolizer" a relatively straightforward class, that calls into [A] to turn filepath into a SymbolizableModule, delegates actual symbolization to concrete SymbolizableModule implementation, and lets [B] render the result. 
Reviewers: dblaikie, echristo, rafael Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D14099 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@251662 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../DebugInfo/Symbolize/SymbolizableModule.h | 54 ++++ include/llvm/DebugInfo/Symbolize/Symbolize.h | 61 +---- lib/DebugInfo/Symbolize/CMakeLists.txt | 1 + .../Symbolize/SymbolizableObjectFile.cpp | 251 ++++++++++++++++++ .../Symbolize/SymbolizableObjectFile.h | 80 ++++++ lib/DebugInfo/Symbolize/Symbolize.cpp | 239 ++--------------- 6 files changed, 416 insertions(+), 270 deletions(-) create mode 100644 include/llvm/DebugInfo/Symbolize/SymbolizableModule.h create mode 100644 lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp create mode 100644 lib/DebugInfo/Symbolize/SymbolizableObjectFile.h diff --git a/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h b/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h new file mode 100644 index 00000000000..94a72faa944 --- /dev/null +++ b/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h @@ -0,0 +1,54 @@ +//===-- SymbolizableModule.h ------------------------------------ C++ -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the SymbolizableModule interface. 
+// +//===----------------------------------------------------------------------===// +#ifndef LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEMODULE_H +#define LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEMODULE_H + +#include "llvm/DebugInfo/DIContext.h" +#include +#include + +namespace llvm { +namespace object { +class ObjectFile; +} +} + +namespace llvm { +namespace symbolize { + +using FunctionNameKind = DILineInfoSpecifier::FunctionNameKind; + +class SymbolizableModule { +public: + virtual ~SymbolizableModule() {} + virtual DILineInfo symbolizeCode(uint64_t ModuleOffset, + FunctionNameKind FNKind, + bool UseSymbolTable) const = 0; + virtual DIInliningInfo symbolizeInlinedCode(uint64_t ModuleOffset, + FunctionNameKind FNKind, + bool UseSymbolTable) const = 0; + virtual bool symbolizeData(uint64_t ModuleOffset, std::string &Name, + uint64_t &Start, uint64_t &Size) const = 0; + + // Return true if this is a 32-bit x86 PE COFF module. + virtual bool isWin32Module() const = 0; + + // Returns the preferred base of the module, i.e. where the loader would place + // it in memory assuming there were no conflicts. 
+ virtual uint64_t getModulePreferredBase() const = 0; +}; + +} // namespace symbolize +} // namespace llvm + +#endif // LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEMODULE_H diff --git a/include/llvm/DebugInfo/Symbolize/Symbolize.h b/include/llvm/DebugInfo/Symbolize/Symbolize.h index 1e3f37aaae9..e3de8c732e1 100644 --- a/include/llvm/DebugInfo/Symbolize/Symbolize.h +++ b/include/llvm/DebugInfo/Symbolize/Symbolize.h @@ -15,9 +15,9 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/DebugInfo/DIContext.h" +#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" #include "llvm/Object/MachOUniversal.h" #include "llvm/Object/ObjectFile.h" -#include "llvm/Support/DataExtractor.h" #include "llvm/Support/MemoryBuffer.h" #include #include @@ -28,7 +28,6 @@ namespace symbolize { using namespace object; using FunctionNameKind = DILineInfoSpecifier::FunctionNameKind; -class ModuleInfo; class LLVMSymbolizer { public: @@ -61,13 +60,15 @@ public: std::string symbolizeData(const std::string &ModuleName, uint64_t ModuleOffset); void flush(); - static std::string DemangleName(const std::string &Name, ModuleInfo *ModInfo); + static std::string DemangleName(const std::string &Name, + const SymbolizableModule *ModInfo); private: typedef std::pair ObjectPair; - ModuleInfo *getOrCreateModuleInfo(const std::string &ModuleName); - ObjectFile *lookUpDsymFile(const std::string &Path, const MachOObjectFile *ExeObj, + SymbolizableModule *getOrCreateModuleInfo(const std::string &ModuleName); + ObjectFile *lookUpDsymFile(const std::string &Path, + const MachOObjectFile *ExeObj, const std::string &ArchName); /// \brief Returns pair of pointers to object and debug object. @@ -77,7 +78,8 @@ private: /// universal binary (or the binary itself if it is an object file). 
ObjectFile *getObjectFileFromBinary(Binary *Bin, const std::string &ArchName); - std::string printDILineInfo(DILineInfo LineInfo, ModuleInfo *ModInfo) const; + std::string printDILineInfo(DILineInfo LineInfo, + const SymbolizableModule *ModInfo) const; // Owns all the parsed binaries and object files. SmallVector, 4> ParsedBinariesAndObjects; @@ -90,7 +92,7 @@ private: MemoryBuffers.push_back(std::move(MemBuf)); } - std::map> Modules; + std::map> Modules; std::map, ObjectFile *> ObjectFileForArch; std::map, ObjectPair> @@ -100,51 +102,6 @@ private: static const char kBadString[]; }; -class ModuleInfo { -public: - ModuleInfo(ObjectFile *Obj, std::unique_ptr DICtx); - - DILineInfo symbolizeCode(uint64_t ModuleOffset, FunctionNameKind FNKind, - bool UseSymbolTable) const; - DIInliningInfo symbolizeInlinedCode(uint64_t ModuleOffset, - FunctionNameKind FNKind, - bool UseSymbolTable) const; - bool symbolizeData(uint64_t ModuleOffset, std::string &Name, uint64_t &Start, - uint64_t &Size) const; - - // Return true if this is a 32-bit x86 PE COFF module. - bool isWin32Module() const; - - // Returns the preferred base of the module, i.e. where the loader would place - // it in memory assuming there were no conflicts. - uint64_t getModulePreferredBase() const; - -private: - bool getNameFromSymbolTable(SymbolRef::Type Type, uint64_t Address, - std::string &Name, uint64_t &Addr, - uint64_t &Size) const; - // For big-endian PowerPC64 ELF, OpdAddress is the address of the .opd - // (function descriptor) section and OpdExtractor refers to its contents. - void addSymbol(const SymbolRef &Symbol, uint64_t SymbolSize, - DataExtractor *OpdExtractor = nullptr, - uint64_t OpdAddress = 0); - void addCoffExportSymbols(const COFFObjectFile *CoffObj); - ObjectFile *Module; - std::unique_ptr DebugInfoContext; - - struct SymbolDesc { - uint64_t Addr; - // If size is 0, assume that symbol occupies the whole memory range up to - // the following symbol. 
- uint64_t Size; - friend bool operator<(const SymbolDesc &s1, const SymbolDesc &s2) { - return s1.Addr < s2.Addr; - } - }; - std::map Functions; - std::map Objects; -}; - } // namespace symbolize } // namespace llvm diff --git a/lib/DebugInfo/Symbolize/CMakeLists.txt b/lib/DebugInfo/Symbolize/CMakeLists.txt index 0e43eaa977c..82156c122ec 100644 --- a/lib/DebugInfo/Symbolize/CMakeLists.txt +++ b/lib/DebugInfo/Symbolize/CMakeLists.txt @@ -1,4 +1,5 @@ add_llvm_library(LLVMSymbolize + SymbolizableObjectFile.cpp Symbolize.cpp ADDITIONAL_HEADER_DIRS diff --git a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp new file mode 100644 index 00000000000..65d4cce3dc1 --- /dev/null +++ b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp @@ -0,0 +1,251 @@ +//===-- SymbolizableObjectFile.cpp ----------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implementation of SymbolizableObjectFile class. +// +//===----------------------------------------------------------------------===// + +#include "SymbolizableObjectFile.h" +#include "llvm/Object/SymbolSize.h" +#include "llvm/Support/DataExtractor.h" + +namespace llvm { +namespace symbolize { + +using namespace object; + +static DILineInfoSpecifier +getDILineInfoSpecifier(FunctionNameKind FNKind) { + return DILineInfoSpecifier( + DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, FNKind); +} + +ErrorOr> +SymbolizableObjectFile::create(object::ObjectFile *Obj, + std::unique_ptr DICtx) { + std::unique_ptr res( + new SymbolizableObjectFile(Obj, std::move(DICtx))); + std::unique_ptr OpdExtractor; + uint64_t OpdAddress = 0; + // Find the .opd (function descriptor) section if any, for big-endian + // PowerPC64 ELF. 
+ if (Obj->getArch() == Triple::ppc64) { + for (section_iterator Section : Obj->sections()) { + StringRef Name; + StringRef Data; + if (auto EC = Section->getName(Name)) + return EC; + if (Name == ".opd") { + if (auto EC = Section->getContents(Data)) + return EC; + OpdExtractor.reset(new DataExtractor(Data, Obj->isLittleEndian(), + Obj->getBytesInAddress())); + OpdAddress = Section->getAddress(); + break; + } + } + } + std::vector> Symbols = + computeSymbolSizes(*Obj); + for (auto &P : Symbols) + res->addSymbol(P.first, P.second, OpdExtractor.get(), OpdAddress); + + // If this is a COFF object and we didn't find any symbols, try the export + // table. + if (Symbols.empty()) { + if (auto *CoffObj = dyn_cast(Obj)) + if (auto EC = res->addCoffExportSymbols(CoffObj)) + return EC; + } + return std::move(res); +} + +SymbolizableObjectFile::SymbolizableObjectFile(ObjectFile *Obj, + std::unique_ptr DICtx) + : Module(Obj), DebugInfoContext(std::move(DICtx)) {} + +namespace { +struct OffsetNamePair { + uint32_t Offset; + StringRef Name; + bool operator<(const OffsetNamePair &R) const { + return Offset < R.Offset; + } +}; +} + +std::error_code SymbolizableObjectFile::addCoffExportSymbols( + const COFFObjectFile *CoffObj) { + // Get all export names and offsets. + std::vector ExportSyms; + for (const ExportDirectoryEntryRef &Ref : CoffObj->export_directories()) { + StringRef Name; + uint32_t Offset; + if (auto EC = Ref.getSymbolName(Name)) + return EC; + if (auto EC = Ref.getExportRVA(Offset)) + return EC; + ExportSyms.push_back(OffsetNamePair{Offset, Name}); + } + if (ExportSyms.empty()) + return std::error_code(); + + // Sort by ascending offset. + array_pod_sort(ExportSyms.begin(), ExportSyms.end()); + + // Approximate the symbol sizes by assuming they run to the next symbol. + // FIXME: This assumes all exports are functions. 
+ uint64_t ImageBase = CoffObj->getImageBase(); + for (auto I = ExportSyms.begin(), E = ExportSyms.end(); I != E; ++I) { + OffsetNamePair &Export = *I; + // FIXME: The last export has a one byte size now. + uint32_t NextOffset = I != E ? I->Offset : Export.Offset + 1; + uint64_t SymbolStart = ImageBase + Export.Offset; + uint64_t SymbolSize = NextOffset - Export.Offset; + SymbolDesc SD = {SymbolStart, SymbolSize}; + Functions.insert(std::make_pair(SD, Export.Name)); + } + return std::error_code(); +} + +std::error_code SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol, + uint64_t SymbolSize, + DataExtractor *OpdExtractor, + uint64_t OpdAddress) { + SymbolRef::Type SymbolType = Symbol.getType(); + if (SymbolType != SymbolRef::ST_Function && SymbolType != SymbolRef::ST_Data) + return std::error_code(); + ErrorOr SymbolAddressOrErr = Symbol.getAddress(); + if (auto EC = SymbolAddressOrErr.getError()) + return EC; + uint64_t SymbolAddress = *SymbolAddressOrErr; + if (OpdExtractor) { + // For big-endian PowerPC64 ELF, symbols in the .opd section refer to + // function descriptors. The first word of the descriptor is a pointer to + // the function's code. + // For the purposes of symbolization, pretend the symbol's address is that + // of the function's code, not the descriptor. + uint64_t OpdOffset = SymbolAddress - OpdAddress; + uint32_t OpdOffset32 = OpdOffset; + if (OpdOffset == OpdOffset32 && + OpdExtractor->isValidOffsetForAddress(OpdOffset32)) + SymbolAddress = OpdExtractor->getAddress(&OpdOffset32); + } + ErrorOr SymbolNameOrErr = Symbol.getName(); + if (auto EC = SymbolNameOrErr.getError()) + return EC; + StringRef SymbolName = *SymbolNameOrErr; + // Mach-O symbol table names have leading underscore, skip it. + if (Module->isMachO() && SymbolName.size() > 0 && SymbolName[0] == '_') + SymbolName = SymbolName.drop_front(); + // FIXME: If a function has alias, there are two entries in symbol table + // with same address size. 
Make sure we choose the correct one. + auto &M = SymbolType == SymbolRef::ST_Function ? Functions : Objects; + SymbolDesc SD = { SymbolAddress, SymbolSize }; + M.insert(std::make_pair(SD, SymbolName)); + return std::error_code(); +} + +// Return true if this is a 32-bit x86 PE COFF module. +bool SymbolizableObjectFile::isWin32Module() const { + auto *CoffObject = dyn_cast(Module); + return CoffObject && CoffObject->getMachine() == COFF::IMAGE_FILE_MACHINE_I386; +} + +uint64_t SymbolizableObjectFile::getModulePreferredBase() const { + if (auto *CoffObject = dyn_cast(Module)) + return CoffObject->getImageBase(); + return 0; +} + +bool SymbolizableObjectFile::getNameFromSymbolTable(SymbolRef::Type Type, + uint64_t Address, + std::string &Name, + uint64_t &Addr, + uint64_t &Size) const { + const auto &SymbolMap = Type == SymbolRef::ST_Function ? Functions : Objects; + if (SymbolMap.empty()) + return false; + SymbolDesc SD = { Address, Address }; + auto SymbolIterator = SymbolMap.upper_bound(SD); + if (SymbolIterator == SymbolMap.begin()) + return false; + --SymbolIterator; + if (SymbolIterator->first.Size != 0 && + SymbolIterator->first.Addr + SymbolIterator->first.Size <= Address) + return false; + Name = SymbolIterator->second.str(); + Addr = SymbolIterator->first.Addr; + Size = SymbolIterator->first.Size; + return true; +} + +DILineInfo SymbolizableObjectFile::symbolizeCode(uint64_t ModuleOffset, + FunctionNameKind FNKind, + bool UseSymbolTable) const { + DILineInfo LineInfo; + if (DebugInfoContext) { + LineInfo = DebugInfoContext->getLineInfoForAddress( + ModuleOffset, getDILineInfoSpecifier(FNKind)); + } + // Override function name from symbol table if necessary. 
+ if (FNKind == FunctionNameKind::LinkageName && UseSymbolTable) { + std::string FunctionName; + uint64_t Start, Size; + if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset, + FunctionName, Start, Size)) { + LineInfo.FunctionName = FunctionName; + } + } + return LineInfo; +} + +DIInliningInfo SymbolizableObjectFile::symbolizeInlinedCode( + uint64_t ModuleOffset, FunctionNameKind FNKind, bool UseSymbolTable) const { + DIInliningInfo InlinedContext; + + if (DebugInfoContext) + InlinedContext = DebugInfoContext->getInliningInfoForAddress( + ModuleOffset, getDILineInfoSpecifier(FNKind)); + // Make sure there is at least one frame in context. + if (InlinedContext.getNumberOfFrames() == 0) + InlinedContext.addFrame(DILineInfo()); + + if (FNKind != FunctionNameKind::LinkageName || !UseSymbolTable) + return InlinedContext; + + // Override the function name in lower frame with name from symbol table. + // We can't directly change the last element of DIInliningInfo, so copy + // all frames into new context, replacing function name in the last one. 
+ DIInliningInfo PatchedInlinedContext; + for (uint32_t i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) { + DILineInfo LineInfo = InlinedContext.getFrame(i); + if (i == n - 1) { + std::string FunctionName; + uint64_t Start, Size; + if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset, + FunctionName, Start, Size)) { + LineInfo.FunctionName = FunctionName; + } + } + PatchedInlinedContext.addFrame(LineInfo); + } + return PatchedInlinedContext; +} + +bool SymbolizableObjectFile::symbolizeData(uint64_t ModuleOffset, + std::string &Name, uint64_t &Start, + uint64_t &Size) const { + return getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset, Name, Start, + Size); +} + +} // namespace symbolize +} // namespace llvm + diff --git a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h new file mode 100644 index 00000000000..7c138d3e468 --- /dev/null +++ b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h @@ -0,0 +1,80 @@ +//===-- SymbolizableObjectFile.h -------------------------------- C++ -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the SymbolizableObjectFile class. 
+// +//===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEOBJECTFILE_H +#define LLVM_LIB_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEOBJECTFILE_H + +#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" +#include + +namespace llvm { +class DataExtractor; +} + +namespace llvm { +namespace symbolize { + +class SymbolizableObjectFile : public SymbolizableModule { +public: + static ErrorOr> + create(object::ObjectFile *Obj, std::unique_ptr DICtx); + + DILineInfo symbolizeCode(uint64_t ModuleOffset, FunctionNameKind FNKind, + bool UseSymbolTable) const override; + DIInliningInfo symbolizeInlinedCode(uint64_t ModuleOffset, + FunctionNameKind FNKind, + bool UseSymbolTable) const override; + bool symbolizeData(uint64_t ModuleOffset, std::string &Name, uint64_t &Start, + uint64_t &Size) const override; + + // Return true if this is a 32-bit x86 PE COFF module. + bool isWin32Module() const override; + + // Returns the preferred base of the module, i.e. where the loader would place + // it in memory assuming there were no conflicts. + uint64_t getModulePreferredBase() const override; + +private: + bool getNameFromSymbolTable(object::SymbolRef::Type Type, uint64_t Address, + std::string &Name, uint64_t &Addr, + uint64_t &Size) const; + // For big-endian PowerPC64 ELF, OpdAddress is the address of the .opd + // (function descriptor) section and OpdExtractor refers to its contents. + std::error_code addSymbol(const object::SymbolRef &Symbol, + uint64_t SymbolSize, + DataExtractor *OpdExtractor = nullptr, + uint64_t OpdAddress = 0); + std::error_code addCoffExportSymbols(const object::COFFObjectFile *CoffObj); + + object::ObjectFile *Module; + std::unique_ptr DebugInfoContext; + + struct SymbolDesc { + uint64_t Addr; + // If size is 0, assume that symbol occupies the whole memory range up to + // the following symbol. 
+ uint64_t Size; + friend bool operator<(const SymbolDesc &s1, const SymbolDesc &s2) { + return s1.Addr < s2.Addr; + } + }; + std::map Functions; + std::map Objects; + + SymbolizableObjectFile(object::ObjectFile *Obj, + std::unique_ptr DICtx); +}; + +} // namespace symbolize +} // namespace llvm + +#endif // LLVM_LIB_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEOBJECTFILE_H diff --git a/lib/DebugInfo/Symbolize/Symbolize.cpp b/lib/DebugInfo/Symbolize/Symbolize.cpp index 5a72410decf..2da35f930d0 100644 --- a/lib/DebugInfo/Symbolize/Symbolize.cpp +++ b/lib/DebugInfo/Symbolize/Symbolize.cpp @@ -13,6 +13,8 @@ #include "llvm/DebugInfo/Symbolize/Symbolize.h" +#include "SymbolizableObjectFile.h" + #include "llvm/ADT/STLExtras.h" #include "llvm/Config/config.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" @@ -20,7 +22,6 @@ #include "llvm/DebugInfo/PDB/PDBContext.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/MachO.h" -#include "llvm/Object/SymbolSize.h" #include "llvm/Support/COFF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compression.h" @@ -54,216 +55,12 @@ static bool error(std::error_code ec) { return true; } -static DILineInfoSpecifier -getDILineInfoSpecifier(FunctionNameKind FNKind) { - return DILineInfoSpecifier( - DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, FNKind); -} - -ModuleInfo::ModuleInfo(ObjectFile *Obj, std::unique_ptr DICtx) - : Module(Obj), DebugInfoContext(std::move(DICtx)) { - std::unique_ptr OpdExtractor; - uint64_t OpdAddress = 0; - // Find the .opd (function descriptor) section if any, for big-endian - // PowerPC64 ELF. 
- if (Module->getArch() == Triple::ppc64) { - for (section_iterator Section : Module->sections()) { - StringRef Name; - if (!error(Section->getName(Name)) && Name == ".opd") { - StringRef Data; - if (!error(Section->getContents(Data))) { - OpdExtractor.reset(new DataExtractor(Data, Module->isLittleEndian(), - Module->getBytesInAddress())); - OpdAddress = Section->getAddress(); - } - break; - } - } - } - std::vector> Symbols = - computeSymbolSizes(*Module); - for (auto &P : Symbols) - addSymbol(P.first, P.second, OpdExtractor.get(), OpdAddress); - - // If this is a COFF object and we didn't find any symbols, try the export - // table. - if (Symbols.empty()) { - if (auto *CoffObj = dyn_cast(Obj)) - addCoffExportSymbols(CoffObj); - } -} - -namespace { -struct OffsetNamePair { - uint32_t Offset; - StringRef Name; - bool operator<(const OffsetNamePair &R) const { - return Offset < R.Offset; - } -}; -} - -void ModuleInfo::addCoffExportSymbols(const COFFObjectFile *CoffObj) { - // Get all export names and offsets. - std::vector ExportSyms; - for (const ExportDirectoryEntryRef &Ref : CoffObj->export_directories()) { - StringRef Name; - uint32_t Offset; - if (error(Ref.getSymbolName(Name)) || error(Ref.getExportRVA(Offset))) - return; - ExportSyms.push_back(OffsetNamePair{Offset, Name}); - } - if (ExportSyms.empty()) - return; - - // Sort by ascending offset. - array_pod_sort(ExportSyms.begin(), ExportSyms.end()); - - // Approximate the symbol sizes by assuming they run to the next symbol. - // FIXME: This assumes all exports are functions. - uint64_t ImageBase = CoffObj->getImageBase(); - for (auto I = ExportSyms.begin(), E = ExportSyms.end(); I != E; ++I) { - OffsetNamePair &Export = *I; - // FIXME: The last export has a one byte size now. - uint32_t NextOffset = I != E ? 
I->Offset : Export.Offset + 1; - uint64_t SymbolStart = ImageBase + Export.Offset; - uint64_t SymbolSize = NextOffset - Export.Offset; - SymbolDesc SD = {SymbolStart, SymbolSize}; - Functions.insert(std::make_pair(SD, Export.Name)); - } -} - -void ModuleInfo::addSymbol(const SymbolRef &Symbol, uint64_t SymbolSize, - DataExtractor *OpdExtractor, uint64_t OpdAddress) { - SymbolRef::Type SymbolType = Symbol.getType(); - if (SymbolType != SymbolRef::ST_Function && SymbolType != SymbolRef::ST_Data) - return; - ErrorOr SymbolAddressOrErr = Symbol.getAddress(); - if (error(SymbolAddressOrErr.getError())) - return; - uint64_t SymbolAddress = *SymbolAddressOrErr; - if (OpdExtractor) { - // For big-endian PowerPC64 ELF, symbols in the .opd section refer to - // function descriptors. The first word of the descriptor is a pointer to - // the function's code. - // For the purposes of symbolization, pretend the symbol's address is that - // of the function's code, not the descriptor. - uint64_t OpdOffset = SymbolAddress - OpdAddress; - uint32_t OpdOffset32 = OpdOffset; - if (OpdOffset == OpdOffset32 && - OpdExtractor->isValidOffsetForAddress(OpdOffset32)) - SymbolAddress = OpdExtractor->getAddress(&OpdOffset32); - } - ErrorOr SymbolNameOrErr = Symbol.getName(); - if (error(SymbolNameOrErr.getError())) - return; - StringRef SymbolName = *SymbolNameOrErr; - // Mach-O symbol table names have leading underscore, skip it. - if (Module->isMachO() && SymbolName.size() > 0 && SymbolName[0] == '_') - SymbolName = SymbolName.drop_front(); - // FIXME: If a function has alias, there are two entries in symbol table - // with same address size. Make sure we choose the correct one. - auto &M = SymbolType == SymbolRef::ST_Function ? Functions : Objects; - SymbolDesc SD = { SymbolAddress, SymbolSize }; - M.insert(std::make_pair(SD, SymbolName)); -} - -// Return true if this is a 32-bit x86 PE COFF module. 
-bool ModuleInfo::isWin32Module() const { - auto *CoffObject = dyn_cast(Module); - return CoffObject && CoffObject->getMachine() == COFF::IMAGE_FILE_MACHINE_I386; -} - -uint64_t ModuleInfo::getModulePreferredBase() const { - if (auto *CoffObject = dyn_cast(Module)) - return CoffObject->getImageBase(); - return 0; -} - -bool ModuleInfo::getNameFromSymbolTable(SymbolRef::Type Type, uint64_t Address, - std::string &Name, uint64_t &Addr, - uint64_t &Size) const { - const auto &SymbolMap = Type == SymbolRef::ST_Function ? Functions : Objects; - if (SymbolMap.empty()) - return false; - SymbolDesc SD = { Address, Address }; - auto SymbolIterator = SymbolMap.upper_bound(SD); - if (SymbolIterator == SymbolMap.begin()) - return false; - --SymbolIterator; - if (SymbolIterator->first.Size != 0 && - SymbolIterator->first.Addr + SymbolIterator->first.Size <= Address) - return false; - Name = SymbolIterator->second.str(); - Addr = SymbolIterator->first.Addr; - Size = SymbolIterator->first.Size; - return true; -} - -DILineInfo ModuleInfo::symbolizeCode(uint64_t ModuleOffset, - FunctionNameKind FNKind, - bool UseSymbolTable) const { - DILineInfo LineInfo; - if (DebugInfoContext) { - LineInfo = DebugInfoContext->getLineInfoForAddress( - ModuleOffset, getDILineInfoSpecifier(FNKind)); - } - // Override function name from symbol table if necessary. 
- if (FNKind == FunctionNameKind::LinkageName && UseSymbolTable) { - std::string FunctionName; - uint64_t Start, Size; - if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset, - FunctionName, Start, Size)) { - LineInfo.FunctionName = FunctionName; - } - } - return LineInfo; -} - -DIInliningInfo ModuleInfo::symbolizeInlinedCode(uint64_t ModuleOffset, - FunctionNameKind FNKind, - bool UseSymbolTable) const { - DIInliningInfo InlinedContext; - - if (DebugInfoContext) { - InlinedContext = DebugInfoContext->getInliningInfoForAddress( - ModuleOffset, getDILineInfoSpecifier(FNKind)); - } - // Make sure there is at least one frame in context. - if (InlinedContext.getNumberOfFrames() == 0) { - InlinedContext.addFrame(DILineInfo()); - } - // Override the function name in lower frame with name from symbol table. - if (FNKind == FunctionNameKind::LinkageName && UseSymbolTable) { - DIInliningInfo PatchedInlinedContext; - for (uint32_t i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) { - DILineInfo LineInfo = InlinedContext.getFrame(i); - if (i == n - 1) { - std::string FunctionName; - uint64_t Start, Size; - if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset, - FunctionName, Start, Size)) { - LineInfo.FunctionName = FunctionName; - } - } - PatchedInlinedContext.addFrame(LineInfo); - } - InlinedContext = PatchedInlinedContext; - } - return InlinedContext; -} - -bool ModuleInfo::symbolizeData(uint64_t ModuleOffset, std::string &Name, - uint64_t &Start, uint64_t &Size) const { - return getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset, Name, Start, - Size); -} const char LLVMSymbolizer::kBadString[] = "??"; std::string LLVMSymbolizer::symbolizeCode(const std::string &ModuleName, uint64_t ModuleOffset) { - ModuleInfo *Info = getOrCreateModuleInfo(ModuleName); + SymbolizableModule *Info = getOrCreateModuleInfo(ModuleName); if (!Info) return printDILineInfo(DILineInfo(), Info); @@ -295,9 +92,10 @@ std::string LLVMSymbolizer::symbolizeData(const 
std::string &ModuleName, uint64_t Start = 0; uint64_t Size = 0; if (Opts.UseSymbolTable) { - if (ModuleInfo *Info = getOrCreateModuleInfo(ModuleName)) { - // If the user is giving us relative addresses, add the preferred base of the - // object to the offset before we do the query. It's what DIContext expects. + if (SymbolizableModule *Info = getOrCreateModuleInfo(ModuleName)) { + // If the user is giving us relative addresses, add the preferred base of + // the object to the offset before we do the query. It's what DIContext + // expects. if (Opts.RelativeAddresses) ModuleOffset += Info->getModulePreferredBase(); if (Info->symbolizeData(ModuleOffset, Name, Start, Size) && Opts.Demangle) @@ -510,7 +308,7 @@ LLVMSymbolizer::getObjectFileFromBinary(Binary *Bin, return Res; } -ModuleInfo * +SymbolizableModule * LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) { const auto &I = Modules.find(ModuleName); if (I != Modules.end()) @@ -530,8 +328,7 @@ LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) { if (!Objects.first) { // Failed to find valid object file. 
- Modules.insert( - std::make_pair(ModuleName, std::unique_ptr(nullptr))); + Modules.insert(std::make_pair(ModuleName, nullptr)); return nullptr; } std::unique_ptr Context; @@ -548,14 +345,20 @@ LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) { if (!Context) Context.reset(new DWARFContextInMemory(*Objects.second)); assert(Context); - auto Info = llvm::make_unique(Objects.first, std::move(Context)); - ModuleInfo *Res = Info.get(); - Modules.insert(std::make_pair(ModuleName, std::move(Info))); + auto ErrOrInfo = + SymbolizableObjectFile::create(Objects.first, std::move(Context)); + if (error(ErrOrInfo.getError())) { + Modules.insert(std::make_pair(ModuleName, nullptr)); + return nullptr; + } + SymbolizableModule *Res = ErrOrInfo.get().get(); + Modules.insert(std::make_pair(ModuleName, std::move(ErrOrInfo.get()))); return Res; } -std::string LLVMSymbolizer::printDILineInfo(DILineInfo LineInfo, - ModuleInfo *ModInfo) const { +std::string +LLVMSymbolizer::printDILineInfo(DILineInfo LineInfo, + const SymbolizableModule *ModInfo) const { // By default, DILineInfo contains "" for function/filename it // cannot fetch. We replace it to "??" to make our output closer to addr2line. static const std::string kDILineInfoBadString = ""; @@ -611,7 +414,7 @@ extern "C" char *__cxa_demangle(const char *mangled_name, char *output_buffer, #endif std::string LLVMSymbolizer::DemangleName(const std::string &Name, - ModuleInfo *ModInfo) { + const SymbolizableModule *ModInfo) { #if !defined(_MSC_VER) // We can spoil names of symbols with C linkage, so use an heuristic // approach to check if the name should be demangled. -- 2.34.1