From: Alexey Samsonov Date: Mon, 26 Oct 2015 17:56:12 +0000 (+0000) Subject: Move parts of llvm-symbolizer tool into LLVMSymbolize library. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=0cd635f7509d60acd16b8267fa2ed0b47066e0fc;p=oota-llvm.git Move parts of llvm-symbolizer tool into LLVMSymbolize library. Summary: See http://lists.llvm.org/pipermail/llvm-dev/2015-October/091624.html Reviewers: echristo Subscribers: llvm-commits, aizatsky Differential Revision: http://reviews.llvm.org/D13998 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@251316 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/DebugInfo/Symbolize/Symbolize.h b/include/llvm/DebugInfo/Symbolize/Symbolize.h new file mode 100644 index 00000000000..c58c51b0239 --- /dev/null +++ b/include/llvm/DebugInfo/Symbolize/Symbolize.h @@ -0,0 +1,151 @@ +//===-- Symbolize.h --------------------------------------------- C++ -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Header for LLVM symbolization library. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZE_H +#define LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZE_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/DebugInfo/DIContext.h" +#include "llvm/Object/MachOUniversal.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/MemoryBuffer.h" +#include +#include +#include + +namespace llvm { +namespace symbolize { + +using namespace object; +using FunctionNameKind = DILineInfoSpecifier::FunctionNameKind; +class ModuleInfo; + +class LLVMSymbolizer { +public: + struct Options { + FunctionNameKind PrintFunctions; + bool UseSymbolTable : 1; + bool PrintInlining : 1; + bool Demangle : 1; + bool RelativeAddresses : 1; + std::string DefaultArch; + std::vector DsymHints; + Options(FunctionNameKind PrintFunctions = FunctionNameKind::LinkageName, + bool UseSymbolTable = true, bool PrintInlining = true, + bool Demangle = true, bool RelativeAddresses = false, + std::string DefaultArch = "") + : PrintFunctions(PrintFunctions), UseSymbolTable(UseSymbolTable), + PrintInlining(PrintInlining), Demangle(Demangle), + RelativeAddresses(RelativeAddresses), DefaultArch(DefaultArch) {} + }; + + LLVMSymbolizer(const Options &Opts = Options()) : Opts(Opts) {} + ~LLVMSymbolizer() { + flush(); + } + + // Returns the result of symbolization for module name/offset as + // a string (possibly containing newlines). + std::string + symbolizeCode(const std::string &ModuleName, uint64_t ModuleOffset); + std::string + symbolizeData(const std::string &ModuleName, uint64_t ModuleOffset); + void flush(); + static std::string DemangleName(const std::string &Name, ModuleInfo *ModInfo); + +private: + typedef std::pair ObjectPair; + + ModuleInfo *getOrCreateModuleInfo(const std::string &ModuleName); + ObjectFile *lookUpDsymFile(const std::string &Path, const MachOObjectFile *ExeObj, + const std::string &ArchName); + + /// \brief Returns pair of pointers to object and debug object. + ObjectPair getOrCreateObjects(const std::string &Path, + const std::string &ArchName); + /// \brief Returns a parsed object file for a given architecture in a + /// universal binary (or the binary itself if it is an object file). + ObjectFile *getObjectFileFromBinary(Binary *Bin, const std::string &ArchName); + + std::string printDILineInfo(DILineInfo LineInfo, ModuleInfo *ModInfo) const; + + // Owns all the parsed binaries and object files. + SmallVector, 4> ParsedBinariesAndObjects; + SmallVector, 4> MemoryBuffers; + void addOwningBinary(OwningBinary OwningBin) { + std::unique_ptr Bin; + std::unique_ptr MemBuf; + std::tie(Bin, MemBuf) = OwningBin.takeBinary(); + ParsedBinariesAndObjects.push_back(std::move(Bin)); + MemoryBuffers.push_back(std::move(MemBuf)); + } + + // Owns module info objects. + std::map Modules; + std::map, ObjectFile *> + ObjectFileForArch; + std::map, ObjectPair> + ObjectPairForPathArch; + + Options Opts; + static const char kBadString[]; +}; + +class ModuleInfo { +public: + ModuleInfo(ObjectFile *Obj, DIContext *DICtx); + + DILineInfo symbolizeCode(uint64_t ModuleOffset, + const LLVMSymbolizer::Options &Opts) const; + DIInliningInfo symbolizeInlinedCode( + uint64_t ModuleOffset, const LLVMSymbolizer::Options &Opts) const; + bool symbolizeData(uint64_t ModuleOffset, std::string &Name, uint64_t &Start, + uint64_t &Size) const; + + // Return true if this is a 32-bit x86 PE COFF module. + bool isWin32Module() const; + + // Returns the preferred base of the module, i.e. where the loader would place + // it in memory assuming there were no conflicts. + uint64_t getModulePreferredBase() const; + +private: + bool getNameFromSymbolTable(SymbolRef::Type Type, uint64_t Address, + std::string &Name, uint64_t &Addr, + uint64_t &Size) const; + // For big-endian PowerPC64 ELF, OpdAddress is the address of the .opd + // (function descriptor) section and OpdExtractor refers to its contents. + void addSymbol(const SymbolRef &Symbol, uint64_t SymbolSize, + DataExtractor *OpdExtractor = nullptr, + uint64_t OpdAddress = 0); + void addCoffExportSymbols(const COFFObjectFile *CoffObj); + ObjectFile *Module; + std::unique_ptr DebugInfoContext; + + struct SymbolDesc { + uint64_t Addr; + // If size is 0, assume that symbol occupies the whole memory range up to + // the following symbol. + uint64_t Size; + friend bool operator<(const SymbolDesc &s1, const SymbolDesc &s2) { + return s1.Addr < s2.Addr; + } + }; + std::map Functions; + std::map Objects; +}; + +} // namespace symbolize +} // namespace llvm + +#endif diff --git a/lib/DebugInfo/CMakeLists.txt b/lib/DebugInfo/CMakeLists.txt index 645d92fef22..86f0efe2226 100644 --- a/lib/DebugInfo/CMakeLists.txt +++ b/lib/DebugInfo/CMakeLists.txt @@ -1,4 +1,3 @@ - add_subdirectory(DWARF) add_subdirectory(PDB) - +add_subdirectory(Symbolize) diff --git a/lib/DebugInfo/LLVMBuild.txt b/lib/DebugInfo/LLVMBuild.txt index 7a8e8baec2c..fbffe3a0691 100644 --- a/lib/DebugInfo/LLVMBuild.txt +++ b/lib/DebugInfo/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = DWARF PDB +subdirectories = DWARF PDB Symbolize [component_0] type = Group diff --git a/lib/DebugInfo/Makefile b/lib/DebugInfo/Makefile index 27a5e1f0f49..20e9495b433 100644 --- a/lib/DebugInfo/Makefile +++ b/lib/DebugInfo/Makefile @@ -10,6 +10,6 @@ LEVEL = ../.. include $(LEVEL)/Makefile.config -PARALLEL_DIRS := DWARF PDB +PARALLEL_DIRS := DWARF PDB Symbolize -include $(LEVEL)/Makefile.common \ No newline at end of file +include $(LEVEL)/Makefile.common diff --git a/lib/DebugInfo/Symbolize/CMakeLists.txt b/lib/DebugInfo/Symbolize/CMakeLists.txt new file mode 100644 index 00000000000..0e43eaa977c --- /dev/null +++ b/lib/DebugInfo/Symbolize/CMakeLists.txt @@ -0,0 +1,6 @@ +add_llvm_library(LLVMSymbolize + Symbolize.cpp + + ADDITIONAL_HEADER_DIRS + ${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo/Symbolize + ) diff --git a/lib/DebugInfo/Symbolize/LLVMBuild.txt b/lib/DebugInfo/Symbolize/LLVMBuild.txt new file mode 100644 index 00000000000..f9ec6b32f6d --- /dev/null +++ b/lib/DebugInfo/Symbolize/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/DebugInfo/Symbolize/LLVMBuild.txt ------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = Symbolize +parent = DebugInfo +required_libraries = DebugInfoDWARF DebugInfoPDB Object Support diff --git a/lib/DebugInfo/Symbolize/Makefile b/lib/DebugInfo/Symbolize/Makefile new file mode 100644 index 00000000000..17aac939658 --- /dev/null +++ b/lib/DebugInfo/Symbolize/Makefile @@ -0,0 +1,15 @@ +##===- lib/DebugInfo/Symbolize/Makefile --------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMSymbolize +BUILD_ARCHIVE := 1 + +include $(LEVEL)/Makefile.common + diff --git a/lib/DebugInfo/Symbolize/Symbolize.cpp b/lib/DebugInfo/Symbolize/Symbolize.cpp new file mode 100644 index 00000000000..ffe3747db0c --- /dev/null +++ b/lib/DebugInfo/Symbolize/Symbolize.cpp @@ -0,0 +1,644 @@ +//===-- LLVMSymbolize.cpp -------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implementation for LLVM symbolization library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/Symbolize/Symbolize.h" + +#include "llvm/ADT/STLExtras.h" +#include "llvm/Config/config.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/PDB/PDB.h" +#include "llvm/DebugInfo/PDB/PDBContext.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/MachO.h" +#include "llvm/Object/SymbolSize.h" +#include "llvm/Support/COFF.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compression.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include +#include + +#if defined(_MSC_VER) +#include +#include +#pragma comment(lib, "dbghelp.lib") + +// Windows.h conflicts with our COFF header definitions. +#ifdef IMAGE_FILE_MACHINE_I386 +#undef IMAGE_FILE_MACHINE_I386 +#endif +#endif + +namespace llvm { +namespace symbolize { + +// FIXME: Move this to llvm-symbolizer tool. +static bool error(std::error_code ec) { + if (!ec) + return false; + errs() << "LLVMSymbolizer: error reading file: " << ec.message() << ".\n"; + return true; +} + +static DILineInfoSpecifier +getDILineInfoSpecifier(const LLVMSymbolizer::Options &Opts) { + return DILineInfoSpecifier( + DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, + Opts.PrintFunctions); +} + +ModuleInfo::ModuleInfo(ObjectFile *Obj, DIContext *DICtx) + : Module(Obj), DebugInfoContext(DICtx) { + std::unique_ptr OpdExtractor; + uint64_t OpdAddress = 0; + // Find the .opd (function descriptor) section if any, for big-endian + // PowerPC64 ELF. + if (Module->getArch() == Triple::ppc64) { + for (section_iterator Section : Module->sections()) { + StringRef Name; + if (!error(Section->getName(Name)) && Name == ".opd") { + StringRef Data; + if (!error(Section->getContents(Data))) { + OpdExtractor.reset(new DataExtractor(Data, Module->isLittleEndian(), + Module->getBytesInAddress())); + OpdAddress = Section->getAddress(); + } + break; + } + } + } + std::vector> Symbols = + computeSymbolSizes(*Module); + for (auto &P : Symbols) + addSymbol(P.first, P.second, OpdExtractor.get(), OpdAddress); + + // If this is a COFF object and we didn't find any symbols, try the export + // table. + if (Symbols.empty()) { + if (auto *CoffObj = dyn_cast(Obj)) + addCoffExportSymbols(CoffObj); + } +} + +namespace { +struct OffsetNamePair { + uint32_t Offset; + StringRef Name; + bool operator<(const OffsetNamePair &R) const { + return Offset < R.Offset; + } +}; +} + +void ModuleInfo::addCoffExportSymbols(const COFFObjectFile *CoffObj) { + // Get all export names and offsets. + std::vector ExportSyms; + for (const ExportDirectoryEntryRef &Ref : CoffObj->export_directories()) { + StringRef Name; + uint32_t Offset; + if (error(Ref.getSymbolName(Name)) || error(Ref.getExportRVA(Offset))) + return; + ExportSyms.push_back(OffsetNamePair{Offset, Name}); + } + if (ExportSyms.empty()) + return; + + // Sort by ascending offset. + array_pod_sort(ExportSyms.begin(), ExportSyms.end()); + + // Approximate the symbol sizes by assuming they run to the next symbol. + // FIXME: This assumes all exports are functions. + uint64_t ImageBase = CoffObj->getImageBase(); + for (auto I = ExportSyms.begin(), E = ExportSyms.end(); I != E; ++I) { + OffsetNamePair &Export = *I; + // FIXME: The last export has a one byte size now. + uint32_t NextOffset = I != E ? I->Offset : Export.Offset + 1; + uint64_t SymbolStart = ImageBase + Export.Offset; + uint64_t SymbolSize = NextOffset - Export.Offset; + SymbolDesc SD = {SymbolStart, SymbolSize}; + Functions.insert(std::make_pair(SD, Export.Name)); + } +} + +void ModuleInfo::addSymbol(const SymbolRef &Symbol, uint64_t SymbolSize, + DataExtractor *OpdExtractor, uint64_t OpdAddress) { + SymbolRef::Type SymbolType = Symbol.getType(); + if (SymbolType != SymbolRef::ST_Function && SymbolType != SymbolRef::ST_Data) + return; + ErrorOr SymbolAddressOrErr = Symbol.getAddress(); + if (error(SymbolAddressOrErr.getError())) + return; + uint64_t SymbolAddress = *SymbolAddressOrErr; + if (OpdExtractor) { + // For big-endian PowerPC64 ELF, symbols in the .opd section refer to + // function descriptors. The first word of the descriptor is a pointer to + // the function's code. + // For the purposes of symbolization, pretend the symbol's address is that + // of the function's code, not the descriptor. + uint64_t OpdOffset = SymbolAddress - OpdAddress; + uint32_t OpdOffset32 = OpdOffset; + if (OpdOffset == OpdOffset32 && + OpdExtractor->isValidOffsetForAddress(OpdOffset32)) + SymbolAddress = OpdExtractor->getAddress(&OpdOffset32); + } + ErrorOr SymbolNameOrErr = Symbol.getName(); + if (error(SymbolNameOrErr.getError())) + return; + StringRef SymbolName = *SymbolNameOrErr; + // Mach-O symbol table names have leading underscore, skip it. + if (Module->isMachO() && SymbolName.size() > 0 && SymbolName[0] == '_') + SymbolName = SymbolName.drop_front(); + // FIXME: If a function has alias, there are two entries in symbol table + // with same address size. Make sure we choose the correct one. + auto &M = SymbolType == SymbolRef::ST_Function ? Functions : Objects; + SymbolDesc SD = { SymbolAddress, SymbolSize }; + M.insert(std::make_pair(SD, SymbolName)); +} + +// Return true if this is a 32-bit x86 PE COFF module. +bool ModuleInfo::isWin32Module() const { + auto *CoffObject = dyn_cast(Module); + return CoffObject && CoffObject->getMachine() == COFF::IMAGE_FILE_MACHINE_I386; +} + +uint64_t ModuleInfo::getModulePreferredBase() const { + if (auto *CoffObject = dyn_cast(Module)) + return CoffObject->getImageBase(); + return 0; +} + +bool ModuleInfo::getNameFromSymbolTable(SymbolRef::Type Type, uint64_t Address, + std::string &Name, uint64_t &Addr, + uint64_t &Size) const { + const auto &SymbolMap = Type == SymbolRef::ST_Function ? Functions : Objects; + if (SymbolMap.empty()) + return false; + SymbolDesc SD = { Address, Address }; + auto SymbolIterator = SymbolMap.upper_bound(SD); + if (SymbolIterator == SymbolMap.begin()) + return false; + --SymbolIterator; + if (SymbolIterator->first.Size != 0 && + SymbolIterator->first.Addr + SymbolIterator->first.Size <= Address) + return false; + Name = SymbolIterator->second.str(); + Addr = SymbolIterator->first.Addr; + Size = SymbolIterator->first.Size; + return true; +} + +DILineInfo ModuleInfo::symbolizeCode( + uint64_t ModuleOffset, const LLVMSymbolizer::Options &Opts) const { + DILineInfo LineInfo; + if (DebugInfoContext) { + LineInfo = DebugInfoContext->getLineInfoForAddress( + ModuleOffset, getDILineInfoSpecifier(Opts)); + } + // Override function name from symbol table if necessary. + if (Opts.PrintFunctions != FunctionNameKind::None && Opts.UseSymbolTable) { + std::string FunctionName; + uint64_t Start, Size; + if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset, + FunctionName, Start, Size)) { + LineInfo.FunctionName = FunctionName; + } + } + return LineInfo; +} + +DIInliningInfo ModuleInfo::symbolizeInlinedCode( + uint64_t ModuleOffset, const LLVMSymbolizer::Options &Opts) const { + DIInliningInfo InlinedContext; + + if (DebugInfoContext) { + InlinedContext = DebugInfoContext->getInliningInfoForAddress( + ModuleOffset, getDILineInfoSpecifier(Opts)); + } + // Make sure there is at least one frame in context. + if (InlinedContext.getNumberOfFrames() == 0) { + InlinedContext.addFrame(DILineInfo()); + } + // Override the function name in lower frame with name from symbol table. + if (Opts.PrintFunctions != FunctionNameKind::None && Opts.UseSymbolTable) { + DIInliningInfo PatchedInlinedContext; + for (uint32_t i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) { + DILineInfo LineInfo = InlinedContext.getFrame(i); + if (i == n - 1) { + std::string FunctionName; + uint64_t Start, Size; + if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset, + FunctionName, Start, Size)) { + LineInfo.FunctionName = FunctionName; + } + } + PatchedInlinedContext.addFrame(LineInfo); + } + InlinedContext = PatchedInlinedContext; + } + return InlinedContext; +} + +bool ModuleInfo::symbolizeData(uint64_t ModuleOffset, std::string &Name, + uint64_t &Start, uint64_t &Size) const { + return getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset, Name, Start, + Size); +} + +const char LLVMSymbolizer::kBadString[] = "??"; + +std::string LLVMSymbolizer::symbolizeCode(const std::string &ModuleName, + uint64_t ModuleOffset) { + ModuleInfo *Info = getOrCreateModuleInfo(ModuleName); + if (!Info) + return printDILineInfo(DILineInfo(), Info); + + // If the user is giving us relative addresses, add the preferred base of the + // object to the offset before we do the query. It's what DIContext expects. + if (Opts.RelativeAddresses) + ModuleOffset += Info->getModulePreferredBase(); + + if (Opts.PrintInlining) { + DIInliningInfo InlinedContext = + Info->symbolizeInlinedCode(ModuleOffset, Opts); + uint32_t FramesNum = InlinedContext.getNumberOfFrames(); + assert(FramesNum > 0); + std::string Result; + for (uint32_t i = 0; i < FramesNum; i++) { + DILineInfo LineInfo = InlinedContext.getFrame(i); + Result += printDILineInfo(LineInfo, Info); + } + return Result; + } + DILineInfo LineInfo = Info->symbolizeCode(ModuleOffset, Opts); + return printDILineInfo(LineInfo, Info); +} + +std::string LLVMSymbolizer::symbolizeData(const std::string &ModuleName, + uint64_t ModuleOffset) { + std::string Name = kBadString; + uint64_t Start = 0; + uint64_t Size = 0; + if (Opts.UseSymbolTable) { + if (ModuleInfo *Info = getOrCreateModuleInfo(ModuleName)) { + // If the user is giving us relative addresses, add the preferred base of the + // object to the offset before we do the query. It's what DIContext expects. + if (Opts.RelativeAddresses) + ModuleOffset += Info->getModulePreferredBase(); + if (Info->symbolizeData(ModuleOffset, Name, Start, Size) && Opts.Demangle) + Name = DemangleName(Name, Info); + } + } + std::stringstream ss; + ss << Name << "\n" << Start << " " << Size << "\n"; + return ss.str(); +} + +void LLVMSymbolizer::flush() { + DeleteContainerSeconds(Modules); + ObjectPairForPathArch.clear(); + ObjectFileForArch.clear(); +} + +// For Path="/path/to/foo" and Basename="foo" assume that debug info is in +// /path/to/foo.dSYM/Contents/Resources/DWARF/foo. +// For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in +// /path/to/bar.dSYM/Contents/Resources/DWARF/foo. +static +std::string getDarwinDWARFResourceForPath( + const std::string &Path, const std::string &Basename) { + SmallString<16> ResourceName = StringRef(Path); + if (sys::path::extension(Path) != ".dSYM") { + ResourceName += ".dSYM"; + } + sys::path::append(ResourceName, "Contents", "Resources", "DWARF"); + sys::path::append(ResourceName, Basename); + return ResourceName.str(); +} + +static bool checkFileCRC(StringRef Path, uint32_t CRCHash) { + ErrorOr> MB = + MemoryBuffer::getFileOrSTDIN(Path); + if (!MB) + return false; + return !zlib::isAvailable() || CRCHash == zlib::crc32(MB.get()->getBuffer()); +} + +static bool findDebugBinary(const std::string &OrigPath, + const std::string &DebuglinkName, uint32_t CRCHash, + std::string &Result) { + std::string OrigRealPath = OrigPath; +#if defined(HAVE_REALPATH) + if (char *RP = realpath(OrigPath.c_str(), nullptr)) { + OrigRealPath = RP; + free(RP); + } +#endif + SmallString<16> OrigDir(OrigRealPath); + llvm::sys::path::remove_filename(OrigDir); + SmallString<16> DebugPath = OrigDir; + // Try /path/to/original_binary/debuglink_name + llvm::sys::path::append(DebugPath, DebuglinkName); + if (checkFileCRC(DebugPath, CRCHash)) { + Result = DebugPath.str(); + return true; + } + // Try /path/to/original_binary/.debug/debuglink_name + DebugPath = OrigRealPath; + llvm::sys::path::append(DebugPath, ".debug", DebuglinkName); + if (checkFileCRC(DebugPath, CRCHash)) { + Result = DebugPath.str(); + return true; + } + // Try /usr/lib/debug/path/to/original_binary/debuglink_name + DebugPath = "/usr/lib/debug"; + llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir), + DebuglinkName); + if (checkFileCRC(DebugPath, CRCHash)) { + Result = DebugPath.str(); + return true; + } + return false; +} + +static bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName, + uint32_t &CRCHash) { + if (!Obj) + return false; + for (const SectionRef &Section : Obj->sections()) { + StringRef Name; + Section.getName(Name); + Name = Name.substr(Name.find_first_not_of("._")); + if (Name == "gnu_debuglink") { + StringRef Data; + Section.getContents(Data); + DataExtractor DE(Data, Obj->isLittleEndian(), 0); + uint32_t Offset = 0; + if (const char *DebugNameStr = DE.getCStr(&Offset)) { + // 4-byte align the offset. + Offset = (Offset + 3) & ~0x3; + if (DE.isValidOffsetForDataOfSize(Offset, 4)) { + DebugName = DebugNameStr; + CRCHash = DE.getU32(&Offset); + return true; + } + } + break; + } + } + return false; +} + +static +bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj, + const MachOObjectFile *Obj) { + ArrayRef dbg_uuid = DbgObj->getUuid(); + ArrayRef bin_uuid = Obj->getUuid(); + if (dbg_uuid.empty() || bin_uuid.empty()) + return false; + return !memcmp(dbg_uuid.data(), bin_uuid.data(), dbg_uuid.size()); +} + +ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath, + const MachOObjectFile *MachExeObj, const std::string &ArchName) { + // On Darwin we may find DWARF in separate object file in + // resource directory. + std::vector DsymPaths; + StringRef Filename = sys::path::filename(ExePath); + DsymPaths.push_back(getDarwinDWARFResourceForPath(ExePath, Filename)); + for (const auto &Path : Opts.DsymHints) { + DsymPaths.push_back(getDarwinDWARFResourceForPath(Path, Filename)); + } + for (const auto &path : DsymPaths) { + ErrorOr> BinaryOrErr = createBinary(path); + std::error_code EC = BinaryOrErr.getError(); + if (EC != errc::no_such_file_or_directory && !error(EC)) { + OwningBinary B = std::move(BinaryOrErr.get()); + ObjectFile *DbgObj = + getObjectFileFromBinary(B.getBinary(), ArchName); + const MachOObjectFile *MachDbgObj = + dyn_cast(DbgObj); + if (!MachDbgObj) continue; + if (darwinDsymMatchesBinary(MachDbgObj, MachExeObj)) { + addOwningBinary(std::move(B)); + return DbgObj; + } + } + } + return nullptr; +} + +LLVMSymbolizer::ObjectPair +LLVMSymbolizer::getOrCreateObjects(const std::string &Path, + const std::string &ArchName) { + const auto &I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName)); + if (I != ObjectPairForPathArch.end()) + return I->second; + ObjectFile *Obj = nullptr; + ObjectFile *DbgObj = nullptr; + ErrorOr> BinaryOrErr = createBinary(Path); + if (!error(BinaryOrErr.getError())) { + OwningBinary &B = BinaryOrErr.get(); + Obj = getObjectFileFromBinary(B.getBinary(), ArchName); + if (!Obj) { + ObjectPair Res = std::make_pair(nullptr, nullptr); + ObjectPairForPathArch[std::make_pair(Path, ArchName)] = Res; + return Res; + } + addOwningBinary(std::move(B)); + if (auto MachObj = dyn_cast(Obj)) + DbgObj = lookUpDsymFile(Path, MachObj, ArchName); + // Try to locate the debug binary using .gnu_debuglink section. + if (!DbgObj) { + std::string DebuglinkName; + uint32_t CRCHash; + std::string DebugBinaryPath; + if (getGNUDebuglinkContents(Obj, DebuglinkName, CRCHash) && + findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath)) { + BinaryOrErr = createBinary(DebugBinaryPath); + if (!error(BinaryOrErr.getError())) { + OwningBinary B = std::move(BinaryOrErr.get()); + DbgObj = getObjectFileFromBinary(B.getBinary(), ArchName); + addOwningBinary(std::move(B)); + } + } + } + } + if (!DbgObj) + DbgObj = Obj; + ObjectPair Res = std::make_pair(Obj, DbgObj); + ObjectPairForPathArch[std::make_pair(Path, ArchName)] = Res; + return Res; +} + +ObjectFile * +LLVMSymbolizer::getObjectFileFromBinary(Binary *Bin, + const std::string &ArchName) { + if (!Bin) + return nullptr; + ObjectFile *Res = nullptr; + if (MachOUniversalBinary *UB = dyn_cast(Bin)) { + const auto &I = ObjectFileForArch.find( + std::make_pair(UB, ArchName)); + if (I != ObjectFileForArch.end()) + return I->second; + ErrorOr> ParsedObj = + UB->getObjectForArch(ArchName); + if (ParsedObj) { + Res = ParsedObj.get().get(); + ParsedBinariesAndObjects.push_back(std::move(ParsedObj.get())); + } + ObjectFileForArch[std::make_pair(UB, ArchName)] = Res; + } else if (Bin->isObject()) { + Res = cast(Bin); + } + return Res; +} + +ModuleInfo * +LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) { + const auto &I = Modules.find(ModuleName); + if (I != Modules.end()) + return I->second; + std::string BinaryName = ModuleName; + std::string ArchName = Opts.DefaultArch; + size_t ColonPos = ModuleName.find_last_of(':'); + // Verify that substring after colon form a valid arch name. + if (ColonPos != std::string::npos) { + std::string ArchStr = ModuleName.substr(ColonPos + 1); + if (Triple(ArchStr).getArch() != Triple::UnknownArch) { + BinaryName = ModuleName.substr(0, ColonPos); + ArchName = ArchStr; + } + } + ObjectPair Objects = getOrCreateObjects(BinaryName, ArchName); + + if (!Objects.first) { + // Failed to find valid object file. + Modules.insert(make_pair(ModuleName, (ModuleInfo *)nullptr)); + return nullptr; + } + DIContext *Context = nullptr; + if (auto CoffObject = dyn_cast(Objects.first)) { + // If this is a COFF object, assume it contains PDB debug information. If + // we don't find any we will fall back to the DWARF case. + std::unique_ptr Session; + PDB_ErrorCode Error = loadDataForEXE(PDB_ReaderType::DIA, + Objects.first->getFileName(), Session); + if (Error == PDB_ErrorCode::Success) { + Context = new PDBContext(*CoffObject, std::move(Session)); + } + } + if (!Context) + Context = new DWARFContextInMemory(*Objects.second); + assert(Context); + ModuleInfo *Info = new ModuleInfo(Objects.first, Context); + Modules.insert(make_pair(ModuleName, Info)); + return Info; +} + +std::string LLVMSymbolizer::printDILineInfo(DILineInfo LineInfo, + ModuleInfo *ModInfo) const { + // By default, DILineInfo contains "" for function/filename it + // cannot fetch. We replace it to "??" to make our output closer to addr2line. + static const std::string kDILineInfoBadString = ""; + std::stringstream Result; + if (Opts.PrintFunctions != FunctionNameKind::None) { + std::string FunctionName = LineInfo.FunctionName; + if (FunctionName == kDILineInfoBadString) + FunctionName = kBadString; + else if (Opts.Demangle) + FunctionName = DemangleName(FunctionName, ModInfo); + Result << FunctionName << "\n"; + } + std::string Filename = LineInfo.FileName; + if (Filename == kDILineInfoBadString) + Filename = kBadString; + Result << Filename << ":" << LineInfo.Line << ":" << LineInfo.Column << "\n"; + return Result.str(); +} + +// Undo these various manglings for Win32 extern "C" functions: +// cdecl - _foo +// stdcall - _foo@12 +// fastcall - @foo@12 +// vectorcall - foo@@12 +// These are all different linkage names for 'foo'. +static StringRef demanglePE32ExternCFunc(StringRef SymbolName) { + // Remove any '_' or '@' prefix. + char Front = SymbolName.empty() ? '\0' : SymbolName[0]; + if (Front == '_' || Front == '@') + SymbolName = SymbolName.drop_front(); + + // Remove any '@[0-9]+' suffix. + if (Front != '?') { + size_t AtPos = SymbolName.rfind('@'); + if (AtPos != StringRef::npos && + std::all_of(SymbolName.begin() + AtPos + 1, SymbolName.end(), + [](char C) { return C >= '0' && C <= '9'; })) { + SymbolName = SymbolName.substr(0, AtPos); + } + } + + // Remove any ending '@' for vectorcall. + if (SymbolName.endswith("@")) + SymbolName = SymbolName.drop_back(); + + return SymbolName; +} + +#if !defined(_MSC_VER) +// Assume that __cxa_demangle is provided by libcxxabi (except for Windows). +extern "C" char *__cxa_demangle(const char *mangled_name, char *output_buffer, + size_t *length, int *status); +#endif + +std::string LLVMSymbolizer::DemangleName(const std::string &Name, + ModuleInfo *ModInfo) { +#if !defined(_MSC_VER) + // We can spoil names of symbols with C linkage, so use an heuristic + // approach to check if the name should be demangled. + if (Name.substr(0, 2) == "_Z") { + int status = 0; + char *DemangledName = __cxa_demangle(Name.c_str(), nullptr, nullptr, &status); + if (status != 0) + return Name; + std::string Result = DemangledName; + free(DemangledName); + return Result; + } +#else + if (!Name.empty() && Name.front() == '?') { + // Only do MSVC C++ demangling on symbols starting with '?'. + char DemangledName[1024] = {0}; + DWORD result = ::UnDecorateSymbolName( + Name.c_str(), DemangledName, 1023, + UNDNAME_NO_ACCESS_SPECIFIERS | // Strip public, private, protected + UNDNAME_NO_ALLOCATION_LANGUAGE | // Strip __thiscall, __stdcall, etc + UNDNAME_NO_THROW_SIGNATURES | // Strip throw() specifications + UNDNAME_NO_MEMBER_TYPE | // Strip virtual, static, etc specifiers + UNDNAME_NO_MS_KEYWORDS | // Strip all MS extension keywords + UNDNAME_NO_FUNCTION_RETURNS); // Strip function return types + return (result == 0) ? Name : std::string(DemangledName); + } +#endif + if (ModInfo->isWin32Module()) + return std::string(demanglePE32ExternCFunc(Name)); + return Name; +} + +} // namespace symbolize +} // namespace llvm diff --git a/tools/llvm-symbolizer/CMakeLists.txt b/tools/llvm-symbolizer/CMakeLists.txt index 5df3b17a065..b04c45ff744 100644 --- a/tools/llvm-symbolizer/CMakeLists.txt +++ b/tools/llvm-symbolizer/CMakeLists.txt @@ -8,9 +8,9 @@ set(LLVM_LINK_COMPONENTS DebugInfoPDB Object Support + Symbolize ) add_llvm_tool(llvm-symbolizer - LLVMSymbolize.cpp llvm-symbolizer.cpp ) diff --git a/tools/llvm-symbolizer/LLVMSymbolize.cpp b/tools/llvm-symbolizer/LLVMSymbolize.cpp deleted file mode 100644 index 51bb965b8df..00000000000 --- a/tools/llvm-symbolizer/LLVMSymbolize.cpp +++ /dev/null @@ -1,642 +0,0 @@ -//===-- LLVMSymbolize.cpp -------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Implementation for LLVM symbolization library. -// -//===----------------------------------------------------------------------===// - -#include "LLVMSymbolize.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Config/config.h" -#include "llvm/DebugInfo/DWARF/DWARFContext.h" -#include "llvm/DebugInfo/PDB/PDB.h" -#include "llvm/DebugInfo/PDB/PDBContext.h" -#include "llvm/Object/ELFObjectFile.h" -#include "llvm/Object/MachO.h" -#include "llvm/Object/SymbolSize.h" -#include "llvm/Support/COFF.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/Compression.h" -#include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Errc.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Path.h" -#include -#include - -#if defined(_MSC_VER) -#include -#include -#pragma comment(lib, "dbghelp.lib") - -// Windows.h conflicts with our COFF header definitions. -#ifdef IMAGE_FILE_MACHINE_I386 -#undef IMAGE_FILE_MACHINE_I386 -#endif -#endif - -namespace llvm { -namespace symbolize { - -static bool error(std::error_code ec) { - if (!ec) - return false; - errs() << "LLVMSymbolizer: error reading file: " << ec.message() << ".\n"; - return true; -} - -static DILineInfoSpecifier -getDILineInfoSpecifier(const LLVMSymbolizer::Options &Opts) { - return DILineInfoSpecifier( - DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, - Opts.PrintFunctions); -} - -ModuleInfo::ModuleInfo(ObjectFile *Obj, DIContext *DICtx) - : Module(Obj), DebugInfoContext(DICtx) { - std::unique_ptr OpdExtractor; - uint64_t OpdAddress = 0; - // Find the .opd (function descriptor) section if any, for big-endian - // PowerPC64 ELF. - if (Module->getArch() == Triple::ppc64) { - for (section_iterator Section : Module->sections()) { - StringRef Name; - if (!error(Section->getName(Name)) && Name == ".opd") { - StringRef Data; - if (!error(Section->getContents(Data))) { - OpdExtractor.reset(new DataExtractor(Data, Module->isLittleEndian(), - Module->getBytesInAddress())); - OpdAddress = Section->getAddress(); - } - break; - } - } - } - std::vector> Symbols = - computeSymbolSizes(*Module); - for (auto &P : Symbols) - addSymbol(P.first, P.second, OpdExtractor.get(), OpdAddress); - - // If this is a COFF object and we didn't find any symbols, try the export - // table. - if (Symbols.empty()) { - if (auto *CoffObj = dyn_cast(Obj)) - addCoffExportSymbols(CoffObj); - } -} - -namespace { -struct OffsetNamePair { - uint32_t Offset; - StringRef Name; - bool operator<(const OffsetNamePair &R) const { - return Offset < R.Offset; - } -}; -} - -void ModuleInfo::addCoffExportSymbols(const COFFObjectFile *CoffObj) { - // Get all export names and offsets. - std::vector ExportSyms; - for (const ExportDirectoryEntryRef &Ref : CoffObj->export_directories()) { - StringRef Name; - uint32_t Offset; - if (error(Ref.getSymbolName(Name)) || error(Ref.getExportRVA(Offset))) - return; - ExportSyms.push_back(OffsetNamePair{Offset, Name}); - } - if (ExportSyms.empty()) - return; - - // Sort by ascending offset. - array_pod_sort(ExportSyms.begin(), ExportSyms.end()); - - // Approximate the symbol sizes by assuming they run to the next symbol. - // FIXME: This assumes all exports are functions. - uint64_t ImageBase = CoffObj->getImageBase(); - for (auto I = ExportSyms.begin(), E = ExportSyms.end(); I != E; ++I) { - OffsetNamePair &Export = *I; - // FIXME: The last export has a one byte size now. - uint32_t NextOffset = I != E ? I->Offset : Export.Offset + 1; - uint64_t SymbolStart = ImageBase + Export.Offset; - uint64_t SymbolSize = NextOffset - Export.Offset; - SymbolDesc SD = {SymbolStart, SymbolSize}; - Functions.insert(std::make_pair(SD, Export.Name)); - } -} - -void ModuleInfo::addSymbol(const SymbolRef &Symbol, uint64_t SymbolSize, - DataExtractor *OpdExtractor, uint64_t OpdAddress) { - SymbolRef::Type SymbolType = Symbol.getType(); - if (SymbolType != SymbolRef::ST_Function && SymbolType != SymbolRef::ST_Data) - return; - ErrorOr SymbolAddressOrErr = Symbol.getAddress(); - if (error(SymbolAddressOrErr.getError())) - return; - uint64_t SymbolAddress = *SymbolAddressOrErr; - if (OpdExtractor) { - // For big-endian PowerPC64 ELF, symbols in the .opd section refer to - // function descriptors. The first word of the descriptor is a pointer to - // the function's code. - // For the purposes of symbolization, pretend the symbol's address is that - // of the function's code, not the descriptor. - uint64_t OpdOffset = SymbolAddress - OpdAddress; - uint32_t OpdOffset32 = OpdOffset; - if (OpdOffset == OpdOffset32 && - OpdExtractor->isValidOffsetForAddress(OpdOffset32)) - SymbolAddress = OpdExtractor->getAddress(&OpdOffset32); - } - ErrorOr SymbolNameOrErr = Symbol.getName(); - if (error(SymbolNameOrErr.getError())) - return; - StringRef SymbolName = *SymbolNameOrErr; - // Mach-O symbol table names have leading underscore, skip it. - if (Module->isMachO() && SymbolName.size() > 0 && SymbolName[0] == '_') - SymbolName = SymbolName.drop_front(); - // FIXME: If a function has alias, there are two entries in symbol table - // with same address size. Make sure we choose the correct one. - auto &M = SymbolType == SymbolRef::ST_Function ? Functions : Objects; - SymbolDesc SD = { SymbolAddress, SymbolSize }; - M.insert(std::make_pair(SD, SymbolName)); -} - -// Return true if this is a 32-bit x86 PE COFF module. -bool ModuleInfo::isWin32Module() const { - auto *CoffObject = dyn_cast(Module); - return CoffObject && CoffObject->getMachine() == COFF::IMAGE_FILE_MACHINE_I386; -} - -uint64_t ModuleInfo::getModulePreferredBase() const { - if (auto *CoffObject = dyn_cast(Module)) - return CoffObject->getImageBase(); - return 0; -} - -bool ModuleInfo::getNameFromSymbolTable(SymbolRef::Type Type, uint64_t Address, - std::string &Name, uint64_t &Addr, - uint64_t &Size) const { - const auto &SymbolMap = Type == SymbolRef::ST_Function ? Functions : Objects; - if (SymbolMap.empty()) - return false; - SymbolDesc SD = { Address, Address }; - auto SymbolIterator = SymbolMap.upper_bound(SD); - if (SymbolIterator == SymbolMap.begin()) - return false; - --SymbolIterator; - if (SymbolIterator->first.Size != 0 && - SymbolIterator->first.Addr + SymbolIterator->first.Size <= Address) - return false; - Name = SymbolIterator->second.str(); - Addr = SymbolIterator->first.Addr; - Size = SymbolIterator->first.Size; - return true; -} - -DILineInfo ModuleInfo::symbolizeCode( - uint64_t ModuleOffset, const LLVMSymbolizer::Options &Opts) const { - DILineInfo LineInfo; - if (DebugInfoContext) { - LineInfo = DebugInfoContext->getLineInfoForAddress( - ModuleOffset, getDILineInfoSpecifier(Opts)); - } - // Override function name from symbol table if necessary. - if (Opts.PrintFunctions != FunctionNameKind::None && Opts.UseSymbolTable) { - std::string FunctionName; - uint64_t Start, Size; - if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset, - FunctionName, Start, Size)) { - LineInfo.FunctionName = FunctionName; - } - } - return LineInfo; -} - -DIInliningInfo ModuleInfo::symbolizeInlinedCode( - uint64_t ModuleOffset, const LLVMSymbolizer::Options &Opts) const { - DIInliningInfo InlinedContext; - - if (DebugInfoContext) { - InlinedContext = DebugInfoContext->getInliningInfoForAddress( - ModuleOffset, getDILineInfoSpecifier(Opts)); - } - // Make sure there is at least one frame in context. - if (InlinedContext.getNumberOfFrames() == 0) { - InlinedContext.addFrame(DILineInfo()); - } - // Override the function name in lower frame with name from symbol table. - if (Opts.PrintFunctions != FunctionNameKind::None && Opts.UseSymbolTable) { - DIInliningInfo PatchedInlinedContext; - for (uint32_t i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) { - DILineInfo LineInfo = InlinedContext.getFrame(i); - if (i == n - 1) { - std::string FunctionName; - uint64_t Start, Size; - if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset, - FunctionName, Start, Size)) { - LineInfo.FunctionName = FunctionName; - } - } - PatchedInlinedContext.addFrame(LineInfo); - } - InlinedContext = PatchedInlinedContext; - } - return InlinedContext; -} - -bool ModuleInfo::symbolizeData(uint64_t ModuleOffset, std::string &Name, - uint64_t &Start, uint64_t &Size) const { - return getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset, Name, Start, - Size); -} - -const char LLVMSymbolizer::kBadString[] = "??"; - -std::string LLVMSymbolizer::symbolizeCode(const std::string &ModuleName, - uint64_t ModuleOffset) { - ModuleInfo *Info = getOrCreateModuleInfo(ModuleName); - if (!Info) - return printDILineInfo(DILineInfo(), Info); - - // If the user is giving us relative addresses, add the preferred base of the - // object to the offset before we do the query. It's what DIContext expects. - if (Opts.RelativeAddresses) - ModuleOffset += Info->getModulePreferredBase(); - - if (Opts.PrintInlining) { - DIInliningInfo InlinedContext = - Info->symbolizeInlinedCode(ModuleOffset, Opts); - uint32_t FramesNum = InlinedContext.getNumberOfFrames(); - assert(FramesNum > 0); - std::string Result; - for (uint32_t i = 0; i < FramesNum; i++) { - DILineInfo LineInfo = InlinedContext.getFrame(i); - Result += printDILineInfo(LineInfo, Info); - } - return Result; - } - DILineInfo LineInfo = Info->symbolizeCode(ModuleOffset, Opts); - return printDILineInfo(LineInfo, Info); -} - -std::string LLVMSymbolizer::symbolizeData(const std::string &ModuleName, - uint64_t ModuleOffset) { - std::string Name = kBadString; - uint64_t Start = 0; - uint64_t Size = 0; - if (Opts.UseSymbolTable) { - if (ModuleInfo *Info = getOrCreateModuleInfo(ModuleName)) { - // If the user is giving us relative addresses, add the preferred base of the - // object to the offset before we do the query. It's what DIContext expects. - if (Opts.RelativeAddresses) - ModuleOffset += Info->getModulePreferredBase(); - if (Info->symbolizeData(ModuleOffset, Name, Start, Size) && Opts.Demangle) - Name = DemangleName(Name, Info); - } - } - std::stringstream ss; - ss << Name << "\n" << Start << " " << Size << "\n"; - return ss.str(); -} - -void LLVMSymbolizer::flush() { - DeleteContainerSeconds(Modules); - ObjectPairForPathArch.clear(); - ObjectFileForArch.clear(); -} - -// For Path="/path/to/foo" and Basename="foo" assume that debug info is in -// /path/to/foo.dSYM/Contents/Resources/DWARF/foo. -// For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in -// /path/to/bar.dSYM/Contents/Resources/DWARF/foo. -static -std::string getDarwinDWARFResourceForPath( - const std::string &Path, const std::string &Basename) { - SmallString<16> ResourceName = StringRef(Path); - if (sys::path::extension(Path) != ".dSYM") { - ResourceName += ".dSYM"; - } - sys::path::append(ResourceName, "Contents", "Resources", "DWARF"); - sys::path::append(ResourceName, Basename); - return ResourceName.str(); -} - -static bool checkFileCRC(StringRef Path, uint32_t CRCHash) { - ErrorOr> MB = - MemoryBuffer::getFileOrSTDIN(Path); - if (!MB) - return false; - return !zlib::isAvailable() || CRCHash == zlib::crc32(MB.get()->getBuffer()); -} - -static bool findDebugBinary(const std::string &OrigPath, - const std::string &DebuglinkName, uint32_t CRCHash, - std::string &Result) { - std::string OrigRealPath = OrigPath; -#if defined(HAVE_REALPATH) - if (char *RP = realpath(OrigPath.c_str(), nullptr)) { - OrigRealPath = RP; - free(RP); - } -#endif - SmallString<16> OrigDir(OrigRealPath); - llvm::sys::path::remove_filename(OrigDir); - SmallString<16> DebugPath = OrigDir; - // Try /path/to/original_binary/debuglink_name - llvm::sys::path::append(DebugPath, DebuglinkName); - if (checkFileCRC(DebugPath, CRCHash)) { - Result = DebugPath.str(); - return true; - } - // Try /path/to/original_binary/.debug/debuglink_name - DebugPath = OrigRealPath; - llvm::sys::path::append(DebugPath, ".debug", DebuglinkName); - if (checkFileCRC(DebugPath, CRCHash)) { - Result = DebugPath.str(); - return true; - } - // Try /usr/lib/debug/path/to/original_binary/debuglink_name - DebugPath = "/usr/lib/debug"; - llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir), - DebuglinkName); - if (checkFileCRC(DebugPath, CRCHash)) { - Result = DebugPath.str(); - return true; - } - return false; -} - -static bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName, - uint32_t &CRCHash) { - if (!Obj) - return false; - for (const SectionRef &Section : Obj->sections()) { - StringRef Name; - Section.getName(Name); - Name = Name.substr(Name.find_first_not_of("._")); - if (Name == "gnu_debuglink") { - StringRef Data; - Section.getContents(Data); - DataExtractor DE(Data, Obj->isLittleEndian(), 0); - uint32_t Offset = 0; - if (const char *DebugNameStr = DE.getCStr(&Offset)) { - // 4-byte align the offset. - Offset = (Offset + 3) & ~0x3; - if (DE.isValidOffsetForDataOfSize(Offset, 4)) { - DebugName = DebugNameStr; - CRCHash = DE.getU32(&Offset); - return true; - } - } - break; - } - } - return false; -} - -static -bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj, - const MachOObjectFile *Obj) { - ArrayRef dbg_uuid = DbgObj->getUuid(); - ArrayRef bin_uuid = Obj->getUuid(); - if (dbg_uuid.empty() || bin_uuid.empty()) - return false; - return !memcmp(dbg_uuid.data(), bin_uuid.data(), dbg_uuid.size()); -} - -ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath, - const MachOObjectFile *MachExeObj, const std::string &ArchName) { - // On Darwin we may find DWARF in separate object file in - // resource directory. - std::vector DsymPaths; - StringRef Filename = sys::path::filename(ExePath); - DsymPaths.push_back(getDarwinDWARFResourceForPath(ExePath, Filename)); - for (const auto &Path : Opts.DsymHints) { - DsymPaths.push_back(getDarwinDWARFResourceForPath(Path, Filename)); - } - for (const auto &path : DsymPaths) { - ErrorOr> BinaryOrErr = createBinary(path); - std::error_code EC = BinaryOrErr.getError(); - if (EC != errc::no_such_file_or_directory && !error(EC)) { - OwningBinary B = std::move(BinaryOrErr.get()); - ObjectFile *DbgObj = - getObjectFileFromBinary(B.getBinary(), ArchName); - const MachOObjectFile *MachDbgObj = - dyn_cast(DbgObj); - if (!MachDbgObj) continue; - if (darwinDsymMatchesBinary(MachDbgObj, MachExeObj)) { - addOwningBinary(std::move(B)); - return DbgObj; - } - } - } - return nullptr; -} - -LLVMSymbolizer::ObjectPair -LLVMSymbolizer::getOrCreateObjects(const std::string &Path, - const std::string &ArchName) { - const auto &I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName)); - if (I != ObjectPairForPathArch.end()) - return I->second; - ObjectFile *Obj = nullptr; - ObjectFile *DbgObj = nullptr; - ErrorOr> BinaryOrErr = createBinary(Path); - if (!error(BinaryOrErr.getError())) { - OwningBinary &B = BinaryOrErr.get(); - Obj = getObjectFileFromBinary(B.getBinary(), ArchName); - if (!Obj) { - ObjectPair Res = std::make_pair(nullptr, nullptr); - ObjectPairForPathArch[std::make_pair(Path, ArchName)] = Res; - return Res; - } - addOwningBinary(std::move(B)); - if (auto MachObj = dyn_cast(Obj)) - DbgObj = lookUpDsymFile(Path, MachObj, ArchName); - // Try to locate the debug binary using .gnu_debuglink section. - if (!DbgObj) { - std::string DebuglinkName; - uint32_t CRCHash; - std::string DebugBinaryPath; - if (getGNUDebuglinkContents(Obj, DebuglinkName, CRCHash) && - findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath)) { - BinaryOrErr = createBinary(DebugBinaryPath); - if (!error(BinaryOrErr.getError())) { - OwningBinary B = std::move(BinaryOrErr.get()); - DbgObj = getObjectFileFromBinary(B.getBinary(), ArchName); - addOwningBinary(std::move(B)); - } - } - } - } - if (!DbgObj) - DbgObj = Obj; - ObjectPair Res = std::make_pair(Obj, DbgObj); - ObjectPairForPathArch[std::make_pair(Path, ArchName)] = Res; - return Res; -} - -ObjectFile * -LLVMSymbolizer::getObjectFileFromBinary(Binary *Bin, - const std::string &ArchName) { - if (!Bin) - return nullptr; - ObjectFile *Res = nullptr; - if (MachOUniversalBinary *UB = dyn_cast(Bin)) { - const auto &I = ObjectFileForArch.find( - std::make_pair(UB, ArchName)); - if (I != ObjectFileForArch.end()) - return I->second; - ErrorOr> ParsedObj = - UB->getObjectForArch(ArchName); - if (ParsedObj) { - Res = ParsedObj.get().get(); - ParsedBinariesAndObjects.push_back(std::move(ParsedObj.get())); - } - ObjectFileForArch[std::make_pair(UB, ArchName)] = Res; - } else if (Bin->isObject()) { - Res = cast(Bin); - } - return Res; -} - -ModuleInfo * -LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) { - const auto &I = Modules.find(ModuleName); - if (I != Modules.end()) - return I->second; - std::string BinaryName = ModuleName; - std::string ArchName = Opts.DefaultArch; - size_t ColonPos = ModuleName.find_last_of(':'); - // Verify that substring after colon form a valid arch name. - if (ColonPos != std::string::npos) { - std::string ArchStr = ModuleName.substr(ColonPos + 1); - if (Triple(ArchStr).getArch() != Triple::UnknownArch) { - BinaryName = ModuleName.substr(0, ColonPos); - ArchName = ArchStr; - } - } - ObjectPair Objects = getOrCreateObjects(BinaryName, ArchName); - - if (!Objects.first) { - // Failed to find valid object file. - Modules.insert(make_pair(ModuleName, (ModuleInfo *)nullptr)); - return nullptr; - } - DIContext *Context = nullptr; - if (auto CoffObject = dyn_cast(Objects.first)) { - // If this is a COFF object, assume it contains PDB debug information. If - // we don't find any we will fall back to the DWARF case. - std::unique_ptr Session; - PDB_ErrorCode Error = loadDataForEXE(PDB_ReaderType::DIA, - Objects.first->getFileName(), Session); - if (Error == PDB_ErrorCode::Success) { - Context = new PDBContext(*CoffObject, std::move(Session)); - } - } - if (!Context) - Context = new DWARFContextInMemory(*Objects.second); - assert(Context); - ModuleInfo *Info = new ModuleInfo(Objects.first, Context); - Modules.insert(make_pair(ModuleName, Info)); - return Info; -} - -std::string LLVMSymbolizer::printDILineInfo(DILineInfo LineInfo, - ModuleInfo *ModInfo) const { - // By default, DILineInfo contains "" for function/filename it - // cannot fetch. We replace it to "??" to make our output closer to addr2line. - static const std::string kDILineInfoBadString = ""; - std::stringstream Result; - if (Opts.PrintFunctions != FunctionNameKind::None) { - std::string FunctionName = LineInfo.FunctionName; - if (FunctionName == kDILineInfoBadString) - FunctionName = kBadString; - else if (Opts.Demangle) - FunctionName = DemangleName(FunctionName, ModInfo); - Result << FunctionName << "\n"; - } - std::string Filename = LineInfo.FileName; - if (Filename == kDILineInfoBadString) - Filename = kBadString; - Result << Filename << ":" << LineInfo.Line << ":" << LineInfo.Column << "\n"; - return Result.str(); -} - -// Undo these various manglings for Win32 extern "C" functions: -// cdecl - _foo -// stdcall - _foo@12 -// fastcall - @foo@12 -// vectorcall - foo@@12 -// These are all different linkage names for 'foo'. -static StringRef demanglePE32ExternCFunc(StringRef SymbolName) { - // Remove any '_' or '@' prefix. - char Front = SymbolName.empty() ? '\0' : SymbolName[0]; - if (Front == '_' || Front == '@') - SymbolName = SymbolName.drop_front(); - - // Remove any '@[0-9]+' suffix. - if (Front != '?') { - size_t AtPos = SymbolName.rfind('@'); - if (AtPos != StringRef::npos && - std::all_of(SymbolName.begin() + AtPos + 1, SymbolName.end(), - [](char C) { return C >= '0' && C <= '9'; })) { - SymbolName = SymbolName.substr(0, AtPos); - } - } - - // Remove any ending '@' for vectorcall. - if (SymbolName.endswith("@")) - SymbolName = SymbolName.drop_back(); - - return SymbolName; -} - -#if !defined(_MSC_VER) -// Assume that __cxa_demangle is provided by libcxxabi (except for Windows). -extern "C" char *__cxa_demangle(const char *mangled_name, char *output_buffer, - size_t *length, int *status); -#endif - -std::string LLVMSymbolizer::DemangleName(const std::string &Name, - ModuleInfo *ModInfo) { -#if !defined(_MSC_VER) - // We can spoil names of symbols with C linkage, so use an heuristic - // approach to check if the name should be demangled. - if (Name.substr(0, 2) == "_Z") { - int status = 0; - char *DemangledName = __cxa_demangle(Name.c_str(), nullptr, nullptr, &status); - if (status != 0) - return Name; - std::string Result = DemangledName; - free(DemangledName); - return Result; - } -#else - if (!Name.empty() && Name.front() == '?') { - // Only do MSVC C++ demangling on symbols starting with '?'. - char DemangledName[1024] = {0}; - DWORD result = ::UnDecorateSymbolName( - Name.c_str(), DemangledName, 1023, - UNDNAME_NO_ACCESS_SPECIFIERS | // Strip public, private, protected - UNDNAME_NO_ALLOCATION_LANGUAGE | // Strip __thiscall, __stdcall, etc - UNDNAME_NO_THROW_SIGNATURES | // Strip throw() specifications - UNDNAME_NO_MEMBER_TYPE | // Strip virtual, static, etc specifiers - UNDNAME_NO_MS_KEYWORDS | // Strip all MS extension keywords - UNDNAME_NO_FUNCTION_RETURNS); // Strip function return types - return (result == 0) ? Name : std::string(DemangledName); - } -#endif - if (ModInfo->isWin32Module()) - return std::string(demanglePE32ExternCFunc(Name)); - return Name; -} - -} // namespace symbolize -} // namespace llvm diff --git a/tools/llvm-symbolizer/LLVMSymbolize.h b/tools/llvm-symbolizer/LLVMSymbolize.h deleted file mode 100644 index 17df56e695a..00000000000 --- a/tools/llvm-symbolizer/LLVMSymbolize.h +++ /dev/null @@ -1,153 +0,0 @@ -//===-- LLVMSymbolize.h ----------------------------------------- C++ -----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Header for LLVM symbolization library. -// -//===----------------------------------------------------------------------===// -#ifndef LLVM_TOOLS_LLVM_SYMBOLIZER_LLVMSYMBOLIZE_H -#define LLVM_TOOLS_LLVM_SYMBOLIZER_LLVMSYMBOLIZE_H - -#include "llvm/ADT/SmallVector.h" -#include "llvm/DebugInfo/DIContext.h" -#include "llvm/Object/MachOUniversal.h" -#include "llvm/Object/ObjectFile.h" -#include "llvm/Support/DataExtractor.h" -#include "llvm/Support/MemoryBuffer.h" -#include -#include -#include - -namespace llvm { - -typedef DILineInfoSpecifier::FunctionNameKind FunctionNameKind; -using namespace object; - -namespace symbolize { - -class ModuleInfo; - -class LLVMSymbolizer { -public: - struct Options { - FunctionNameKind PrintFunctions; - bool UseSymbolTable : 1; - bool PrintInlining : 1; - bool Demangle : 1; - bool RelativeAddresses : 1; - std::string DefaultArch; - std::vector DsymHints; - Options(FunctionNameKind PrintFunctions = FunctionNameKind::LinkageName, - bool UseSymbolTable = true, bool PrintInlining = true, - bool Demangle = true, bool RelativeAddresses = false, - std::string DefaultArch = "") - : PrintFunctions(PrintFunctions), UseSymbolTable(UseSymbolTable), - PrintInlining(PrintInlining), Demangle(Demangle), - RelativeAddresses(RelativeAddresses), DefaultArch(DefaultArch) {} - }; - - LLVMSymbolizer(const Options &Opts = Options()) : Opts(Opts) {} - ~LLVMSymbolizer() { - flush(); - } - - // Returns the result of symbolization for module name/offset as - // a string (possibly containing newlines). - std::string - symbolizeCode(const std::string &ModuleName, uint64_t ModuleOffset); - std::string - symbolizeData(const std::string &ModuleName, uint64_t ModuleOffset); - void flush(); - static std::string DemangleName(const std::string &Name, ModuleInfo *ModInfo); - -private: - typedef std::pair ObjectPair; - - ModuleInfo *getOrCreateModuleInfo(const std::string &ModuleName); - ObjectFile *lookUpDsymFile(const std::string &Path, const MachOObjectFile *ExeObj, - const std::string &ArchName); - - /// \brief Returns pair of pointers to object and debug object. - ObjectPair getOrCreateObjects(const std::string &Path, - const std::string &ArchName); - /// \brief Returns a parsed object file for a given architecture in a - /// universal binary (or the binary itself if it is an object file). - ObjectFile *getObjectFileFromBinary(Binary *Bin, const std::string &ArchName); - - std::string printDILineInfo(DILineInfo LineInfo, ModuleInfo *ModInfo) const; - - // Owns all the parsed binaries and object files. - SmallVector, 4> ParsedBinariesAndObjects; - SmallVector, 4> MemoryBuffers; - void addOwningBinary(OwningBinary OwningBin) { - std::unique_ptr Bin; - std::unique_ptr MemBuf; - std::tie(Bin, MemBuf) = OwningBin.takeBinary(); - ParsedBinariesAndObjects.push_back(std::move(Bin)); - MemoryBuffers.push_back(std::move(MemBuf)); - } - - // Owns module info objects. - std::map Modules; - std::map, ObjectFile *> - ObjectFileForArch; - std::map, ObjectPair> - ObjectPairForPathArch; - - Options Opts; - static const char kBadString[]; -}; - -class ModuleInfo { -public: - ModuleInfo(ObjectFile *Obj, DIContext *DICtx); - - DILineInfo symbolizeCode(uint64_t ModuleOffset, - const LLVMSymbolizer::Options &Opts) const; - DIInliningInfo symbolizeInlinedCode( - uint64_t ModuleOffset, const LLVMSymbolizer::Options &Opts) const; - bool symbolizeData(uint64_t ModuleOffset, std::string &Name, uint64_t &Start, - uint64_t &Size) const; - - // Return true if this is a 32-bit x86 PE COFF module. - bool isWin32Module() const; - - // Returns the preferred base of the module, i.e. where the loader would place - // it in memory assuming there were no conflicts. - uint64_t getModulePreferredBase() const; - -private: - bool getNameFromSymbolTable(SymbolRef::Type Type, uint64_t Address, - std::string &Name, uint64_t &Addr, - uint64_t &Size) const; - // For big-endian PowerPC64 ELF, OpdAddress is the address of the .opd - // (function descriptor) section and OpdExtractor refers to its contents. - void addSymbol(const SymbolRef &Symbol, uint64_t SymbolSize, - DataExtractor *OpdExtractor = nullptr, - uint64_t OpdAddress = 0); - void addCoffExportSymbols(const COFFObjectFile *CoffObj); - ObjectFile *Module; - std::unique_ptr DebugInfoContext; - - struct SymbolDesc { - uint64_t Addr; - // If size is 0, assume that symbol occupies the whole memory range up to - // the following symbol. - uint64_t Size; - friend bool operator<(const SymbolDesc &s1, const SymbolDesc &s2) { - return s1.Addr < s2.Addr; - } - }; - std::map Functions; - std::map Objects; -}; - -} // namespace symbolize -} // namespace llvm - -#endif diff --git a/tools/llvm-symbolizer/Makefile b/tools/llvm-symbolizer/Makefile index de75befb1c9..8272d61280c 100644 --- a/tools/llvm-symbolizer/Makefile +++ b/tools/llvm-symbolizer/Makefile @@ -9,7 +9,7 @@ LEVEL := ../.. TOOLNAME := llvm-symbolizer -LINK_COMPONENTS := DebugInfoDWARF DebugInfoPDB Object +LINK_COMPONENTS := DebugInfoDWARF DebugInfoPDB Object Support Symbolize # This tool has no plugins, optimize startup time. TOOL_NO_EXPORTS := 1 diff --git a/tools/llvm-symbolizer/llvm-symbolizer.cpp b/tools/llvm-symbolizer/llvm-symbolizer.cpp index e773f04f02e..5784e6ba5d9 100644 --- a/tools/llvm-symbolizer/llvm-symbolizer.cpp +++ b/tools/llvm-symbolizer/llvm-symbolizer.cpp @@ -15,8 +15,8 @@ // //===----------------------------------------------------------------------===// -#include "LLVMSymbolize.h" #include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/Symbolize/Symbolize.h" #include "llvm/Support/COM.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h"