From: Nick Kledzik Date: Sat, 30 Aug 2014 00:20:14 +0000 (+0000) Subject: Object/llvm-objdump: allow dumping of mach-o exports trie X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=aa4d2acf37ee5c8acaacd320cd9ab3940f328e84;p=oota-llvm.git Object/llvm-objdump: allow dumping of mach-o exports trie MachOObjectFile in lib/Object currently has no support for parsing the rebase, binding, and export information from the LC_DYLD_INFO load command in final linked mach-o images. This patch adds support for parsing the exports trie data structure. It also adds an option to llvm-objdump to dump that export info. I did the exports parsing first because it is the hardest. The information is encoded in a trie structure, but the standard ObjectFile way to inspect content is through iterators. So I needed to make an iterator that would do a non-recursive walk through the trie and maintain the concatenation of edges needed for the current string prefix. I plan to add similar support in MachOObjectFile and llvm-objdump to parse/display the rebasing and binding info too. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216808 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/Object/MachO.h b/include/llvm/Object/MachO.h index e0b152b128c..4ad1555e54a 100644 --- a/include/llvm/Object/MachO.h +++ b/include/llvm/Object/MachO.h @@ -49,6 +49,57 @@ public: }; typedef content_iterator dice_iterator; +/// ExportEntry encapsulates the current-state-of-the-walk used when doing a +/// non-recursive walk of the trie data structure. This allows you to iterate +/// across all exported symbols using: +/// for (const llvm::object::ExportEntry &AnExport : Obj->exports()) { +/// } +class ExportEntry { +public: + ExportEntry(ArrayRef Trie); + + StringRef name() const; + uint64_t flags() const; + uint64_t address() const; + uint64_t other() const; + StringRef otherName() const; + uint32_t nodeOffset() const; + + bool operator==(const ExportEntry &) const; + + void moveNext(); + +private: + friend class MachOObjectFile; + void moveToFirst(); + void moveToEnd(); + uint64_t readULEB128(const uint8_t *&p); + void pushDownUntilBottom(); + void pushNode(uint64_t Offset); + + // Represents a node in the mach-o exports trie. + struct NodeState { + NodeState(const uint8_t *Ptr); + const uint8_t *Start; + const uint8_t *Current; + uint64_t Flags; + uint64_t Address; + uint64_t Other; + const char *ImportName; + unsigned ChildCount; + unsigned NextChildIndex; + unsigned ParentStringLength; + bool IsExportNode; + }; + + ArrayRef Trie; + SmallString<256> CumulativeString; + SmallVector Stack; + bool Malformed; + bool Done; +}; +typedef content_iterator export_iterator; + class MachOObjectFile : public ObjectFile { public: struct LoadCommandInfo { @@ -119,7 +170,7 @@ public: bool &Result) const override; // MachO specific. - std::error_code getLibraryShortNameByIndex(unsigned Index, StringRef &Res); + std::error_code getLibraryShortNameByIndex(unsigned Index, StringRef &) const; // TODO: Would be useful to have an iterator based version // of the load command interface too. @@ -144,6 +195,12 @@ public: dice_iterator begin_dices() const; dice_iterator end_dices() const; + + /// For use iterating over all exported symbols. + iterator_range exports() const; + + /// For use examining a trie not in a MachOObjectFile. + static iterator_range exports(ArrayRef Trie); // In a MachO file, sections have a segment name. This is used in the .o // files. They have a single segment, but this field specifies which segment @@ -207,6 +264,11 @@ public: MachO::symtab_command getSymtabLoadCommand() const; MachO::dysymtab_command getDysymtabLoadCommand() const; MachO::linkedit_data_command getDataInCodeLoadCommand() const; + ArrayRef getDyldInfoRebaseOpcodes() const; + ArrayRef getDyldInfoBindOpcodes() const; + ArrayRef getDyldInfoWeakBindOpcodes() const; + ArrayRef getDyldInfoLazyBindOpcodes() const; + ArrayRef getDyldInfoExportsTrie() const; StringRef getStringTableData() const; bool is64Bit() const; @@ -237,10 +299,11 @@ private: typedef SmallVector LibraryList; LibraryList Libraries; typedef SmallVector LibraryShortName; - LibraryShortName LibrariesShortNames; + mutable LibraryShortName LibrariesShortNames; const char *SymtabLoadCmd; const char *DysymtabLoadCmd; const char *DataInCodeLoadCmd; + const char *DyldInfoLoadCmd; }; /// DiceRef diff --git a/include/llvm/Support/MachO.h b/include/llvm/Support/MachO.h index 029566b6788..c9070b2db96 100644 --- a/include/llvm/Support/MachO.h +++ b/include/llvm/Support/MachO.h @@ -322,13 +322,13 @@ namespace llvm { }; enum { + EXPORT_SYMBOL_FLAGS_KIND_MASK = 0x03u, EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION = 0x04u, EXPORT_SYMBOL_FLAGS_REEXPORT = 0x08u, EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER = 0x10u }; enum ExportSymbolKind { - EXPORT_SYMBOL_FLAGS_KIND_MASK = 0x03u, EXPORT_SYMBOL_FLAGS_KIND_REGULAR = 0x00u, EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL = 0x01u, EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE = 0x02u diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index da968b58f21..2daa4a81ea8 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -19,6 +19,8 @@ #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Format.h" #include "llvm/Support/Host.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/MachO.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include @@ -226,7 +228,7 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, bool Is64bits, std::error_code &EC) : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object), SymtabLoadCmd(nullptr), DysymtabLoadCmd(nullptr), - DataInCodeLoadCmd(nullptr) { + DataInCodeLoadCmd(nullptr), DyldInfoLoadCmd(nullptr) { uint32_t LoadCommandCount = this->getHeader().ncmds; MachO::LoadCommandType SegmentLoadType = is64Bit() ? MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT; @@ -242,6 +244,10 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, } else if (Load.C.cmd == MachO::LC_DATA_IN_CODE) { assert(!DataInCodeLoadCmd && "Multiple data in code tables"); DataInCodeLoadCmd = Load.Ptr; + } else if (Load.C.cmd == MachO::LC_DYLD_INFO || + Load.C.cmd == MachO::LC_DYLD_INFO_ONLY) { + assert(!DyldInfoLoadCmd && "Multiple dyldinfo load commands"); + DyldInfoLoadCmd = Load.Ptr; } else if (Load.C.cmd == SegmentLoadType) { uint32_t NumSections = getSegmentLoadCommandNumSections(this, Load); for (unsigned J = 0; J < NumSections; ++J) { @@ -1159,7 +1165,7 @@ guess_qtx: // It is passed the index (0 - based) of the library as translated from // GET_LIBRARY_ORDINAL (1 - based). std::error_code MachOObjectFile::getLibraryShortNameByIndex(unsigned Index, - StringRef &Res) { + StringRef &Res) const { if (Index >= Libraries.size()) return object_error::parse_failed; @@ -1505,6 +1511,183 @@ dice_iterator MachOObjectFile::end_dices() const { return dice_iterator(DiceRef(DRI, this)); } +ExportEntry::ExportEntry(ArrayRef T) + : Trie(T), Malformed(false), Done(false) { } + +void ExportEntry::moveToFirst() { + pushNode(0); + pushDownUntilBottom(); +} + +void ExportEntry::moveToEnd() { + Stack.clear(); + Done = true; +} + +bool ExportEntry::operator==(const ExportEntry &Other) const { + // Common case, one at end, other iterating from begin. + if (Done || Other.Done) + return (Done == Other.Done); + // Not equal if different stack sizes. + if (Stack.size() != Other.Stack.size()) + return false; + // Not equal if different cumulative strings. + if (!CumulativeString.str().equals(CumulativeString.str())) + return false; + // Equal if all nodes in both stacks match. + for (unsigned i=0; i < Stack.size(); ++i) { + if (Stack[i].Start != Other.Stack[i].Start) + return false; + } + return true; +} + +uint64_t ExportEntry::readULEB128(const uint8_t *&p) { + unsigned count; + uint64_t result = decodeULEB128(p, &count); + p += count; + if (p > Trie.end()) { + p = Trie.end(); + Malformed = true; + } + return result; +} + +StringRef ExportEntry::name() const { + return CumulativeString.str(); +} + +uint64_t ExportEntry::flags() const { + return Stack.back().Flags; +} + +uint64_t ExportEntry::address() const { + return Stack.back().Address; +} + +uint64_t ExportEntry::other() const { + return Stack.back().Other; +} + +StringRef ExportEntry::otherName() const { + const char* ImportName = Stack.back().ImportName; + if (ImportName) + return StringRef(ImportName); + return StringRef(); +} + +uint32_t ExportEntry::nodeOffset() const { + return Stack.back().Start - Trie.begin(); +} + +ExportEntry::NodeState::NodeState(const uint8_t *Ptr) + : Start(Ptr), Current(Ptr), Flags(0), Address(0), Other(0), + ImportName(nullptr), ChildCount(0), NextChildIndex(0), + ParentStringLength(0), IsExportNode(false) { +} + +void ExportEntry::pushNode(uint64_t offset) { + const uint8_t *Ptr = Trie.begin() + offset; + NodeState State(Ptr); + uint64_t ExportInfoSize = readULEB128(State.Current); + State.IsExportNode = (ExportInfoSize != 0); + const uint8_t* Children = State.Current + ExportInfoSize; + if (State.IsExportNode) { + State.Flags = readULEB128(State.Current); + if (State.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) { + State.Address = 0; + State.Other = readULEB128(State.Current); // dylib ordinal + State.ImportName = reinterpret_cast(State.Current); + } else { + State.Address = readULEB128(State.Current); + if (State.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) + State.Other = readULEB128(State.Current); + } + } + State.ChildCount = *Children; + State.Current = Children + 1; + State.NextChildIndex = 0; + State.ParentStringLength = CumulativeString.size(); + Stack.push_back(State); +} + +void ExportEntry::pushDownUntilBottom() { + while (Stack.back().NextChildIndex < Stack.back().ChildCount) { + NodeState &Top = Stack.back(); + CumulativeString.resize(Top.ParentStringLength); + for (;*Top.Current != 0; Top.Current++) { + char c = *Top.Current; + CumulativeString.push_back(c); + } + Top.Current += 1; + uint64_t childNodeIndex = readULEB128(Top.Current); + Top.NextChildIndex += 1; + pushNode(childNodeIndex); + } + if (!Stack.back().IsExportNode) { + Malformed = true; + moveToEnd(); + } +} + +// We have a trie data structure and need a way to walk it that is compatible +// with the C++ iterator model. The solution is a non-recursive depth first +// traversal where the iterator contains a stack of parent nodes along with a +// string that is the accumulation of all edge strings along the parent chain +// to this point. +// +// There is one “export” node for each exported symbol. But because some +// symbols may be a prefix of another symbol (e.g. _dup and _dup2), an export +// node may have child nodes too. +// +// The algorithm for moveNext() is to keep moving down the leftmost unvisited +// child until hitting a node with no children (which is an export node or +// else the trie is malformed). On the way down, each node is pushed on the +// stack ivar. If there is no more ways down, it pops up one and tries to go +// down a sibling path until a childless node is reached. +void ExportEntry::moveNext() { + if (Stack.empty() || !Stack.back().IsExportNode) { + Malformed = true; + moveToEnd(); + return; + } + + Stack.pop_back(); + while (!Stack.empty()) { + NodeState &Top = Stack.back(); + if (Top.NextChildIndex < Top.ChildCount) { + pushDownUntilBottom(); + // Now at the next export node. + return; + } else { + if (Top.IsExportNode) { + // This node has no children but is itself an export node. + CumulativeString.resize(Top.ParentStringLength); + return; + } + Stack.pop_back(); + } + } + Done = true; +} + +iterator_range +MachOObjectFile::exports(ArrayRef Trie) { + ExportEntry Start(Trie); + Start.moveToFirst(); + + ExportEntry Finish(Trie); + Finish.moveToEnd(); + + return iterator_range(export_iterator(Start), + export_iterator(Finish)); +} + +iterator_range MachOObjectFile::exports() const { + return exports(getDyldInfoExportsTrie()); +} + + StringRef MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec) const { ArrayRef Raw = getSectionRawFinalSegmentName(Sec); @@ -1748,6 +1931,62 @@ MachOObjectFile::getDataInCodeLoadCommand() const { return Cmd; } +ArrayRef MachOObjectFile::getDyldInfoRebaseOpcodes() const { + if (!DyldInfoLoadCmd) + return ArrayRef(); + + MachO::dyld_info_command DyldInfo + = getStruct(this, DyldInfoLoadCmd); + const uint8_t *Ptr = reinterpret_cast( + getPtr(this, DyldInfo.rebase_off)); + return ArrayRef(Ptr, DyldInfo.rebase_size); +} + +ArrayRef MachOObjectFile::getDyldInfoBindOpcodes() const { + if (!DyldInfoLoadCmd) + return ArrayRef(); + + MachO::dyld_info_command DyldInfo + = getStruct(this, DyldInfoLoadCmd); + const uint8_t *Ptr = reinterpret_cast( + getPtr(this, DyldInfo.bind_off)); + return ArrayRef(Ptr, DyldInfo.bind_size); +} + +ArrayRef MachOObjectFile::getDyldInfoWeakBindOpcodes() const { + if (!DyldInfoLoadCmd) + return ArrayRef(); + + MachO::dyld_info_command DyldInfo + = getStruct(this, DyldInfoLoadCmd); + const uint8_t *Ptr = reinterpret_cast( + getPtr(this, DyldInfo.weak_bind_off)); + return ArrayRef(Ptr, DyldInfo.weak_bind_size); +} + +ArrayRef MachOObjectFile::getDyldInfoLazyBindOpcodes() const { + if (!DyldInfoLoadCmd) + return ArrayRef(); + + MachO::dyld_info_command DyldInfo + = getStruct(this, DyldInfoLoadCmd); + const uint8_t *Ptr = reinterpret_cast( + getPtr(this, DyldInfo.lazy_bind_off)); + return ArrayRef(Ptr, DyldInfo.lazy_bind_size); +} + +ArrayRef MachOObjectFile::getDyldInfoExportsTrie() const { + if (!DyldInfoLoadCmd) + return ArrayRef(); + + MachO::dyld_info_command DyldInfo + = getStruct(this, DyldInfoLoadCmd); + const uint8_t *Ptr = reinterpret_cast( + getPtr(this, DyldInfo.export_off)); + return ArrayRef(Ptr, DyldInfo.export_size); +} + + StringRef MachOObjectFile::getStringTableData() const { MachO::symtab_command S = getSymtabLoadCommand(); return getData().substr(S.stroff, S.strsize); diff --git a/test/tools/llvm-objdump/Inputs/exports-trie.macho-x86_64 b/test/tools/llvm-objdump/Inputs/exports-trie.macho-x86_64 new file mode 100755 index 00000000000..5d7506078a7 Binary files /dev/null and b/test/tools/llvm-objdump/Inputs/exports-trie.macho-x86_64 differ diff --git a/test/tools/llvm-objdump/macho-exports-trie.test b/test/tools/llvm-objdump/macho-exports-trie.test new file mode 100644 index 00000000000..f66fe3710cb --- /dev/null +++ b/test/tools/llvm-objdump/macho-exports-trie.test @@ -0,0 +1,11 @@ +# RUN: llvm-objdump -macho -exports-trie -arch x86_64 \ +# RUN: %p/Inputs/exports-trie.macho-x86_64 2>/dev/null | FileCheck %s + + +# CHECK:[re-export] _malloc (from libSystem) +# CHECK:[re-export] _myfree (_free from libSystem) +# CHECK:0x00000F70 _myWeakweak_def] +# CHECK:0x00001018 _myTLVper-thread] +# CHECK:0x12345678 _myAbsabsolute] +# CHECK:0x00000F60 _foo + diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp index 3b97d6f7f95..bfff4456b01 100644 --- a/tools/llvm-objdump/MachODump.cpp +++ b/tools/llvm-objdump/MachODump.cpp @@ -1783,3 +1783,63 @@ void llvm::printMachOFileHeader(const object::ObjectFile *Obj) { getAndPrintMachHeader(file, ncmds, filetype, cputype, true); PrintLoadCommands(file, ncmds, filetype, cputype, true); } + +//===----------------------------------------------------------------------===// +// export trie dumping +//===----------------------------------------------------------------------===// + +void llvm::printMachOExportsTrie(const object::MachOObjectFile *Obj) { + for (const llvm::object::ExportEntry &entry : Obj->exports()) { + uint64_t Flags = entry.flags(); + bool ReExport = (Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT); + bool WeakDef = (Flags & MachO::EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION); + bool ThreadLocal = ((Flags & MachO::EXPORT_SYMBOL_FLAGS_KIND_MASK) == + MachO::EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL); + bool Abs = ((Flags & MachO::EXPORT_SYMBOL_FLAGS_KIND_MASK) == + MachO::EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE); + bool Resolver = (Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER); + if (ReExport) + outs() << "[re-export] "; + else + outs() + << format("0x%08llX ", entry.address()); // FIXME:add in base address + outs() << entry.name(); + if (WeakDef || ThreadLocal || Resolver || Abs) { + bool needComma = false; + printf(" ["); + if (WeakDef) { + outs() << "weak_def"; + needComma = true; + } + if (ThreadLocal) { + if (needComma) + outs() << ", "; + outs() << "per-thread"; + needComma = true; + } + if (Abs) { + if (needComma) + outs() << ", "; + outs() << "absolute"; + needComma = true; + } + if (Resolver) { + if (needComma) + outs() << ", "; + outs() << format("resolver=0x%08llX", entry.other()); + needComma = true; + } + outs() << "]"; + } + if (ReExport) { + StringRef DylibName = "unknown"; + int ordinal = entry.other() - 1; + Obj->getLibraryShortNameByIndex(ordinal, DylibName); + if (entry.otherName().empty()) + outs() << " (from " << DylibName << ")"; + else + outs() << " (" << entry.otherName() << " from " << DylibName << ")"; + } + outs() << "\n"; + } +} diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp index d23bfec54ca..e8efc560dfc 100644 --- a/tools/llvm-objdump/llvm-objdump.cpp +++ b/tools/llvm-objdump/llvm-objdump.cpp @@ -84,6 +84,9 @@ SectionContents("s", cl::desc("Display the content of each section")); static cl::opt SymbolTable("t", cl::desc("Display the symbol table")); +static cl::opt +ExportsTrie("exports-trie", cl::desc("Display mach-o exported symbols")); + static cl::opt MachOOpt("macho", cl::desc("Use MachO specific object file parser")); static cl::alias @@ -829,6 +832,17 @@ static void PrintUnwindInfo(const ObjectFile *o) { } } +static void printExportsTrie(const ObjectFile *o) { + outs() << "Exports trie:\n"; + if (const MachOObjectFile *MachO = dyn_cast(o)) + printMachOExportsTrie(MachO); + else { + errs() << "This operation is only currently supported " + "for Mach-O executable files.\n"; + return; + } +} + static void printPrivateFileHeader(const ObjectFile *o) { if (o->isELF()) { printELFFileHeader(o); @@ -858,6 +872,8 @@ static void DumpObject(const ObjectFile *o) { PrintUnwindInfo(o); if (PrivateHeaders) printPrivateFileHeader(o); + if (ExportsTrie) + printExportsTrie(o); } /// @brief Dump each object file in \a a; @@ -939,7 +955,8 @@ int main(int argc, char **argv) { && !SectionContents && !SymbolTable && !UnwindInfo - && !PrivateHeaders) { + && !PrivateHeaders + && !ExportsTrie) { cl::PrintHelpMessage(); return 2; } diff --git a/tools/llvm-objdump/llvm-objdump.h b/tools/llvm-objdump/llvm-objdump.h index 9277c22b657..c259042c39d 100644 --- a/tools/llvm-objdump/llvm-objdump.h +++ b/tools/llvm-objdump/llvm-objdump.h @@ -35,7 +35,7 @@ void DumpBytes(StringRef bytes); void DisassembleInputMachO(StringRef Filename); void printCOFFUnwindInfo(const object::COFFObjectFile* o); void printMachOUnwindInfo(const object::MachOObjectFile* o); - +void printMachOExportsTrie(const object::MachOObjectFile* o); void printELFFileHeader(const object::ObjectFile *o); void printCOFFFileHeader(const object::ObjectFile *o); void printMachOFileHeader(const object::ObjectFile *o);