}
typedef DenseMap<uint64_t, StringRef> SymbolAddressMap;
+typedef std::pair<uint64_t, const char *> BindInfoEntry;
+typedef std::vector<BindInfoEntry> BindTable;
+typedef BindTable::iterator bind_table_iterator;
// The block of info used by the Symbolizer call backs.
struct DisassembleInfo {
MachOObjectFile *O;
SectionRef S;
SymbolAddressMap *AddrMap;
+ std::vector<SectionRef> *Sections;
+ const char *class_name;
+ const char *selector_name;
+ char *method;
+ BindTable *BindTable;
};
// SymbolizerGetOpInfo() is the operand information call back function.
// TODO:
// Second search the external relocation entries of a fully linked image
// (if any) for an entry that matches this segment offset.
- //uint64_t seg_offset = (Pc + Offset);
+ // uint64_t seg_offset = (Pc + Offset);
return 0;
} else if (Arch == Triple::arm) {
return 0;
uint32_t index = Sec.reserved1 + (ReferenceValue - Sec.addr) / stride;
if (index < Dysymtab.nindirectsyms) {
uint32_t indirect_symbol =
- info->O->getIndirectSymbolTableEntry(Dysymtab, index);
+ info->O->getIndirectSymbolTableEntry(Dysymtab, index);
if (indirect_symbol < Symtab.nsyms) {
symbol_iterator Sym = info->O->getSymbolByIndex(indirect_symbol);
SymbolRef Symbol = *Sym;
uint32_t index = Sec.reserved1 + (ReferenceValue - Sec.addr) / stride;
if (index < Dysymtab.nindirectsyms) {
uint32_t indirect_symbol =
- info->O->getIndirectSymbolTableEntry(Dysymtab, index);
+ info->O->getIndirectSymbolTableEntry(Dysymtab, index);
if (indirect_symbol < Symtab.nsyms) {
symbol_iterator Sym = info->O->getSymbolByIndex(indirect_symbol);
SymbolRef Symbol = *Sym;
return nullptr;
}
+// method_reference() is called passing it the ReferenceName that might be
+// a reference it to an Objective-C method call. If so then it allocates and
+// assembles a method call string with the values last seen and saved in
+// the DisassembleInfo's class_name and selector_name fields. This is saved
+// into the method field of the info and any previous string is free'ed.
+// Then the class_name field in the info is set to nullptr. The method call
+// string is set into ReferenceName and ReferenceType is set to
+// LLVMDisassembler_ReferenceType_Out_Objc_Message. If this not a method call
+// then both ReferenceType and ReferenceName are left unchanged.
+static void method_reference(struct DisassembleInfo *info,
+ uint64_t *ReferenceType,
+ const char **ReferenceName) {
+ if (*ReferenceName != nullptr) {
+ if (strcmp(*ReferenceName, "_objc_msgSend") == 0) {
+ if (info->selector_name != NULL) {
+ if (info->method != nullptr)
+ free(info->method);
+ if (info->class_name != nullptr) {
+ info->method = (char *)malloc(5 + strlen(info->class_name) +
+ strlen(info->selector_name));
+ if (info->method != nullptr) {
+ strcpy(info->method, "+[");
+ strcat(info->method, info->class_name);
+ strcat(info->method, " ");
+ strcat(info->method, info->selector_name);
+ strcat(info->method, "]");
+ *ReferenceName = info->method;
+ *ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_Message;
+ }
+ } else {
+ info->method = (char *)malloc(9 + strlen(info->selector_name));
+ if (info->method != nullptr) {
+ strcpy(info->method, "-[%rdi ");
+ strcat(info->method, info->selector_name);
+ strcat(info->method, "]");
+ *ReferenceName = info->method;
+ *ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_Message;
+ }
+ }
+ info->class_name = nullptr;
+ }
+ } else if (strcmp(*ReferenceName, "_objc_msgSendSuper2") == 0) {
+ if (info->selector_name != NULL) {
+ if (info->method != nullptr)
+ free(info->method);
+ info->method = (char *)malloc(17 + strlen(info->selector_name));
+ if (info->method != nullptr) {
+ strcpy(info->method, "-[[%rdi super] ");
+ strcat(info->method, info->selector_name);
+ strcat(info->method, "]");
+ *ReferenceName = info->method;
+ *ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_Message;
+ }
+ info->class_name = nullptr;
+ }
+ }
+ }
+}
+
+// GuessPointerPointer() is passed the address of what might be a pointer to
+// a reference to an Objective-C class, selector, message ref or cfstring.
+// If so the value of the pointer is returned and one of the booleans are set
+// to true. If not zero is returned and all the booleans are set to false.
+static uint64_t GuessPointerPointer(uint64_t ReferenceValue,
+ struct DisassembleInfo *info,
+ bool &classref, bool &selref, bool &msgref,
+ bool &cfstring) {
+ classref = false;
+ selref = false;
+ msgref = false;
+ cfstring = false;
+ uint32_t LoadCommandCount = info->O->getHeader().ncmds;
+ MachOObjectFile::LoadCommandInfo Load = info->O->getFirstLoadCommandInfo();
+ for (unsigned I = 0;; ++I) {
+ if (Load.C.cmd == MachO::LC_SEGMENT_64) {
+ MachO::segment_command_64 Seg = info->O->getSegment64LoadCommand(Load);
+ for (unsigned J = 0; J < Seg.nsects; ++J) {
+ MachO::section_64 Sec = info->O->getSection64(Load, J);
+ if ((strncmp(Sec.sectname, "__objc_selrefs", 16) == 0 ||
+ strncmp(Sec.sectname, "__objc_classrefs", 16) == 0 ||
+ strncmp(Sec.sectname, "__objc_superrefs", 16) == 0 ||
+ strncmp(Sec.sectname, "__objc_msgrefs", 16) == 0 ||
+ strncmp(Sec.sectname, "__cfstring", 16) == 0) &&
+ ReferenceValue >= Sec.addr &&
+ ReferenceValue < Sec.addr + Sec.size) {
+ uint64_t sect_offset = ReferenceValue - Sec.addr;
+ uint64_t object_offset = Sec.offset + sect_offset;
+ StringRef MachOContents = info->O->getData();
+ uint64_t object_size = MachOContents.size();
+ const char *object_addr = (const char *)MachOContents.data();
+ if (object_offset < object_size) {
+ uint64_t pointer_value;
+ memcpy(&pointer_value, object_addr + object_offset,
+ sizeof(uint64_t));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ sys::swapByteOrder(pointer_value);
+ if (strncmp(Sec.sectname, "__objc_selrefs", 16) == 0)
+ selref = true;
+ else if (strncmp(Sec.sectname, "__objc_classrefs", 16) == 0 ||
+ strncmp(Sec.sectname, "__objc_superrefs", 16) == 0)
+ classref = true;
+ else if (strncmp(Sec.sectname, "__objc_msgrefs", 16) == 0 &&
+ ReferenceValue + 8 < Sec.addr + Sec.size) {
+ msgref = true;
+ memcpy(&pointer_value, object_addr + object_offset + 8,
+ sizeof(uint64_t));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ sys::swapByteOrder(pointer_value);
+ } else if (strncmp(Sec.sectname, "__cfstring", 16) == 0)
+ cfstring = true;
+ return pointer_value;
+ } else {
+ return 0;
+ }
+ }
+ }
+ }
+ // TODO: Look for LC_SEGMENT for 32-bit Mach-O files.
+ if (I == LoadCommandCount - 1)
+ break;
+ else
+ Load = info->O->getNextLoadCommandInfo(Load);
+ }
+ return 0;
+}
+
+// get_pointer_64 returns a pointer to the bytes in the object file at the
+// Address from a section in the Mach-O file. And indirectly returns the
+// offset into the section, number of bytes left in the section past the offset
+// and which section is was being referenced. If the Address is not in a
+// section nullptr is returned.
+const char *get_pointer_64(uint64_t Address, uint32_t &offset, uint32_t &left,
+ SectionRef &S, DisassembleInfo *info) {
+ offset = 0;
+ left = 0;
+ S = SectionRef();
+ for (unsigned SectIdx = 0; SectIdx != info->Sections->size(); SectIdx++) {
+ uint64_t SectAddress = ((*(info->Sections))[SectIdx]).getAddress();
+ uint64_t SectSize = ((*(info->Sections))[SectIdx]).getSize();
+ if (Address >= SectAddress && Address < SectAddress + SectSize) {
+ S = (*(info->Sections))[SectIdx];
+ offset = Address - SectAddress;
+ left = SectSize - offset;
+ StringRef SectContents;
+ ((*(info->Sections))[SectIdx]).getContents(SectContents);
+ return SectContents.data() + offset;
+ }
+ }
+ return nullptr;
+}
+
+// get_symbol_64() returns the name of a symbol (or nullptr) and the address of
+// the symbol indirectly through n_value. Based on the relocation information
+// for the specified section offset in the specified section reference.
+const char *get_symbol_64(uint32_t sect_offset, SectionRef S,
+ DisassembleInfo *info, uint64_t &n_value) {
+ n_value = 0;
+ if (info->verbose == false)
+ return nullptr;
+
+ // See if there is an external relocation entry at the sect_offset.
+ bool reloc_found = false;
+ DataRefImpl Rel;
+ MachO::any_relocation_info RE;
+ bool isExtern = false;
+ SymbolRef Symbol;
+ for (const RelocationRef &Reloc : S.relocations()) {
+ uint64_t RelocOffset;
+ Reloc.getOffset(RelocOffset);
+ if (RelocOffset == sect_offset) {
+ Rel = Reloc.getRawDataRefImpl();
+ RE = info->O->getRelocation(Rel);
+ if (info->O->isRelocationScattered(RE))
+ continue;
+ isExtern = info->O->getPlainRelocationExternal(RE);
+ if (isExtern) {
+ symbol_iterator RelocSym = Reloc.getSymbol();
+ Symbol = *RelocSym;
+ }
+ reloc_found = true;
+ break;
+ }
+ }
+ // If there is an external relocation entry for a symbol in this section
+ // at this section_offset then use that symbol's value for the n_value
+ // and return its name.
+ const char *SymbolName = nullptr;
+ if (reloc_found && isExtern) {
+ Symbol.getAddress(n_value);
+ StringRef name;
+ Symbol.getName(name);
+ if (!name.empty()) {
+ SymbolName = name.data();
+ return SymbolName;
+ }
+ }
+
+ // TODO: For fully linked images, look through the external relocation
+ // entries off the dynamic symtab command. For these the r_offset is from the
+ // start of the first writeable segment in the Mach-O file. So the offset
+ // to this section from that segment is passed to this routine by the caller,
+ // as the database_offset. Which is the difference of the section's starting
+ // address and the first writable segment.
+ //
+ // NOTE: need add passing the database_offset to this routine.
+
+ // TODO: We did not find an external relocation entry so look up the
+ // ReferenceValue as an address of a symbol and if found return that symbol's
+ // name.
+ //
+ // NOTE: need add passing the ReferenceValue to this routine. Then that code
+ // would simply be this:
+ //
+ // if (ReferenceValue != 0xffffffffffffffffLLU &&
+ // ReferenceValue != 0xfffffffffffffffeLLU) {
+ // StringRef name = info->AddrMap->lookup(ReferenceValue);
+ // if (!name.empty())
+ // SymbolName = name.data();
+ // }
+
+ return SymbolName;
+}
+
+// These are structs in the Objective-C meta data and read to produce the
+// comments for disassembly. While these are part of the ABI they are no
+// public defintions. So the are here not in include/llvm/Support/MachO.h .
+
+// The cfstring object in a 64-bit Mach-O file.
+struct cfstring64_t {
+ uint64_t isa; // class64_t * (64-bit pointer)
+ uint64_t flags; // flag bits
+ uint64_t characters; // char * (64-bit pointer)
+ uint64_t length; // number of non-NULL characters in above
+};
+
+// The class object in a 64-bit Mach-O file.
+struct class64_t {
+ uint64_t isa; // class64_t * (64-bit pointer)
+ uint64_t superclass; // class64_t * (64-bit pointer)
+ uint64_t cache; // Cache (64-bit pointer)
+ uint64_t vtable; // IMP * (64-bit pointer)
+ uint64_t data; // class_ro64_t * (64-bit pointer)
+};
+
+struct class_ro64_t {
+ uint32_t flags;
+ uint32_t instanceStart;
+ uint32_t instanceSize;
+ uint32_t reserved;
+ uint64_t ivarLayout; // const uint8_t * (64-bit pointer)
+ uint64_t name; // const char * (64-bit pointer)
+ uint64_t baseMethods; // const method_list_t * (64-bit pointer)
+ uint64_t baseProtocols; // const protocol_list_t * (64-bit pointer)
+ uint64_t ivars; // const ivar_list_t * (64-bit pointer)
+ uint64_t weakIvarLayout; // const uint8_t * (64-bit pointer)
+ uint64_t baseProperties; // const struct objc_property_list (64-bit pointer)
+};
+
+inline void swapStruct(struct cfstring64_t &cfs) {
+ sys::swapByteOrder(cfs.isa);
+ sys::swapByteOrder(cfs.flags);
+ sys::swapByteOrder(cfs.characters);
+ sys::swapByteOrder(cfs.length);
+}
+
+inline void swapStruct(struct class64_t &c) {
+ sys::swapByteOrder(c.isa);
+ sys::swapByteOrder(c.superclass);
+ sys::swapByteOrder(c.cache);
+ sys::swapByteOrder(c.vtable);
+ sys::swapByteOrder(c.data);
+}
+
+inline void swapStruct(struct class_ro64_t &cro) {
+ sys::swapByteOrder(cro.flags);
+ sys::swapByteOrder(cro.instanceStart);
+ sys::swapByteOrder(cro.instanceSize);
+ sys::swapByteOrder(cro.reserved);
+ sys::swapByteOrder(cro.ivarLayout);
+ sys::swapByteOrder(cro.name);
+ sys::swapByteOrder(cro.baseMethods);
+ sys::swapByteOrder(cro.baseProtocols);
+ sys::swapByteOrder(cro.ivars);
+ sys::swapByteOrder(cro.weakIvarLayout);
+ sys::swapByteOrder(cro.baseProperties);
+}
+
+static const char *get_dyld_bind_info_symbolname(uint64_t ReferenceValue,
+ struct DisassembleInfo *info);
+
+// get_objc2_64bit_class_name() is used for disassembly and is passed a pointer
+// to an Objective-C class and returns the class name. It is also passed the
+// address of the pointer, so when the pointer is zero as it can be in an .o
+// file, that is used to look for an external relocation entry with a symbol
+// name.
+const char *get_objc2_64bit_class_name(uint64_t pointer_value,
+ uint64_t ReferenceValue,
+ struct DisassembleInfo *info) {
+ const char *r;
+ uint32_t offset, left;
+ SectionRef S;
+
+ // The pointer_value can be 0 in an object file and have a relocation
+ // entry for the class symbol at the ReferenceValue (the address of the
+ // pointer).
+ if (pointer_value == 0) {
+ r = get_pointer_64(ReferenceValue, offset, left, S, info);
+ if (r == nullptr || left < sizeof(uint64_t))
+ return nullptr;
+ uint64_t n_value;
+ const char *symbol_name = get_symbol_64(offset, S, info, n_value);
+ if (symbol_name == nullptr)
+ return nullptr;
+ const char *class_name = rindex(symbol_name, '$');
+ if (class_name != nullptr && class_name[1] == '_' && class_name[2] != '\0')
+ return class_name + 2;
+ else
+ return nullptr;
+ }
+
+ // The case were the pointer_value is non-zero and points to a class defined
+ // in this Mach-O file.
+ r = get_pointer_64(pointer_value, offset, left, S, info);
+ if (r == nullptr || left < sizeof(struct class64_t))
+ return nullptr;
+ struct class64_t c;
+ memcpy(&c, r, sizeof(struct class64_t));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(c);
+ if (c.data == 0)
+ return nullptr;
+ r = get_pointer_64(c.data, offset, left, S, info);
+ if (r == nullptr || left < sizeof(struct class_ro64_t))
+ return nullptr;
+ struct class_ro64_t cro;
+ memcpy(&cro, r, sizeof(struct class_ro64_t));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(cro);
+ if (cro.name == 0)
+ return nullptr;
+ const char *name = get_pointer_64(cro.name, offset, left, S, info);
+ return name;
+}
+
+// get_objc2_64bit_cfstring_name is used for disassembly and is passed a
+// pointer to a cfstring and returns its name or nullptr.
+const char *get_objc2_64bit_cfstring_name(uint64_t ReferenceValue,
+ struct DisassembleInfo *info) {
+ const char *r, *name;
+ uint32_t offset, left;
+ SectionRef S;
+ struct cfstring64_t cfs;
+ uint64_t cfs_characters;
+
+ r = get_pointer_64(ReferenceValue, offset, left, S, info);
+ if (r == nullptr || left < sizeof(struct cfstring64_t))
+ return nullptr;
+ memcpy(&cfs, r, sizeof(struct cfstring64_t));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(cfs);
+ if (cfs.characters == 0) {
+ uint64_t n_value;
+ const char *symbol_name = get_symbol_64(
+ offset + offsetof(struct cfstring64_t, characters), S, info, n_value);
+ if (symbol_name == nullptr)
+ return nullptr;
+ cfs_characters = n_value;
+ } else
+ cfs_characters = cfs.characters;
+ name = get_pointer_64(cfs_characters, offset, left, S, info);
+
+ return name;
+}
+
+// get_objc2_64bit_selref() is used for disassembly and is passed a the address
+// of a pointer to an Objective-C selector reference when the pointer value is
+// zero as in a .o file and is likely to have a external relocation entry with
+// who's symbol's n_value is the real pointer to the selector name. If that is
+// the case the real pointer to the selector name is returned else 0 is
+// returned
+uint64_t get_objc2_64bit_selref(uint64_t ReferenceValue,
+ struct DisassembleInfo *info) {
+ uint32_t offset, left;
+ SectionRef S;
+
+ const char *r = get_pointer_64(ReferenceValue, offset, left, S, info);
+ if (r == nullptr || left < sizeof(uint64_t))
+ return 0;
+ uint64_t n_value;
+ const char *symbol_name = get_symbol_64(offset, S, info, n_value);
+ if (symbol_name == nullptr)
+ return 0;
+ return n_value;
+}
+
// GuessLiteralPointer returns a string which for the item in the Mach-O file
// for the address passed in as ReferenceValue for printing as a comment with
// the instruction and also returns the corresponding type of that item
// cstring is returned and ReferenceType is set to
// LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr .
//
-// TODO: other literals such as Objective-C CFStrings refs, Selector refs,
-// Message refs, Class refs and a Symbol address in a literal pool are yet
-// to be done here.
+// If ReferenceValue is an address of an Objective-C CFString, Selector ref or
+// Class ref that name is returned and the ReferenceType is set accordingly.
+//
+// Lastly, literals which are Symbol address in a literal pool are looked for
+// and if found the symbol name is returned and ReferenceType is set to
+// LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr .
+//
+// If there is no item in the Mach-O file for the address passed in as
+// ReferenceValue nullptr is returned and ReferenceType is unchanged.
const char *GuessLiteralPointer(uint64_t ReferenceValue, uint64_t ReferencePC,
uint64_t *ReferenceType,
struct DisassembleInfo *info) {
- // TODO: This rouine's code is only for an x86_64 Mach-O file for now.
+ // TODO: This rouine's code and the routines it calls are only work with
+ // x86_64 Mach-O files for now.
unsigned int Arch = info->O->getArch();
if (Arch != Triple::x86_64)
return nullptr;
}
}
- // TODO: the code to look for other literals such as Objective-C CFStrings
- // refs, Selector refs, Message refs, Class refs will be added here.
+ // Look for literals such as Objective-C CFStrings refs, Selector refs,
+ // Message refs and Class refs.
+ bool classref, selref, msgref, cfstring;
+ uint64_t pointer_value = GuessPointerPointer(ReferenceValue, info, classref,
+ selref, msgref, cfstring);
+ if (classref == true && pointer_value == 0) {
+ // Note the ReferenceValue is a pointer into the __objc_classrefs section.
+ // And the pointer_value in that section is typically zero as it will be
+ // set by dyld as part of the "bind information".
+ const char *name = get_dyld_bind_info_symbolname(ReferenceValue, info);
+ if (name != nullptr) {
+ *ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref;
+ const char *class_name = rindex(name, '$');
+ if (class_name != nullptr && class_name[1] == '_' &&
+ class_name[2] != '\0') {
+ info->class_name = class_name + 2;
+ return name;
+ }
+ }
+ }
+
+ if (classref == true) {
+ *ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref;
+ const char *name =
+ get_objc2_64bit_class_name(pointer_value, ReferenceValue, info);
+ if (name != nullptr)
+ info->class_name = name;
+ else
+ name = "bad class ref";
+ return name;
+ }
+
+ if (cfstring == true) {
+ *ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref;
+ const char *name = get_objc2_64bit_cfstring_name(ReferenceValue, info);
+ return name;
+ }
+
+ if (selref == true && pointer_value == 0)
+ pointer_value = get_objc2_64bit_selref(ReferenceValue, info);
+
+ if (pointer_value != 0)
+ ReferenceValue = pointer_value;
const char *name = GuessCstringPointer(ReferenceValue, info);
if (name) {
- // TODO: note when the code is added above for Selector refs and Message
- // refs we will need check for that here and set the ReferenceType
- // accordingly.
- *ReferenceType = LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr;
+ if (pointer_value != 0 && selref == true) {
+ *ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref;
+ info->selector_name = name;
+ } else if (pointer_value != 0 && msgref == true) {
+ info->class_name = nullptr;
+ *ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref;
+ info->selector_name = name;
+ } else
+ *ReferenceType = LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr;
return name;
}
- // TODO: look for an indirect symbol with this ReferenceValue which is in
- // a literal pool.
+ // Lastly look for an indirect symbol with this ReferenceValue which is in
+ // a literal pool. If found return that symbol name.
+ name = GuessIndirectSymbol(ReferenceValue, info);
+ if (name) {
+ *ReferenceType = LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr;
+ return name;
+ }
return nullptr;
}
// Out type and the ReferenceName will also be set which is added as a comment
// to the disassembled instruction.
//
-// If the symbol name is a C++ mangled name then the demangled name is
+// TODO: If the symbol name is a C++ mangled name then the demangled name is
// returned through ReferenceName and ReferenceType is set to
// LLVMDisassembler_ReferenceType_DeMangled_Name .
//
// ReferenceType will be LLVMDisassembler_ReferenceType_In_PCrel_Load then the
// SymbolValue is checked to be an address of literal pointer, symbol pointer,
// or an Objective-C meta data reference. If so the output ReferenceType is
-// set to correspond to that as well as ReferenceName.
+// set to correspond to that as well as setting the ReferenceName.
const char *SymbolizerSymbolLookUp(void *DisInfo, uint64_t ReferenceValue,
uint64_t *ReferenceType,
uint64_t ReferencePC,
}
const char *SymbolName = nullptr;
- StringRef name = info->AddrMap->lookup(ReferenceValue);
- if (!name.empty())
- SymbolName = name.data();
+ if (ReferenceValue != 0xffffffffffffffffLLU &&
+ ReferenceValue != 0xfffffffffffffffeLLU) {
+ StringRef name = info->AddrMap->lookup(ReferenceValue);
+ if (!name.empty())
+ SymbolName = name.data();
+ }
if (*ReferenceType == LLVMDisassembler_ReferenceType_In_Branch) {
*ReferenceName = GuessIndirectSymbol(ReferenceValue, info);
+ if (*ReferenceName) {
+ method_reference(info, ReferenceType, ReferenceName);
+ if (*ReferenceType != LLVMDisassembler_ReferenceType_Out_Objc_Message)
+ *ReferenceType = LLVMDisassembler_ReferenceType_Out_SymbolStub;
+ } else
+ // TODO: if SymbolName is not nullptr see if it is a C++ name
+ // and demangle it.
+ *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
+ } else if (*ReferenceType == LLVMDisassembler_ReferenceType_In_PCrel_Load) {
+ *ReferenceName =
+ GuessLiteralPointer(ReferenceValue, ReferencePC, ReferenceType, info);
if (*ReferenceName)
- *ReferenceType = LLVMDisassembler_ReferenceType_Out_SymbolStub;
+ method_reference(info, ReferenceType, ReferenceName);
else
*ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
}
- else if (*ReferenceType == LLVMDisassembler_ReferenceType_In_PCrel_Load) {
- *ReferenceName = GuessLiteralPointer(ReferenceValue, ReferencePC,
- ReferenceType, info);
- if (*ReferenceName == nullptr)
- *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
- // TODO: other types of references to be added.
- } else {
+ // TODO: if SymbolName is not nullptr see if it is a C++ name
+ // and demangle it.
+ else {
*ReferenceName = nullptr;
*ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
}
uint64_t Size;
uint64_t BasePC;
public:
- DisasmMemoryObject(const uint8_t *bytes, uint64_t size, uint64_t basePC) :
- Bytes(bytes), Size(size), BasePC(basePC) {}
+ DisasmMemoryObject(const uint8_t *bytes, uint64_t size, uint64_t basePC)
+ : Bytes(bytes), Size(size), BasePC(basePC) {}
uint64_t getBase() const override { return BasePC; }
uint64_t getExtent() const override { return Size; }
SymbolizerInfo.O = MachOOF;
SymbolizerInfo.S = Sections[SectIdx];
SymbolizerInfo.AddrMap = &AddrMap;
+ SymbolizerInfo.Sections = &Sections;
+ SymbolizerInfo.class_name = nullptr;
+ SymbolizerInfo.selector_name = nullptr;
+ SymbolizerInfo.method = nullptr;
+ SymbolizerInfo.BindTable = nullptr;
// Disassemble symbol by symbol.
for (unsigned SymIdx = 0; SymIdx != Symbols.size(); SymIdx++) {
uint64_t Size;
symbolTableWorked = true;
+ DisasmMemoryObject SectionMemoryObject((const uint8_t *)Bytes.data() +
+ Start,
+ End - Start, SectAddress + Start);
DataRefImpl Symb = Symbols[SymIdx].getRawDataRefImpl();
bool isThumb =
if (FullLeadingAddr) {
if (MachOOF->is64Bit())
outs() << format("%016" PRIx64, PC);
- else
+ else
outs() << format("%08" PRIx64, PC);
} else {
outs() << format("%8" PRIx64 ":", PC);
bool gotInst;
if (isThumb)
- gotInst = ThumbDisAsm->getInstruction(Inst, Size, MemoryObject, PC,
- DebugOut, Annotations);
+ gotInst = ThumbDisAsm->getInstruction(Inst, Size, SectionMemoryObject,
+ PC, DebugOut, Annotations);
else
- gotInst = DisAsm->getInstruction(Inst, Size, MemoryObject, PC,
+ gotInst = DisAsm->getInstruction(Inst, Size, SectionMemoryObject, PC,
DebugOut, Annotations);
if (gotInst) {
if (!NoShowRawInsn) {
}
outs() << "\n";
} else {
- errs() << "llvm-objdump: warning: invalid instruction encoding\n";
- if (Size == 0)
- Size = 1; // skip illegible bytes
+ unsigned int Arch = MachOOF->getArch();
+ if (Arch == Triple::x86_64 || Arch == Triple::x86){
+ outs() << format("\t.byte 0x%02x #bad opcode\n",
+ *(Bytes.data() + Index) & 0xff);
+ Size = 1; // skip exactly one illegible byte and move on.
+ } else {
+ errs() << "llvm-objdump: warning: invalid instruction encoding\n";
+ if (Size == 0)
+ Size = 1; // skip illegible bytes
+ }
}
}
}
MCInst Inst;
uint64_t PC = SectAddress + Index;
- if (DisAsm->getInstruction(Inst, InstSize, MemoryObject, PC,
- DebugOut, nulls())) {
+ if (DisAsm->getInstruction(Inst, InstSize, MemoryObject, PC, DebugOut,
+ nulls())) {
if (FullLeadingAddr) {
if (MachOOF->is64Bit())
outs() << format("%016" PRIx64, PC);
- else
+ else
outs() << format("%08" PRIx64, PC);
} else {
outs() << format("%8" PRIx64 ":", PC);
IP->printInst(&Inst, outs(), "");
outs() << "\n";
} else {
- errs() << "llvm-objdump: warning: invalid instruction encoding\n";
- if (InstSize == 0)
- InstSize = 1; // skip illegible bytes
+ unsigned int Arch = MachOOF->getArch();
+ if (Arch == Triple::x86_64 || Arch == Triple::x86){
+ outs() << format("\t.byte 0x%02x #bad opcode\n",
+ *(Bytes.data() + Index) & 0xff);
+ InstSize = 1; // skip exactly one illegible byte and move on.
+ } else {
+ errs() << "llvm-objdump: warning: invalid instruction encoding\n";
+ if (InstSize == 0)
+ InstSize = 1; // skip illegible bytes
+ }
}
}
}
+ if (SymbolizerInfo.method != nullptr)
+ free(SymbolizerInfo.method);
+ if (SymbolizerInfo.BindTable != nullptr)
+ delete SymbolizerInfo.BindTable;
}
}
}
}
-
+// get_dyld_bind_info_symbolname() is used for disassembly and passed an
+// address, ReferenceValue, in the Mach-O file and looks in the dyld bind
+// information for that address. If the address is found its binding symbol
+// name is returned. If not nullptr is returned.
+static const char *get_dyld_bind_info_symbolname(uint64_t ReferenceValue,
+ struct DisassembleInfo *info) {
+ if (info->BindTable == nullptr) {
+ info->BindTable = new (BindTable);
+ SegInfo sectionTable(info->O);
+ for (const llvm::object::MachOBindEntry &Entry : info->O->bindTable()) {
+ uint32_t SegIndex = Entry.segmentIndex();
+ uint64_t OffsetInSeg = Entry.segmentOffset();
+ uint64_t Address = sectionTable.address(SegIndex, OffsetInSeg);
+ const char *SymbolName = nullptr;
+ StringRef name = Entry.symbolName();
+ if (!name.empty())
+ SymbolName = name.data();
+ info->BindTable->push_back(std::make_pair(Address, SymbolName));
+ }
+ }
+ for (bind_table_iterator BI = info->BindTable->begin(),
+ BE = info->BindTable->end();
+ BI != BE; ++BI) {
+ uint64_t Address = BI->first;
+ if (ReferenceValue == Address) {
+ const char *SymbolName = BI->second;
+ return SymbolName;
+ }
+ }
+ return nullptr;
+}