#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/MachO.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"
static cl::opt<std::string>
DSYMFile("dsym", cl::desc("Use .dSYM file for debug info"));
+static cl::opt<bool>
+ FullLeadingAddr("full-leading-addr",
+ cl::desc("Print full leading address"));
+
+static cl::opt<bool>
+ PrintImmHex("print-imm-hex",
+ cl::desc("Use hex format for immediate values"));
+
static std::string ThumbTripleName;
static const Target *GetTarget(const MachOObjectFile *MachOObj,
DisassembleInputMachO2(Filename, MachOOF.get());
}
+typedef DenseMap<uint64_t, StringRef> SymbolAddressMap;
+
// The block of info used by the Symbolizer call backs.
struct DisassembleInfo {
bool verbose;
MachOObjectFile *O;
SectionRef S;
+ SymbolAddressMap *AddrMap;
};
// SymbolizerGetOpInfo() is the operand information call back function.
// is the offset from the external symbol.
if (info->O->getAnyRelocationPCRel(RE))
op_info->Value -= Pc + Offset + Size;
- // SymbolRef Symbol = (*info->Relocs)[Idx].second;
StringRef SymName;
Symbol.getName(SymName);
const char *name = SymName.data();
}
}
+// LookUpCstringInSection checks one section, described by its flags, address,
+// size and file offset, against ReferenceValue. It returns false if the
+// section is not an S_CSTRING_LITERALS section or does not contain
+// ReferenceValue. Otherwise it returns true and sets Str to the string at that
+// address inside Contents, or to nullptr if the string's file offset is past
+// the end of Contents or no terminating NUL is found before the end of
+// Contents.
+static bool LookUpCstringInSection(uint64_t ReferenceValue, uint32_t Flags,
+                                   uint64_t Addr, uint64_t Size,
+                                   uint64_t FileOffset, StringRef Contents,
+                                   const char *&Str) {
+  if ((Flags & MachO::SECTION_TYPE) != MachO::S_CSTRING_LITERALS)
+    return false;
+  // Written as a subtraction so that Addr + Size can not wrap around.
+  if (ReferenceValue < Addr || ReferenceValue - Addr >= Size)
+    return false;
+
+  Str = nullptr;
+  uint64_t ObjectOffset = FileOffset + (ReferenceValue - Addr);
+  // The result is handed back as a C string, so only return it when a
+  // terminating NUL exists inside the file contents.
+  if (ObjectOffset < Contents.size() &&
+      Contents.find('\0', ObjectOffset) != StringRef::npos)
+    Str = Contents.data() + ObjectOffset;
+  return true;
+}
+
+// GuessCstringPointer is passed the address of what might be a pointer to a
+// literal string in a cstring section. If that address is in a cstring section
+// it returns a pointer to that string. Else it returns nullptr.
+//
+// The load commands of info->O are walked in order and the sections of each
+// LC_SEGMENT_64 and LC_SEGMENT command are examined. The search stops at the
+// first cstring section that contains ReferenceValue, even when no usable
+// string can be returned from it.
+const char *GuessCstringPointer(uint64_t ReferenceValue,
+                                struct DisassembleInfo *info) {
+  uint32_t LoadCommandCount = info->O->getHeader().ncmds;
+  // Without this guard the first load command would be fetched from a file
+  // that has none, and the loop bound below would be meaningless.
+  if (LoadCommandCount == 0)
+    return nullptr;
+
+  StringRef MachOContents = info->O->getData();
+  MachOObjectFile::LoadCommandInfo Load = info->O->getFirstLoadCommandInfo();
+  for (uint32_t I = 0; I < LoadCommandCount; ++I) {
+    const char *Str = nullptr;
+    if (Load.C.cmd == MachO::LC_SEGMENT_64) {
+      MachO::segment_command_64 Seg = info->O->getSegment64LoadCommand(Load);
+      for (unsigned J = 0; J < Seg.nsects; ++J) {
+        MachO::section_64 Sec = info->O->getSection64(Load, J);
+        if (LookUpCstringInSection(ReferenceValue, Sec.flags, Sec.addr,
+                                   Sec.size, Sec.offset, MachOContents, Str))
+          return Str;
+      }
+    } else if (Load.C.cmd == MachO::LC_SEGMENT) {
+      MachO::segment_command Seg = info->O->getSegmentLoadCommand(Load);
+      for (unsigned J = 0; J < Seg.nsects; ++J) {
+        MachO::section Sec = info->O->getSection(Load, J);
+        if (LookUpCstringInSection(ReferenceValue, Sec.flags, Sec.addr,
+                                   Sec.size, Sec.offset, MachOContents, Str))
+          return Str;
+      }
+    }
+    // Only advance while another load command remains.
+    if (I + 1 < LoadCommandCount)
+      Load = info->O->getNextLoadCommandInfo(Load);
+  }
+  return nullptr;
+}
+
+
+// GuessLiteralPointer returns a string for the item in the Mach-O file at the
+// address passed in as ReferenceValue, for printing as a comment with the
+// instruction, and also returns the corresponding type of that item
+// indirectly through ReferenceType.
+//
+// ReferencePC is used to look for a relocation entry in the section info->S
+// at that address. If an external, PC relative X86_64_RELOC_SIGNED entry is
+// found there, the address of the symbol it refers to replaces
+// ReferenceValue.
+//
+// If ReferenceValue is an address of literal cstring then a pointer to the
+// cstring is returned and ReferenceType is set to
+// LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr .
+// In every other case nullptr is returned and ReferenceType is left
+// untouched.
+//
+// TODO: other literals such as Objective-C CFStrings refs, Selector refs,
+// Message refs, Class refs and a Symbol address in a literal pool are yet
+// to be done here.
+const char *GuessLiteralPointer(uint64_t ReferenceValue, uint64_t ReferencePC,
+                                uint64_t *ReferenceType,
+                                struct DisassembleInfo *info) {
+  // TODO: This routine's code is only for an x86_64 Mach-O file for now.
+  if (info->O->getArch() != Triple::x86_64)
+    return nullptr;
+
+  // First see if there is an external relocation entry at the ReferencePC.
+  // The address starts out as zero so that a failed query does not leave it
+  // indeterminate.
+  uint64_t sect_addr = 0;
+  info->S.getAddress(sect_addr);
+  uint64_t sect_offset = ReferencePC - sect_addr;
+  bool reloc_found = false;
+  MachO::any_relocation_info RE;
+  bool isExtern = false;
+  SymbolRef Symbol;
+  for (const RelocationRef &Reloc : info->S.relocations()) {
+    uint64_t RelocOffset = 0;
+    // An entry whose offset can not be read can not be matched.
+    if (Reloc.getOffset(RelocOffset))
+      continue;
+    if (RelocOffset != sect_offset)
+      continue;
+    RE = info->O->getRelocation(Reloc.getRawDataRefImpl());
+    // Scattered entries are skipped; keep looking for a plain one.
+    if (info->O->isRelocationScattered(RE))
+      continue;
+    isExtern = info->O->getPlainRelocationExternal(RE);
+    if (isExtern) {
+      symbol_iterator RelocSym = Reloc.getSymbol();
+      Symbol = *RelocSym;
+    }
+    reloc_found = true;
+    break;
+  }
+  // If there is an external relocation entry for a symbol in a section
+  // then use that symbol's value for the value of the reference.
+  if (reloc_found && isExtern && info->O->getAnyRelocationPCRel(RE) &&
+      info->O->getAnyRelocationType(RE) == MachO::X86_64_RELOC_SIGNED)
+    Symbol.getAddress(ReferenceValue);
+
+  // TODO: the code to look for other literals such as Objective-C CFStrings
+  // refs, Selector refs, Message refs, Class refs will be added here.
+
+  if (const char *name = GuessCstringPointer(ReferenceValue, info)) {
+    // TODO: note when the code is added above for Selector refs and Message
+    // refs we will need check for that here and set the ReferenceType
+    // accordingly.
+    *ReferenceType = LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr;
+    return name;
+  }
+
+  // TODO: look for an indirect symbol with this ReferenceValue which is in
+  // a literal pool.
+
+  return nullptr;
+}
+
+
// SymbolizerSymbolLookUp is the symbol lookup function passed when creating
-// the Symbolizer. It looks up the SymbolValue using the info passed via the
+// the Symbolizer. It looks up the ReferenceValue using the info passed via the
// pointer to the struct DisassembleInfo that was passed when MCSymbolizer
// is created and returns the symbol name that matches the ReferenceValue or
// nullptr if none. The ReferenceType is passed in for the IN type of
// stub is returned indirectly through ReferenceName and then ReferenceType is
// set to LLVMDisassembler_ReferenceType_Out_SymbolStub.
//
-// When this is called with an value loaded via a PC relative load then
+// When this is called with a value loaded via a PC relative load then
// ReferenceType will be LLVMDisassembler_ReferenceType_In_PCrel_Load then the
// SymbolValue is checked to be an address of literal pointer, symbol pointer,
// or an Objective-C meta data reference. If so the output ReferenceType is
uint64_t ReferencePC,
const char **ReferenceName) {
struct DisassembleInfo *info = (struct DisassembleInfo *)DisInfo;
- *ReferenceName = nullptr;
- *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
- unsigned int Arch = info->O->getArch();
- if (Arch == Triple::x86) {
- return nullptr;
- } else if (Arch == Triple::x86_64) {
- return nullptr;
- } else if (Arch == Triple::arm) {
- return nullptr;
- } else if (Arch == Triple::aarch64) {
+ // If no verbose symbolic information is wanted then just return nullptr.
+ if (info->verbose == false) {
+ *ReferenceName = nullptr;
+ *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
return nullptr;
+ }
+
+ const char *SymbolName = nullptr;
+ StringRef name = info->AddrMap->lookup(ReferenceValue);
+ if (!name.empty())
+ SymbolName = name.data();
+
+ if (*ReferenceType == LLVMDisassembler_ReferenceType_In_PCrel_Load) {
+ *ReferenceName = GuessLiteralPointer(ReferenceValue, ReferencePC,
+ ReferenceType, info);
+ if (*ReferenceName == nullptr)
+ *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
+ // TODO: other types of references to be added.
} else {
- return nullptr;
+ *ReferenceName = nullptr;
+ *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
}
+
+ return SymbolName;
+}
+
+//
+// This is the memory object used by DisAsm->getInstruction() which has its
+// BasePC. This then allows the 'address' parameter to getInstruction() to
+// be the actual PC of the instruction. Then when a branch displacement is
+// added to the PC of an instruction, the 'ReferenceValue' passed to the
+// SymbolizerSymbolLookUp() routine is the correct target address. As in
+// the case of a fully linked Mach-O file where a section being disassembled
+// is generally not linked at address zero.
+//
+// The object does not copy the bytes it is given; it only keeps the pointer
+// and reads through it.
+//
+class DisasmMemoryObject : public MemoryObject {
+  const uint8_t *Bytes; // Contents of the region; only ever read.
+  uint64_t Size;        // Number of bytes available at Bytes.
+  uint64_t BasePC;      // Address that corresponds to Bytes[0].
+public:
+  DisasmMemoryObject(const uint8_t *bytes, uint64_t size, uint64_t basePC)
+      : Bytes(bytes), Size(size), BasePC(basePC) {}
+
+  uint64_t getBase() const override { return BasePC; }
+  uint64_t getExtent() const override { return Size; }
+
+  // Stores the byte at address Addr into *Byte and returns 0, or returns -1
+  // without writing anything if Addr is outside [BasePC, BasePC + Size).
+  int readByte(uint64_t Addr, uint8_t *Byte) const override {
+    // An Addr below BasePC wraps to a huge unsigned offset, so the single
+    // comparison rejects addresses on both sides of the region.
+    uint64_t Offset = Addr - BasePC;
+    if (Offset >= Size)
+      return -1;
+    *Byte = Bytes[Offset];
+    return 0;
+  }
+};
+
+
+/// \brief Emits the comments that are stored in the CommentStream.
+/// Each comment in the CommentStream is expected to end with a newline; a
+/// final comment without one is still emitted.
+///
+/// Every comment line is padded out to the comment column given by \p MAI and
+/// prefixed with the comment string given by \p MAI followed by a space.
+/// Lines after the first are preceded by a newline. On return
+/// \p CommentsToEmit is empty and \p CommentStream has been resynced with it.
+static void emitComments(raw_svector_ostream &CommentStream,
+                         SmallString<128> &CommentsToEmit,
+                         formatted_raw_ostream &FormattedOS,
+                         const MCAsmInfo &MAI) {
+  // Flush the stream before taking its content.
+  CommentStream.flush();
+  StringRef Comments = CommentsToEmit.str();
+  // Get the default information for printing a comment.
+  const char *CommentBegin = MAI.getCommentString();
+  unsigned CommentColumn = MAI.getCommentColumn();
+  bool IsFirst = true;
+  while (!Comments.empty()) {
+    if (!IsFirst)
+      FormattedOS << '\n';
+    // Emit a line of comments.
+    FormattedOS.PadToColumn(CommentColumn);
+    size_t Position = Comments.find('\n');
+    FormattedOS << CommentBegin << ' ' << Comments.substr(0, Position);
+    // With no newline left, everything that remained was just emitted.
+    // Position + 1 would wrap around to zero here and Comments would never
+    // get any shorter, so stop instead.
+    if (Position == StringRef::npos)
+      break;
+    // Move after the newline character.
+    Comments = Comments.substr(Position + 1);
+    IsFirst = false;
+  }
+  FormattedOS.flush();
+
+  // Tell the comment stream that the vector changed underneath it.
+  CommentsToEmit.clear();
+  CommentStream.resync();
}
static void DisassembleInputMachO2(StringRef Filename,
int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
AsmPrinterVariant, *AsmInfo, *InstrInfo, *MRI, *STI));
+ // Set the display preference for hex vs. decimal immediates.
+ IP->setPrintImmHex(PrintImmHex);
+ // Comment stream and backing vector.
+ SmallString<128> CommentsToEmit;
+ raw_svector_ostream CommentStream(CommentsToEmit);
+ IP->setCommentStream(CommentStream);
if (!InstrAnalysis || !AsmInfo || !STI || !DisAsm || !IP) {
errs() << "error: couldn't initialize disassembler for target "
ThumbTarget->createMCSubtargetInfo(ThumbTripleName, MCPU, FeaturesStr));
ThumbCtx.reset(new MCContext(ThumbAsmInfo.get(), ThumbMRI.get(), nullptr));
ThumbDisAsm.reset(ThumbTarget->createMCDisassembler(*ThumbSTI, *ThumbCtx));
-// TODO: add MCSymbolizer here for the ThumbTarget like above for TheTarget.
+ // TODO: add MCSymbolizer here for the ThumbTarget like above for TheTarget.
int ThumbAsmPrinterVariant = ThumbAsmInfo->getAssemblerDialect();
ThumbIP.reset(ThumbTarget->createMCInstPrinter(
ThumbAsmPrinterVariant, *ThumbAsmInfo, *ThumbInstrInfo, *ThumbMRI,
*ThumbSTI));
+ // Set the display preference for hex vs. decimal immediates.
+ ThumbIP->setPrintImmHex(PrintImmHex);
}
if (ThumbTarget && (!ThumbInstrAnalysis || !ThumbAsmInfo || !ThumbSTI ||
StringRef Bytes;
Sections[SectIdx].getContents(Bytes);
- StringRefMemoryObject memoryObject(Bytes);
+ uint64_t SectAddress = 0;
+ Sections[SectIdx].getAddress(SectAddress);
+ DisasmMemoryObject MemoryObject((uint8_t *)Bytes.data(), Bytes.size(),
+ SectAddress);
bool symbolTableWorked = false;
// Parse relocations.
}
array_pod_sort(Relocs.begin(), Relocs.end());
+ // Create a map of symbol addresses to symbol names for use by
+ // the SymbolizerSymbolLookUp() routine.
+ SymbolAddressMap AddrMap;
+ for (const SymbolRef &Symbol : MachOOF->symbols()) {
+ SymbolRef::Type ST;
+ Symbol.getType(ST);
+ if (ST == SymbolRef::ST_Function || ST == SymbolRef::ST_Data ||
+ ST == SymbolRef::ST_Other) {
+ uint64_t Address;
+ Symbol.getAddress(Address);
+ StringRef SymName;
+ Symbol.getName(SymName);
+ AddrMap[Address] = SymName;
+ }
+ }
// Set up the block of info used by the Symbolizer call backs.
SymbolizerInfo.verbose = true;
SymbolizerInfo.O = MachOOF;
SymbolizerInfo.S = Sections[SectIdx];
+ SymbolizerInfo.AddrMap = &AddrMap;
// Disassemble symbol by symbol.
for (unsigned SymIdx = 0; SymIdx != Symbols.size(); SymIdx++) {
for (uint64_t Index = Start; Index < End; Index += Size) {
MCInst Inst;
- uint64_t SectAddress = 0;
- Sections[SectIdx].getAddress(SectAddress);
- outs() << format("%8" PRIx64 ":\t", SectAddress + Index);
+ uint64_t PC = SectAddress + Index;
+ if (FullLeadingAddr) {
+ if (MachOOF->is64Bit())
+ outs() << format("%016" PRIx64, PC);
+ else
+ outs() << format("%08" PRIx64, PC);
+ } else {
+ outs() << format("%8" PRIx64 ":", PC);
+ }
+ if (!NoShowRawInsn)
+ outs() << "\t";
// Check the data in code table here to see if this is data not an
// instruction to be disassembled.
DiceTable Dice;
- Dice.push_back(std::make_pair(SectAddress + Index, DiceRef()));
+ Dice.push_back(std::make_pair(PC, DiceRef()));
dice_table_iterator DTI = std::search(Dices.begin(), Dices.end(),
Dice.begin(), Dice.end(),
compareDiceTableEntries);
continue;
}
+ SmallVector<char, 64> AnnotationsBytes;
+ raw_svector_ostream Annotations(AnnotationsBytes);
+
bool gotInst;
if (isThumb)
- gotInst = ThumbDisAsm->getInstruction(Inst, Size, memoryObject, Index,
- DebugOut, nulls());
+ gotInst = ThumbDisAsm->getInstruction(Inst, Size, MemoryObject, PC,
+ DebugOut, Annotations);
else
- gotInst = DisAsm->getInstruction(Inst, Size, memoryObject, Index,
- DebugOut, nulls());
+ gotInst = DisAsm->getInstruction(Inst, Size, MemoryObject, PC,
+ DebugOut, Annotations);
if (gotInst) {
- DumpBytes(StringRef(Bytes.data() + Index, Size));
+ if (!NoShowRawInsn) {
+ DumpBytes(StringRef(Bytes.data() + Index, Size));
+ }
+ formatted_raw_ostream FormattedOS(outs());
+ Annotations.flush();
+ StringRef AnnotationsStr = Annotations.str();
if (isThumb)
- ThumbIP->printInst(&Inst, outs(), "");
+ ThumbIP->printInst(&Inst, FormattedOS, AnnotationsStr);
else
- IP->printInst(&Inst, outs(), "");
+ IP->printInst(&Inst, FormattedOS, AnnotationsStr);
+ emitComments(CommentStream, CommentsToEmit, FormattedOS, *AsmInfo);
// Print debug info.
if (diContext) {
DILineInfo dli =
- diContext->getLineInfoForAddress(SectAddress + Index);
+ diContext->getLineInfoForAddress(PC);
// Print valid line info if it changed.
if (dli != lastLine && dli.Line != 0)
outs() << "\t## " << dli.FileName << ':' << dli.Line << ':'
for (uint64_t Index = 0; Index < SectSize; Index += InstSize) {
MCInst Inst;
- if (DisAsm->getInstruction(Inst, InstSize, memoryObject, Index,
+ uint64_t PC = SectAddress + Index;
+ if (DisAsm->getInstruction(Inst, InstSize, MemoryObject, PC,
DebugOut, nulls())) {
- outs() << format("%8" PRIx64 ":\t", SectAddress + Index);
- DumpBytes(StringRef(Bytes.data() + Index, InstSize));
+ if (FullLeadingAddr) {
+ if (MachOOF->is64Bit())
+ outs() << format("%016" PRIx64, PC);
+ else
+ outs() << format("%08" PRIx64, PC);
+ } else {
+ outs() << format("%8" PRIx64 ":", PC);
+ }
+ if (!NoShowRawInsn) {
+ outs() << "\t";
+ DumpBytes(StringRef(Bytes.data() + Index, InstSize));
+ }
IP->printInst(&Inst, outs(), "");
outs() << "\n";
} else {