1 //===-- MachODump.cpp - Object file dumping utility for llvm --------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the MachO-specific dumper for llvm-objdump.
12 //===----------------------------------------------------------------------===//
14 #include "llvm-objdump.h"
15 #include "llvm/ADT/OwningPtr.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/StringExtras.h"
18 #include "llvm/ADT/Triple.h"
19 #include "llvm/DebugInfo/DIContext.h"
20 #include "llvm/MC/MCAsmInfo.h"
21 #include "llvm/MC/MCDisassembler.h"
22 #include "llvm/MC/MCInst.h"
23 #include "llvm/MC/MCInstPrinter.h"
24 #include "llvm/MC/MCInstrAnalysis.h"
25 #include "llvm/MC/MCInstrDesc.h"
26 #include "llvm/MC/MCInstrInfo.h"
27 #include "llvm/MC/MCRegisterInfo.h"
28 #include "llvm/MC/MCSubtargetInfo.h"
29 #include "llvm/Object/MachO.h"
30 #include "llvm/Support/Casting.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/Support/Format.h"
34 #include "llvm/Support/GraphWriter.h"
35 #include "llvm/Support/MachO.h"
36 #include "llvm/Support/MemoryBuffer.h"
37 #include "llvm/Support/TargetRegistry.h"
38 #include "llvm/Support/TargetSelect.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include "llvm/Support/system_error.h"
// Allow names from namespace object to be used unqualified below.
44 using namespace object;
// Option "g": per its description, print line information from debug info
// when it is available.
47 UseDbg("g", cl::desc("Print line information from debug info if available"));
// Option "dsym": a string naming a .dSYM file to take debug info from.
// DisassembleInputMachO2 opens this path when the value is non-empty.
49 static cl::opt<std::string>
50 DSYMFile("dsym", cl::desc("Use .dSYM file for debug info"));
// Looks up the Target for MachOObj by TripleName in the TargetRegistry.
//
// When TripleName is empty it is first filled in with an
// "unknown-unknown-unknown" triple whose architecture is taken from
// MachOObj->getArch(); that assignment stays in TripleName for later code
// (DisassembleInputMachO2 reads TripleName after calling this function).
//
// A diagnostic naming the triple is written to errs().
// NOTE(review): presumably that only happens when the lookup fails -- the
// guarding condition and the return statements are not shown here; confirm.
52 static const Target *GetTarget(const MachOObjectFile *MachOObj) {
53 // Figure out the target triple.
54 if (TripleName.empty()) {
55 llvm::Triple TT("unknown-unknown-unknown");
56 TT.setArch(Triple::ArchType(MachOObj->getArch()));
57 TripleName = TT.str();
60 // Get the target specific parser.
// NOTE(review): Error is handed to lookupTarget but does not appear in the
// message printed below -- consider including it in the diagnostic.
62 const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
66 errs() << "llvm-objdump: error: unable to get target for '" << TripleName
67 << "', see --version and --triple.\n";
// Ordering predicate over two symbols.
// NOTE(review): presumably the call operator of the SymbolSorter that
// DisassembleInputMachO2 passes to std::sort to order symbols by address; the
// enclosing declaration is not shown here -- confirm.
72 bool operator()(const SymbolRef &A, const SymbolRef &B) {
73 SymbolRef::Type AType, BType;
77 uint64_t AAddr, BAddr;
// Each operand is special-cased when its type is not ST_Function.
78 if (AType != SymbolRef::ST_Function)
82 if (BType != SymbolRef::ST_Function)
// Gathers the raw material for disassembly from MachOObj.
//
//   Header    - Mach-O header; only NumLoadCommands is read here.
//   MachOObj  - object file whose symbols, sections and load commands are
//               walked.
//   Sections  - out: every section of MachOObj is appended, in iteration order.
//   Symbols   - out: every symbol of MachOObj is appended, in iteration order.
//   FoundFns  - out: filled by ReadULEB128s from the data offset of each
//               LCT_FunctionStarts load command that is encountered.
91 getSectionsAndSymbols(const macho::Header Header,
92 MachOObjectFile *MachOObj,
93 std::vector<SectionRef> &Sections,
94 std::vector<SymbolRef> &Symbols,
95 SmallVectorImpl<uint64_t> &FoundFns) {
// NOTE(review): ec is passed to increment() in both loops below but is never
// inspected in the code shown here.
97 for (symbol_iterator SI = MachOObj->begin_symbols(),
98 SE = MachOObj->end_symbols(); SI != SE; SI.increment(ec))
99 Symbols.push_back(*SI);
101 for (section_iterator SI = MachOObj->begin_sections(),
102 SE = MachOObj->end_sections(); SI != SE; SI.increment(ec)) {
// NOTE(review): SectName is fetched here but not used in the code shown, and
// the result of getName() is ignored.
105 SR.getName(SectName);
106 Sections.push_back(*SI);
// Walk the load commands starting from the first one; i counts how many have
// been examined so far.
109 MachOObjectFile::LoadCommandInfo Command =
110 MachOObj->getFirstLoadCommandInfo();
111 for (unsigned i = 0; ; ++i) {
112 if (Command.C.Type == macho::LCT_FunctionStarts) {
113 // We found a function starts segment, parse the addresses for later
115 macho::LinkeditDataLoadCommand LLC =
116 MachOObj->getLinkeditDataLoadCommand(Command);
118 MachOObj->ReadULEB128s(LLC.DataOffset, FoundFns);
// Exit test for the last load command.
// NOTE(review): the first command is examined before this test, and with
// NumLoadCommands == 0 the test cannot match on the first iteration --
// confirm that callers guarantee at least one load command.
121 if (i == Header.NumLoadCommands - 1)
124 Command = MachOObj->getNextLoadCommandInfo(Command);
// Forward declaration: DisassembleInputMachO below calls this function before
// its definition appears.
128 static void DisassembleInputMachO2(StringRef Filename,
129 MachOObjectFile *MachOOF);
// Entry point for disassembling a Mach-O input.
//
// Loads Filename through MemoryBuffer::getFileOrSTDIN, builds a
// MachOObjectFile from the buffer, and hands it to DisassembleInputMachO2.
// If loading fails, "llvm-objdump: <Filename>: <message>" is written to
// errs().
131 void llvm::DisassembleInputMachO(StringRef Filename) {
132 OwningPtr<MemoryBuffer> Buff;
134 if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, Buff)) {
135 errs() << "llvm-objdump: " << Filename << ": " << ec.message() << "\n";
// Buff.take() releases the buffer from Buff and passes it to
// createMachOObjectFile; the resulting object is held by MachOOF.
// NOTE(review): the result is not null-checked in the code shown before it is
// passed on, and DisassembleInputMachO2 dereferences it (MachOOF->getHeader()).
// If createMachOObjectFile can return null for a malformed input, add a check
// and a diagnostic here -- confirm against its contract.
139 OwningPtr<MachOObjectFile> MachOOF(static_cast<MachOObjectFile*>(
140 ObjectFile::createMachOObjectFile(Buff.take())));
142 DisassembleInputMachO2(Filename, MachOOF.get());
// Disassembles the "__text" section(s) in segment "__TEXT" of MachOOF and
// prints the result to outs().
//
// Each qualifying section is disassembled function symbol by function symbol;
// if symbolTableWorked was never set for a section, the whole section is
// disassembled linearly instead. Undecodable bytes produce a warning on errs()
// and are stepped over one byte at a time.
//
//   Filename - name of the input; printed in the banner and in error messages.
//   MachOOF  - the object file to disassemble.
145 static void DisassembleInputMachO2(StringRef Filename,
146 MachOObjectFile *MachOOF) {
147 const Target *TheTarget = GetTarget(MachOOF);
149 // GetTarget prints out stuff.
// Create the per-target MC objects. TripleName is the file-level triple that
// GetTarget fills in when it was empty.
152 OwningPtr<const MCInstrInfo> InstrInfo(TheTarget->createMCInstrInfo());
153 OwningPtr<MCInstrAnalysis>
154 InstrAnalysis(TheTarget->createMCInstrAnalysis(InstrInfo.get()));
156 // Set up disassembler.
157 OwningPtr<const MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName));
158 OwningPtr<const MCAsmInfo> AsmInfo(
159 TheTarget->createMCAsmInfo(*MRI, TripleName));
160 OwningPtr<const MCSubtargetInfo>
161 STI(TheTarget->createMCSubtargetInfo(TripleName, "", ""));
162 OwningPtr<const MCDisassembler> DisAsm(TheTarget->createMCDisassembler(*STI));
163 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
164 OwningPtr<MCInstPrinter>
165 IP(TheTarget->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *InstrInfo,
// NOTE(review): this check comes after *MRI, *STI, *AsmInfo, *InstrInfo and
// AsmInfo->getAssemblerDialect() have already been evaluated above, and MRI
// and InstrInfo are not tested at all. If any create* call can return null,
// each result needs checking before its first use -- confirm and reorder.
168 if (!InstrAnalysis || !AsmInfo || !STI || !DisAsm || !IP) {
169 errs() << "error: couldn't initialize disassembler for target "
170 << TripleName << '\n';
// Banner: blank line, the file name followed by a colon, blank line.
174 outs() << '\n' << Filename << ":\n\n";
176 macho::Header Header = MachOOF->getHeader();
178 // FIXME: FoundFns isn't used anymore. Using symbols/LC_FUNCTION_STARTS to
179 // determine function locations will eventually go in MCObjectDisassembler.
180 // FIXME: Using the -cfg command line option, this code used to be able to
181 // annotate relocations with the referenced symbol's name, and if this was
182 // inside a __[cf]string section, the data it points to. This is now replaced
183 // by the upcoming MCSymbolizer, which needs the appropriate setup done above.
184 std::vector<SectionRef> Sections;
185 std::vector<SymbolRef> Symbols;
186 SmallVector<uint64_t, 8> FoundFns;
188 getSectionsAndSymbols(Header, MachOOF, Sections, Symbols, FoundFns);
190 // Make a copy of the unsorted symbol list. FIXME: duplication
191 std::vector<SymbolRef> UnsortedSymbols(Symbols);
192 // Sort the symbols by address, just in case they didn't come in that way.
193 std::sort(Symbols.begin(), Symbols.end(), SymbolSorter());
// NOTE(review): two declarations of DebugOut appear back to back; presumably
// they are alternatives selected by preprocessor conditionals that are not
// shown here -- confirm.
196 raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
198 raw_ostream &DebugOut = nulls();
// Debug info is read from the input object itself unless a dSYM path was
// given, in which case DbgObj is replaced below.
201 OwningPtr<DIContext> diContext;
202 ObjectFile *DbgObj = MachOOF;
203 // Try to find debug info and set up the DIContext for it.
205 // A separate DSym file path was specified, parse it as a macho file,
206 // get the sections and supply it to the section name parsing machinery.
207 if (!DSYMFile.empty()) {
208 OwningPtr<MemoryBuffer> Buf;
209 if (error_code ec = MemoryBuffer::getFileOrSTDIN(DSYMFile.c_str(), Buf)) {
// NOTE(review): the file that failed to open here is DSYMFile, but the message
// names Filename -- the diagnostic points at the wrong file.
210 errs() << "llvm-objdump: " << Filename << ": " << ec.message() << '\n';
// NOTE(review): DbgObj is a raw pointer; no owner or release for the object
// created here is visible, and the result is not null-checked before it is
// passed to getDWARFContext below -- confirm ownership and add a check.
213 DbgObj = ObjectFile::createMachOObjectFile(Buf.take());
216 // Setup the DIContext
217 diContext.reset(DIContext::getDWARFContext(DbgObj));
// Visit every section; only text sections named "__text" whose final segment
// name is "__TEXT" pass the filters below.
220 for (unsigned SectIdx = 0; SectIdx != Sections.size(); SectIdx++) {
222 bool SectIsText = false;
223 Sections[SectIdx].isText(SectIsText);
224 if (SectIsText == false)
228 if (Sections[SectIdx].getName(SectName) ||
229 SectName != "__text")
230 continue; // Skip non-text sections
232 DataRefImpl DR = Sections[SectIdx].getRawDataRefImpl();
234 StringRef SegmentName = MachOOF->getSectionFinalSegmentName(DR);
235 if (SegmentName != "__TEXT")
// Bytes holds the section contents; memoryObject wraps them for the
// disassembler, so instruction indices below are offsets into the section.
239 Sections[SectIdx].getContents(Bytes);
240 StringRefMemoryObject memoryObject(Bytes);
// Set once the symbol-driven pass below has something to disassemble; when it
// stays false the whole-section fallback at the end of the loop body runs.
241 bool symbolTableWorked = false;
243 // Parse relocations.
// Each entry pairs a relocation offset, rebased to the start of the section,
// with the symbol the relocation refers to.
// NOTE(review): Relocs is sorted below but not referenced again in the code
// shown here (see the FIXME about MCSymbolizer above).
244 std::vector<std::pair<uint64_t, SymbolRef> > Relocs;
246 for (relocation_iterator RI = Sections[SectIdx].begin_relocations(),
247 RE = Sections[SectIdx].end_relocations(); RI != RE; RI.increment(ec)) {
248 uint64_t RelocOffset, SectionAddress;
249 RI->getOffset(RelocOffset);
250 Sections[SectIdx].getAddress(SectionAddress);
251 RelocOffset -= SectionAddress;
254 RI->getSymbol(RelocSym);
256 Relocs.push_back(std::make_pair(RelocOffset, RelocSym));
258 array_pod_sort(Relocs.begin(), Relocs.end());
260 // Disassemble symbol by symbol.
261 for (unsigned SymIdx = 0; SymIdx != Symbols.size(); SymIdx++) {
263 Symbols[SymIdx].getName(SymName);
// Only function symbols start a disassembly range.
266 Symbols[SymIdx].getType(ST);
267 if (ST != SymbolRef::ST_Function)
270 // Make sure the symbol is defined in this section.
271 bool containsSym = false;
272 Sections[SectIdx].containsSymbol(Symbols[SymIdx], containsSym);
276 // Start at the address of the symbol relative to the section's address.
277 uint64_t SectionAddress = 0;
279 Sections[SectIdx].getAddress(SectionAddress);
280 Symbols[SymIdx].getAddress(Start);
281 Start -= SectionAddress;
283 // Stop disassembling either at the beginning of the next symbol or at
284 // the end of the section.
285 bool containsNextSym = false;
286 uint64_t NextSym = 0;
287 uint64_t NextSymIdx = SymIdx+1;
// Scan forward through the address-sorted symbols for a following function
// symbol; NextSym becomes its offset within the section.
288 while (Symbols.size() > NextSymIdx) {
289 SymbolRef::Type NextSymType;
290 Symbols[NextSymIdx].getType(NextSymType);
291 if (NextSymType == SymbolRef::ST_Function) {
292 Sections[SectIdx].containsSymbol(Symbols[NextSymIdx],
294 Symbols[NextSymIdx].getAddress(NextSym);
295 NextSym -= SectionAddress;
// End is the next function's offset when that symbol lies in this section,
// otherwise the size of the section.
302 Sections[SectIdx].getSize(SectSize);
303 uint64_t End = containsNextSym ? NextSym : SectSize;
306 symbolTableWorked = true;
308 outs() << SymName << ":\n";
// Decode instruction by instruction over [Start, End); Size is the length the
// disassembler reported for the instruction just decoded.
310 for (uint64_t Index = Start; Index < End; Index += Size) {
313 if (DisAsm->getInstruction(Inst, Size, memoryObject, Index,
314 DebugOut, nulls())) {
// Print the instruction's address (section address plus offset) in hex, then
// its raw bytes, then the instruction text.
315 uint64_t SectAddress = 0;
316 Sections[SectIdx].getAddress(SectAddress);
317 outs() << format("%8" PRIx64 ":\t", SectAddress + Index);
319 DumpBytes(StringRef(Bytes.data() + Index, Size));
320 IP->printInst(&Inst, outs(), "");
325 diContext->getLineInfoForAddress(SectAddress + Index);
326 // Print valid line info if it changed.
327 if (dli != lastLine && dli.getLine() != 0)
328 outs() << "\t## " << dli.getFileName() << ':'
329 << dli.getLine() << ':' << dli.getColumn();
334 errs() << "llvm-objdump: warning: invalid instruction encoding\n";
336 Size = 1; // skip illegible bytes
340 if (!symbolTableWorked) {
341 // Reading the symbol table didn't work, disassemble the whole section.
342 uint64_t SectAddress;
343 Sections[SectIdx].getAddress(SectAddress);
345 Sections[SectIdx].getSize(SectSize);
// Linear sweep from offset 0 to the section size, printing address, bytes and
// instruction text for each decoded instruction (no line info on this path in
// the code shown).
347 for (uint64_t Index = 0; Index < SectSize; Index += InstSize) {
350 if (DisAsm->getInstruction(Inst, InstSize, memoryObject, Index,
351 DebugOut, nulls())) {
352 outs() << format("%8" PRIx64 ":\t", SectAddress + Index);
353 DumpBytes(StringRef(Bytes.data() + Index, InstSize));
354 IP->printInst(&Inst, outs(), "");
357 errs() << "llvm-objdump: warning: invalid instruction encoding\n";
359 InstSize = 1; // skip illegible bytes