a0fff215ea89ff38b82262b7528d0c014dff8911
[oota-llvm.git] / tools / llvm-objdump / MachODump.cpp
1 //===-- MachODump.cpp - Object file dumping utility for llvm --------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the MachO-specific dumper for llvm-objdump.
11 //
12 //===----------------------------------------------------------------------===//
13
#include "llvm-objdump.h"
#include "MCFunction.h"
#include "llvm/Support/MachO.h"
#include "llvm/Object/MachOObject.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
#include <algorithm>
#include <cstring>
#include <deque>
40 using namespace llvm;
41 using namespace object;
42
43 static cl::opt<bool>
44   CFG("cfg", cl::desc("Create a CFG for every symbol in the object file and"
45                       "write it to a graphviz file (MachO-only)"));
46
47 static const Target *GetTarget(const MachOObject *MachOObj) {
48   // Figure out the target triple.
49   llvm::Triple TT("unknown-unknown-unknown");
50   switch (MachOObj->getHeader().CPUType) {
51   case llvm::MachO::CPUTypeI386:
52     TT.setArch(Triple::ArchType(Triple::x86));
53     break;
54   case llvm::MachO::CPUTypeX86_64:
55     TT.setArch(Triple::ArchType(Triple::x86_64));
56     break;
57   case llvm::MachO::CPUTypeARM:
58     TT.setArch(Triple::ArchType(Triple::arm));
59     break;
60   case llvm::MachO::CPUTypePowerPC:
61     TT.setArch(Triple::ArchType(Triple::ppc));
62     break;
63   case llvm::MachO::CPUTypePowerPC64:
64     TT.setArch(Triple::ArchType(Triple::ppc64));
65     break;
66   }
67
68   TripleName = TT.str();
69
70   // Get the target specific parser.
71   std::string Error;
72   const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
73   if (TheTarget)
74     return TheTarget;
75
76   errs() << "llvm-objdump: error: unable to get target for '" << TripleName
77          << "', see --version and --triple.\n";
78   return 0;
79 }
80
/// Width-independent copy of the section fields this file needs; filled in
/// by copySection() from either a 32-bit or a 64-bit section record.
struct Section {
  /// Raw 16 byte name field, copied verbatim from the section record.
  char Name[16];
  /// Address of the section.
  uint64_t Address;
  /// Size of the section in bytes.
  uint64_t Size;
  /// Offset of the section contents, as passed to MachOObject::getData().
  uint32_t Offset;
  /// Number of entries in the section's relocation table.
  uint32_t NumRelocs;
  /// Offset of the section's relocation table.
  uint64_t RelocTableOffset;
};
89
/// Width-independent copy of the symbol table fields this file needs; filled
/// in by copySymbol().
struct Symbol {
  /// Value of the symbol; used as its address when disassembling.
  uint64_t Value;
  /// Index of the symbol's name, as passed to getStringAtIndex().
  uint32_t StringIndex;
  /// One-based index of the section the symbol is defined in.
  uint8_t SectionIndex;

  /// Symbols are ordered by Value only; the other fields do not take part.
  bool operator<(const Symbol &Other) const {
    return Value < Other.Value;
  }
};
96
97
98 template <typename T>
99 static Section copySection(const T &Sect) {
100   Section S;
101   memcpy(S.Name, Sect->Name, 16);
102   S.Address = Sect->Address;
103   S.Size = Sect->Size;
104   S.Offset = Sect->Offset;
105   S.NumRelocs = Sect->NumRelocationTableEntries;
106   S.RelocTableOffset = Sect->RelocationTableOffset;
107   return S;
108 }
109
110 template <typename T>
111 static Symbol copySymbol(const T &STE) {
112   Symbol S;
113   S.StringIndex = STE->StringIndex;
114   S.SectionIndex = STE->SectionIndex;
115   S.Value = STE->Value;
116   return S;
117 }
118
119 // Print addtitional information about an address, if available.
120 static void DumpAddress(uint64_t Address, ArrayRef<Section> Sections,
121                         MachOObject *MachOObj, raw_ostream &OS) {
122   for (unsigned i = 0; i != Sections.size(); ++i) {
123     uint64_t addr = Address-Sections[i].Address;
124     if (Sections[i].Address <= Address &&
125         Sections[i].Address + Sections[i].Size > Address) {
126       StringRef bytes = MachOObj->getData(Sections[i].Offset,
127                                           Sections[i].Size);
128       // Print constant strings.
129       if (!strcmp(Sections[i].Name, "__cstring"))
130         OS << '"' << bytes.substr(addr, bytes.find('\0', addr)) << '"';
131       // Print constant CFStrings.
132       if (!strcmp(Sections[i].Name, "__cfstring"))
133         OS << "@\"" << bytes.substr(addr, bytes.find('\0', addr)) << '"';
134     }
135   }
136 }
137
138 typedef std::map<uint64_t, MCFunction*> FunctionMapTy;
139 typedef SmallVector<MCFunction, 16> FunctionListTy;
140 static void createMCFunctionAndSaveCalls(StringRef Name,
141                                          const MCDisassembler *DisAsm,
142                                          MemoryObject &Object, uint64_t Start,
143                                          uint64_t End,
144                                          MCInstrAnalysis *InstrAnalysis,
145                                          uint64_t Address,
146                                          raw_ostream &DebugOut,
147                                          FunctionMapTy &FunctionMap,
148                                          FunctionListTy &Functions) {
149   SmallVector<uint64_t, 16> Calls;
150   MCFunction f =
151     MCFunction::createFunctionFromMC(Name, DisAsm, Object, Start, End,
152                                      InstrAnalysis, DebugOut, Calls);
153   Functions.push_back(f);
154   FunctionMap[Address] = &Functions.back();
155
156   // Add the gathered callees to the map.
157   for (unsigned i = 0, e = Calls.size(); i != e; ++i)
158     FunctionMap.insert(std::make_pair(Calls[i], (MCFunction*)0));
159 }
160
161 // Write a graphviz file for the CFG inside an MCFunction.
162 static void emitDOTFile(const char *FileName, const MCFunction &f,
163                         MCInstPrinter *IP) {
164   // Start a new dot file.
165   std::string Error;
166   raw_fd_ostream Out(FileName, Error);
167   if (!Error.empty()) {
168     errs() << "llvm-objdump: warning: " << Error << '\n';
169     return;
170   }
171
172   Out << "digraph " << f.getName() << " {\n";
173   Out << "graph [ rankdir = \"LR\" ];\n";
174   for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) {
175     bool hasPreds = false;
176     // Only print blocks that have predecessors.
177     // FIXME: Slow.
178     for (MCFunction::iterator pi = f.begin(), pe = f.end(); pi != pe;
179         ++pi)
180       if (pi->second.contains(i->first)) {
181         hasPreds = true;
182         break;
183       }
184
185     if (!hasPreds && i != f.begin())
186       continue;
187
188     Out << '"' << i->first << "\" [ label=\"<a>";
189     // Print instructions.
190     for (unsigned ii = 0, ie = i->second.getInsts().size(); ii != ie;
191         ++ii) {
192       // Escape special chars and print the instruction in mnemonic form.
193       std::string Str;
194       raw_string_ostream OS(Str);
195       IP->printInst(&i->second.getInsts()[ii].Inst, OS, "");
196       Out << DOT::EscapeString(OS.str()) << '|';
197     }
198     Out << "<o>\" shape=\"record\" ];\n";
199
200     // Add edges.
201     for (MCBasicBlock::succ_iterator si = i->second.succ_begin(),
202         se = i->second.succ_end(); si != se; ++si)
203       Out << i->first << ":o -> " << *si <<":a\n";
204   }
205   Out << "}\n";
206 }
207
208 void llvm::DisassembleInputMachO(StringRef Filename) {
209   OwningPtr<MemoryBuffer> Buff;
210
211   if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, Buff)) {
212     errs() << "llvm-objdump: " << Filename << ": " << ec.message() << "\n";
213     return;
214   }
215
216   OwningPtr<MachOObject> MachOObj(MachOObject::LoadFromBuffer(Buff.take()));
217
218   const Target *TheTarget = GetTarget(MachOObj.get());
219   if (!TheTarget) {
220     // GetTarget prints out stuff.
221     return;
222   }
223   const MCInstrInfo *InstrInfo = TheTarget->createMCInstrInfo();
224   OwningPtr<MCInstrAnalysis>
225     InstrAnalysis(TheTarget->createMCInstrAnalysis(InstrInfo));
226
227   // Set up disassembler.
228   OwningPtr<const MCAsmInfo> AsmInfo(TheTarget->createMCAsmInfo(TripleName));
229   OwningPtr<const MCSubtargetInfo>
230     STI(TheTarget->createMCSubtargetInfo(TripleName, "", ""));
231   OwningPtr<const MCDisassembler> DisAsm(TheTarget->createMCDisassembler(*STI));
232   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
233   OwningPtr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
234                               AsmPrinterVariant, *AsmInfo, *STI));
235
236   if (!InstrAnalysis || !AsmInfo || !STI || !DisAsm || !IP) {
237     errs() << "error: couldn't initialize disassmbler for target "
238            << TripleName << '\n';
239     return;
240   }
241
242   outs() << '\n' << Filename << ":\n\n";
243
244   const macho::Header &Header = MachOObj->getHeader();
245
246   const MachOObject::LoadCommandInfo *SymtabLCI = 0;
247   // First, find the symbol table segment.
248   for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
249     const MachOObject::LoadCommandInfo &LCI = MachOObj->getLoadCommandInfo(i);
250     if (LCI.Command.Type == macho::LCT_Symtab) {
251       SymtabLCI = &LCI;
252       break;
253     }
254   }
255
256   // Read and register the symbol table data.
257   InMemoryStruct<macho::SymtabLoadCommand> SymtabLC;
258   MachOObj->ReadSymtabLoadCommand(*SymtabLCI, SymtabLC);
259   MachOObj->RegisterStringTable(*SymtabLC);
260
261   std::vector<Section> Sections;
262   std::vector<Symbol> Symbols;
263   std::vector<Symbol> UnsortedSymbols; // FIXME: duplication
264   SmallVector<uint64_t, 8> FoundFns;
265
266   // Make a list of all symbols in the object file.
267   for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
268     const MachOObject::LoadCommandInfo &LCI = MachOObj->getLoadCommandInfo(i);
269     if (LCI.Command.Type == macho::LCT_Segment) {
270       InMemoryStruct<macho::SegmentLoadCommand> SegmentLC;
271       MachOObj->ReadSegmentLoadCommand(LCI, SegmentLC);
272
273       // Store the sections in this segment.
274       for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) {
275         InMemoryStruct<macho::Section> Sect;
276         MachOObj->ReadSection(LCI, SectNum, Sect);
277         Sections.push_back(copySection(Sect));
278
279         // Store the symbols in this section.
280         for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
281           InMemoryStruct<macho::SymbolTableEntry> STE;
282           MachOObj->ReadSymbolTableEntry(SymtabLC->SymbolTableOffset, i, STE);
283           Symbols.push_back(copySymbol(STE));
284           UnsortedSymbols.push_back(Symbols.back());
285         }
286       }
287     } else if (LCI.Command.Type == macho::LCT_Segment64) {
288       InMemoryStruct<macho::Segment64LoadCommand> Segment64LC;
289       MachOObj->ReadSegment64LoadCommand(LCI, Segment64LC);
290
291       // Store the sections in this segment.
292       for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections;
293            ++SectNum) {
294         InMemoryStruct<macho::Section64> Sect64;
295         MachOObj->ReadSection64(LCI, SectNum, Sect64);
296         Sections.push_back(copySection(Sect64));
297
298         // Store the symbols in this section.
299         for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
300           InMemoryStruct<macho::Symbol64TableEntry> STE;
301           MachOObj->ReadSymbol64TableEntry(SymtabLC->SymbolTableOffset, i, STE);
302           Symbols.push_back(copySymbol(STE));
303           UnsortedSymbols.push_back(Symbols.back());
304         }
305       }
306     } else if (LCI.Command.Type == macho::LCT_FunctionStarts) {
307       // We found a function starts segment, parse the addresses for later
308       // consumption.
309       InMemoryStruct<macho::LinkeditDataLoadCommand> LLC;
310       MachOObj->ReadLinkeditDataLoadCommand(LCI, LLC);
311
312       MachOObj->ReadULEB128s(LLC->DataOffset, FoundFns);
313     }
314   }
315
316
317   // Sort the symbols by address, just in case they didn't come in that way.
318   array_pod_sort(Symbols.begin(), Symbols.end());
319
320 #ifndef NDEBUG
321   raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
322 #else
323   raw_ostream &DebugOut = nulls();
324 #endif
325
326   FunctionMapTy FunctionMap;
327   FunctionListTy Functions;
328
329   for (unsigned SectIdx = 0; SectIdx != Sections.size(); SectIdx++) {
330     if (strcmp(Sections[SectIdx].Name, "__text"))
331       continue; // Skip non-text sections
332
333     // Insert the functions from the function starts segment into our map.
334     uint64_t VMAddr = Sections[SectIdx].Address - Sections[SectIdx].Offset;
335     for (unsigned i = 0, e = FoundFns.size(); i != e; ++i)
336       FunctionMap.insert(std::make_pair(FoundFns[i]+VMAddr, (MCFunction*)0));
337
338     StringRef Bytes = MachOObj->getData(Sections[SectIdx].Offset,
339                                         Sections[SectIdx].Size);
340     StringRefMemoryObject memoryObject(Bytes);
341     bool symbolTableWorked = false;
342
343     // Parse relocations.
344     std::vector<std::pair<uint64_t, uint32_t> > Relocs;
345     for (unsigned j = 0; j != Sections[SectIdx].NumRelocs; ++j) {
346       InMemoryStruct<macho::RelocationEntry> RE;
347       MachOObj->ReadRelocationEntry(Sections[SectIdx].RelocTableOffset, j, RE);
348       Relocs.push_back(std::make_pair(RE->Word0, RE->Word1 & 0xffffff));
349     }
350     array_pod_sort(Relocs.begin(), Relocs.end());
351
352     // Disassemble symbol by symbol.
353     for (unsigned SymIdx = 0; SymIdx != Symbols.size(); SymIdx++) {
354       // Make sure the symbol is defined in this section.
355       if ((unsigned)Symbols[SymIdx].SectionIndex - 1 != SectIdx)
356         continue;
357
358       // Start at the address of the symbol relative to the section's address.
359       uint64_t Start = Symbols[SymIdx].Value - Sections[SectIdx].Address;
360       // Stop disassembling either at the beginning of the next symbol or at
361       // the end of the section.
362       uint64_t End = (SymIdx+1 == Symbols.size() ||
363           Symbols[SymIdx].SectionIndex != Symbols[SymIdx+1].SectionIndex) ?
364           Sections[SectIdx].Size :
365           Symbols[SymIdx+1].Value - Sections[SectIdx].Address;
366       uint64_t Size;
367
368       if (Start >= End)
369         continue;
370
371       symbolTableWorked = true;
372
373       if (!CFG) {
374         // Normal disassembly, print addresses, bytes and mnemonic form.
375         outs() << MachOObj->getStringAtIndex(Symbols[SymIdx].StringIndex)
376           << ":\n";
377         for (uint64_t Index = Start; Index < End; Index += Size) {
378           MCInst Inst;
379
380           if (DisAsm->getInstruction(Inst, Size, memoryObject, Index,
381                                      DebugOut, nulls())) {
382             outs() << format("%8llx:\t", Sections[SectIdx].Address + Index);
383             DumpBytes(StringRef(Bytes.data() + Index, Size));
384             IP->printInst(&Inst, outs(), "");
385             outs() << "\n";
386           } else {
387             errs() << "llvm-objdump: warning: invalid instruction encoding\n";
388             if (Size == 0)
389               Size = 1; // skip illegible bytes
390           }
391         }
392       } else {
393         // Create CFG and use it for disassembly.
394         createMCFunctionAndSaveCalls(
395             MachOObj->getStringAtIndex(Symbols[SymIdx].StringIndex),
396             DisAsm.get(), memoryObject, Start, End, InstrAnalysis.get(),
397             Start, DebugOut, FunctionMap, Functions);
398       }
399     }
400
401     if (CFG) {
402       if (!symbolTableWorked) {
403         // Reading the symbol table didn't work, create a big __TEXT symbol.
404         createMCFunctionAndSaveCalls("__TEXT", DisAsm.get(), memoryObject,
405                                      0, Sections[SectIdx].Size,
406                                      InstrAnalysis.get(),
407                                      Sections[SectIdx].Offset, DebugOut,
408                                      FunctionMap, Functions);
409       }
410       for (std::map<uint64_t, MCFunction*>::iterator mi = FunctionMap.begin(),
411            me = FunctionMap.end(); mi != me; ++mi)
412         if (mi->second == 0) {
413           // Create functions for the remaining callees we have gathered,
414           // but we didn't find a name for them.
415           SmallVector<uint64_t, 16> Calls;
416           MCFunction f =
417             MCFunction::createFunctionFromMC("unknown", DisAsm.get(),
418                                              memoryObject, mi->first,
419                                              Sections[SectIdx].Size,
420                                              InstrAnalysis.get(), DebugOut,
421                                              Calls);
422           Functions.push_back(f);
423           mi->second = &Functions.back();
424           for (unsigned i = 0, e = Calls.size(); i != e; ++i) {
425             std::pair<uint64_t, MCFunction*> p(Calls[i], (MCFunction*)0);
426             if (FunctionMap.insert(p).second)
427               mi = FunctionMap.begin();
428           }
429         }
430
431       DenseSet<uint64_t> PrintedBlocks;
432       for (unsigned ffi = 0, ffe = Functions.size(); ffi != ffe; ++ffi) {
433         MCFunction &f = Functions[ffi];
434         for (MCFunction::iterator fi = f.begin(), fe = f.end(); fi != fe; ++fi){
435           if (!PrintedBlocks.insert(fi->first).second)
436             continue; // We already printed this block.
437
438           // We assume a block has predecessors when it's the first block after
439           // a symbol.
440           bool hasPreds = FunctionMap.find(fi->first) != FunctionMap.end();
441
442           // See if this block has predecessors.
443           // FIXME: Slow.
444           for (MCFunction::iterator pi = f.begin(), pe = f.end(); pi != pe;
445               ++pi)
446             if (pi->second.contains(fi->first)) {
447               hasPreds = true;
448               break;
449             }
450
451           // No predecessors, this is a data block. Print as .byte directives.
452           if (!hasPreds) {
453             uint64_t End = llvm::next(fi) == fe ? Sections[SectIdx].Size :
454                                                   llvm::next(fi)->first;
455             outs() << "# " << End-fi->first << " bytes of data:\n";
456             for (unsigned pos = fi->first; pos != End; ++pos) {
457               outs() << format("%8x:\t", Sections[SectIdx].Address + pos);
458               DumpBytes(StringRef(Bytes.data() + pos, 1));
459               outs() << format("\t.byte 0x%02x\n", (uint8_t)Bytes[pos]);
460             }
461             continue;
462           }
463
464           if (fi->second.contains(fi->first)) // Print a header for simple loops
465             outs() << "# Loop begin:\n";
466
467           // Walk over the instructions and print them.
468           for (unsigned ii = 0, ie = fi->second.getInsts().size(); ii != ie;
469                ++ii) {
470             const MCDecodedInst &Inst = fi->second.getInsts()[ii];
471
472             // If there's a symbol at this address, print its name.
473             if (FunctionMap.find(Sections[SectIdx].Address + Inst.Address) !=
474                 FunctionMap.end())
475               outs() << FunctionMap[Sections[SectIdx].Address + Inst.Address]->
476                                                              getName() << ":\n";
477
478             outs() << format("%8llx:\t", Sections[SectIdx].Address +
479                                          Inst.Address);
480             DumpBytes(StringRef(Bytes.data() + Inst.Address, Inst.Size));
481
482             if (fi->second.contains(fi->first)) // Indent simple loops.
483               outs() << '\t';
484
485             IP->printInst(&Inst.Inst, outs(), "");
486
487             // Look for relocations inside this instructions, if there is one
488             // print its target and additional information if availbable.
489             for (unsigned j = 0; j != Relocs.size(); ++j)
490               if (Relocs[j].first >= Sections[SectIdx].Address + Inst.Address &&
491                   Relocs[j].first < Sections[SectIdx].Address + Inst.Address +
492                                     Inst.Size) {
493                 outs() << "\t# "
494                    << MachOObj->getStringAtIndex(
495                                   UnsortedSymbols[Relocs[j].second].StringIndex)
496                    << ' ';
497                 DumpAddress(UnsortedSymbols[Relocs[j].second].Value, Sections,
498                             MachOObj.get(), outs());
499               }
500
501             // If this instructions contains an address, see if we can evaluate
502             // it and print additional information.
503             uint64_t targ = InstrAnalysis->evaluateBranch(Inst.Inst,
504                                                           Inst.Address,
505                                                           Inst.Size);
506             if (targ != -1ULL)
507               DumpAddress(targ, Sections, MachOObj.get(), outs());
508
509             outs() << '\n';
510           }
511         }
512
513         emitDOTFile((f.getName().str() + ".dot").c_str(), f, IP.get());
514       }
515     }
516   }
517 }