1 //===-- llvm/MC/MCObjectDisassembler.h --------------------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains the declaration of the MCObjectDisassembler class, which
11 // can be used to construct an MCModule and an MC CFG from an ObjectFile.
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_MC_MCOBJECTDISASSEMBLER_H
16 #define LLVM_MC_MCOBJECTDISASSEMBLER_H
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Support/DataTypes.h"
21 #include "llvm/Support/MemoryObject.h"
28 class MachOObjectFile;
34 class MCInstrAnalysis;
36 class MCObjectSymbolizer;
38 /// \brief Disassemble an ObjectFile to an MCModule and MCFunctions.
39 /// This class builds on MCDisassembler to disassemble whole sections, creating
40 /// MCAtoms (MCTextAtom for disassembled sections and MCDataAtom for raw data).
41 /// It can also be used to create a control flow graph consisting of MCFunctions
42 /// and MCBasicBlocks.
/// Format-specific behavior (entrypoint, static init/exit functions, address
/// translation) is exposed through virtual member functions so that subclasses
/// can override it.
43 class MCObjectDisassembler {
/// \brief Construct an object disassembler for \p Obj.
/// \param Obj The object file to disassemble.
/// \param Dis The instruction-level disassembler this class builds on.
/// \param MIA Instruction analysis helper.
/// NOTE(review): the class declares same-named reference members, so the three
/// arguments presumably must outlive this object -- confirm in the
/// implementation.
45 MCObjectDisassembler(const object::ObjectFile &Obj,
46 const MCDisassembler &Dis,
47 const MCInstrAnalysis &MIA);
/// Virtual destructor: this class is meant to be subclassed.
48 virtual ~MCObjectDisassembler() {}
50 /// \brief Build an MCModule, creating atoms and optionally functions.
51 /// \param withCFG Also build a CFG by adding MCFunctions to the Module.
52 /// If withCFG is false, the MCModule built only contains atoms, representing
53 /// what was found in the object file. If withCFG is true, MCFunctions are
54 /// created, containing MCBasicBlocks. All text atoms are split to form basic
55 /// block atoms, which then each back an MCBasicBlock.
56 MCModule *buildModule(bool withCFG = false);
/// \brief Build an MCModule without the contents created by buildModule().
/// NOTE(review): inferred from the name only; the declaration carries no
/// documentation -- confirm against the implementation.
58 MCModule *buildEmptyModule();
/// A list of addresses. Used below for the call-target and tail-call-target
/// parameters, which are passed by non-const reference.
60 typedef std::vector<uint64_t> AddressSetTy;
61 /// \brief Create a new MCFunction.
/// \param Module The module the function is created for.
/// \param BeginAddr The address at which the function starts.
/// \param CallTargets Passed by non-const reference; presumably receives the
/// addresses of call targets found in the function -- confirm.
/// \param TailCallTargets Passed by non-const reference; presumably receives
/// the addresses of tail-call targets found in the function -- confirm.
62 MCFunction *createFunction(MCModule *Module, uint64_t BeginAddr,
63 AddressSetTy &CallTargets,
64 AddressSetTy &TailCallTargets);
66 /// \brief Set the region on which to fall back if disassembly was requested
67 /// somewhere not accessible in the object file.
68 /// This is used for dynamic disassembly (see RawMemoryObject).
/// Ownership is transferred out of \p Region, which is left null on return.
69 void setFallbackRegion(std::unique_ptr<MemoryObject> &Region) {
70 FallbackRegion.reset(Region.release());
73 /// \brief Set the symbolizer to use to get information on external functions.
74 /// Note that this isn't used to do instruction-level symbolization (that is,
75 /// plugged into MCDisassembler), but to symbolize function call targets.
/// Only the raw pointer is stored.
76 void setSymbolizer(MCObjectSymbolizer *ObjectSymbolizer) {
77 MOS = ObjectSymbolizer;
80 /// \brief Get the effective address of the entrypoint, or 0 if there is none.
81 virtual uint64_t getEntrypoint();
83 /// \name Get the addresses of static constructors/destructors in the object.
84 /// The caller is expected to know how to interpret the addresses;
85 /// for example, Mach-O init functions expect 5 arguments, whereas ELF ones
/// do not.
86 /// The addresses are original object file load addresses, not effective.
88 virtual ArrayRef<uint64_t> getStaticInitFunctions();
89 virtual ArrayRef<uint64_t> getStaticExitFunctions();
92 /// \name Translation between effective and objectfile load address.
94 /// \brief Compute the effective load address, from an objectfile virtual
95 /// address. This is implemented in a format-specific way, to take into
96 /// account things like PIE/ASLR when doing dynamic disassembly.
97 /// For example, on Mach-O this would be done by adding the VM addr slide,
98 /// on glibc ELF by keeping a map between segment load addresses, filled
99 /// using dl_iterate_phdr, etc.
100 /// In most static situations and in the default impl., this returns \p Addr.
101 virtual uint64_t getEffectiveLoadAddr(uint64_t Addr);
103 /// \brief Compute the original load address, as specified in the objectfile.
104 /// This is the inverse of getEffectiveLoadAddr.
105 virtual uint64_t getOriginalLoadAddr(uint64_t EffectiveAddr);
/// The object file, instruction disassembler and instruction analysis passed
/// to the constructor.
109 const object::ObjectFile &Obj;
110 const MCDisassembler &Dis;
111 const MCInstrAnalysis &MIA;
/// The symbolizer installed through setSymbolizer().
112 MCObjectSymbolizer *MOS;
114 /// \brief The fallback memory region, outside the object file.
/// Installed through setFallbackRegion(), which hands ownership to this member.
115 std::unique_ptr<MemoryObject> FallbackRegion;
117 /// \brief Return a memory region suitable for reading starting at \p Addr.
118 /// In most cases, this returns a StringRefMemoryObject backed by the
119 /// containing section. When no section was found, this returns the
120 /// FallbackRegion, if it is suitable.
121 /// If it is not, or if there is no fallback region, this returns 0.
122 MemoryObject *getRegionFor(uint64_t Addr);
125 /// \brief Fill \p Module by creating an atom for each section.
126 /// This could be made much smarter, using information like symbols, but also
127 /// format-specific features, like mach-o function_start or data_in_code LCs.
128 void buildSectionAtoms(MCModule *Module);
130 /// \brief Enrich \p Module with a CFG consisting of MCFunctions.
131 /// \param Module An MCModule returned by buildModule, with no CFG.
132 /// NOTE: Each MCBasicBlock in a MCFunction is backed by a single MCTextAtom.
133 /// When the CFG is built, contiguous instructions that were previously in a
134 /// single MCTextAtom will be split in multiple basic block atoms.
135 void buildCFG(MCModule *Module);
/// NOTE(review): undocumented in the original. Judging by the name and
/// signature, this presumably returns the basic block of \p MCFN that starts
/// at \p BeginAddr, using the same \p CallTargets / \p TailCallTargets
/// parameters as createFunction() -- confirm against the implementation.
137 MCBasicBlock *getBBAt(MCModule *Module, MCFunction *MCFN, uint64_t BeginAddr,
138 AddressSetTy &CallTargets,
139 AddressSetTy &TailCallTargets);
/// \brief Mach-O specialization of MCObjectDisassembler.
/// Overrides the address-translation, entrypoint and static init/exit function
/// queries of the base class.
142 class MCMachOObjectDisassembler : public MCObjectDisassembler {
/// The Mach-O object file being disassembled.
143 const object::MachOObjectFile &MOOF;
/// Values supplied to the constructor (see the parameter documentation below).
145 uint64_t VMAddrSlide;
146 uint64_t HeaderLoadAddress;
148 // __DATA,__mod_init_func support.
// NOTE(review): presumably the raw contents of that section -- confirm.
149 llvm::StringRef ModInitContents;
150 // __DATA,__mod_exit_func support.
// NOTE(review): presumably the raw contents of that section -- confirm.
151 llvm::StringRef ModExitContents;
154 /// \brief Construct a Mach-O specific object disassembler.
/// \param MOOF The Mach-O object file to disassemble.
/// \param Dis The instruction-level disassembler to use.
/// \param MIA Instruction analysis helper.
155 /// \param VMAddrSlide The virtual address slide applied by dyld.
156 /// \param HeaderLoadAddress The load address of the mach_header for this
/// object file.
158 MCMachOObjectDisassembler(const object::MachOObjectFile &MOOF,
159 const MCDisassembler &Dis,
160 const MCInstrAnalysis &MIA, uint64_t VMAddrSlide,
161 uint64_t HeaderLoadAddress);
/// Mach-O implementations of the virtual queries declared in
/// MCObjectDisassembler.
164 uint64_t getEffectiveLoadAddr(uint64_t Addr) override;
165 uint64_t getOriginalLoadAddr(uint64_t EffectiveAddr) override;
166 uint64_t getEntrypoint() override;
168 ArrayRef<uint64_t> getStaticInitFunctions() override;
169 ArrayRef<uint64_t> getStaticExitFunctions() override;