1 //=== MachOWriter.h - Target-independent Mach-O writer support --*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by Nate Begeman and is distributed under the
6 // University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the MachOWriter class.
12 //===----------------------------------------------------------------------===//
17 #include "llvm/Constants.h"
18 #include "llvm/DerivedTypes.h"
19 #include "llvm/CodeGen/MachineFunctionPass.h"
20 #include "llvm/CodeGen/MachineRelocation.h"
21 #include "llvm/Target/TargetData.h"
22 #include "llvm/Target/TargetMachine.h"
23 #include "llvm/Target/TargetMachOWriterInfo.h"
28 class MachineCodeEmitter;
29 class MachOCodeEmitter;
32 /// MachOSym - This struct contains information about each symbol that is
33 /// added to logical symbol table for the module. This is eventually
34 /// turned into a real symbol table in the file.
36 const GlobalValue *GV; // The global value this corresponds to.
37 std::string GVName; // The mangled name of the global value.
38 uint32_t n_strx; // index into the string table
39 uint8_t n_type; // type flag
40 uint8_t n_sect; // section number or NO_SECT
41 int16_t n_desc; // see <mach-o/stab.h>
42 uint64_t n_value; // value for this symbol (or stab offset)
44 // Constants for the n_sect field
45 // see <mach-o/nlist.h>
46 enum { NO_SECT = 0 }; // symbol is not in any section
48 // Constants for the n_type field
49 // see <mach-o/nlist.h>
50 enum { N_UNDF = 0x0, // undefined, n_sect == NO_SECT
51 N_ABS = 0x2, // absolute, n_sect == NO_SECT
52 N_SECT = 0xe, // defined in section number n_sect
53 N_PBUD = 0xc, // prebound undefined (defined in a dylib)
54 N_INDR = 0xa // indirect
56 // The following bits are OR'd into the types above. For example, a type
57 // of 0x0f would be an external N_SECT symbol (0x0e | 0x01).
58 enum { N_EXT = 0x01, // external symbol bit
59 N_PEXT = 0x10 // private external symbol bit
62 // Constants for the n_desc field
63 // see <mach-o/nlist.h>
64 enum { REFERENCE_FLAG_UNDEFINED_NON_LAZY = 0,
65 REFERENCE_FLAG_UNDEFINED_LAZY = 1,
66 REFERENCE_FLAG_DEFINED = 2,
67 REFERENCE_FLAG_PRIVATE_DEFINED = 3,
68 REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY = 4,
69 REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY = 5
71 enum { N_NO_DEAD_STRIP = 0x0020, // symbol is not to be dead stripped
72 N_WEAK_REF = 0x0040, // symbol is weak referenced
73 N_WEAK_DEF = 0x0080 // coalesced symbol is a weak definition
76 MachOSym(const GlobalValue *gv, std::string name, uint8_t sect,
80 /// MachOWriter - This class implements the common target-independent code for
81 /// writing Mach-O files. Targets should derive a class from this to
82 /// parameterize the output format.
84 class MachOWriter : public MachineFunctionPass {
85 friend class MachOCodeEmitter;
87 MachineCodeEmitter &getMachineCodeEmitter() const {
88 return *(MachineCodeEmitter*)MCE;
91 MachOWriter(std::ostream &O, TargetMachine &TM);
92 virtual ~MachOWriter();
94 virtual const char *getPassName() const {
95 return "Mach-O Writer";
98 typedef std::vector<unsigned char> DataBuffer;
100 /// Output stream to send the resultant object file to.
104 /// Target machine description.
108 /// Mang - The object used to perform name mangling for this module.
112 /// MCE - The MachineCodeEmitter object that we are exposing to emit machine
113 /// code for functions to the .o file.
114 MachOCodeEmitter *MCE;
116 /// is64Bit/isLittleEndian - This information is inferred from the target
117 /// machine directly, indicating what header values and flags to set.
118 bool is64Bit, isLittleEndian;
120 /// doInitialization - Emit the file header and all of the global variables
121 /// for the module to the Mach-O file.
122 bool doInitialization(Module &M);
124 bool runOnMachineFunction(MachineFunction &MF);
126 /// doFinalization - Now that the module has been completely processed, emit
127 /// the Mach-O file to 'O'.
128 bool doFinalization(Module &M);
130 /// MachOHeader - This struct contains the header information about a
131 /// specific architecture type/subtype pair that is emitted to the file.
133 uint32_t magic; // mach magic number identifier
134 uint32_t filetype; // type of file
135 uint32_t ncmds; // number of load commands
136 uint32_t sizeofcmds; // the size of all the load commands
137 uint32_t flags; // flags
138 uint32_t reserved; // 64-bit only
140 /// HeaderData - The actual data for the header which we are building
141 /// up for emission to the file.
142 DataBuffer HeaderData;
// Constants for the filetype field
// see <mach-o/loader.h> for additional info on the various types
enum { MH_OBJECT     = 1, // relocatable object file
       MH_EXECUTE    = 2, // demand paged executable file
       MH_FVMLIB     = 3, // fixed VM shared library file
       MH_CORE       = 4, // core file
       MH_PRELOAD    = 5, // preloaded executable file
       MH_DYLIB      = 6, // dynamically bound shared library
       MH_DYLINKER   = 7, // dynamic link editor
       MH_BUNDLE     = 8, // dynamically bound bundle file
       MH_DYLIB_STUB = 9, // shared library stub for static linking only
       MH_DSYM       = 10 // companion file with only debug sections
};
// Constants for the flags field
// see <mach-o/loader.h>
// Every flag occupies its own bit so that flags can be OR'd together.
enum { MH_NOUNDEFS                = 1 << 0,
         // the object file has no undefined references
       MH_INCRLINK                = 1 << 1,
         // the object file is the output of an incremental link against
         // a base file and cannot be link edited again
       MH_DYLDLINK                = 1 << 2,
         // the object file is input for the dynamic linker and cannot be
         // statically link edited again.
       MH_BINDATLOAD              = 1 << 3,
         // the object file's undefined references are bound by the
         // dynamic linker when loaded.
       MH_PREBOUND                = 1 << 4,
         // the file has its dynamic undefined references prebound
       MH_SPLIT_SEGS              = 1 << 5,
         // the file has its read-only and read-write segments split
         // see <mach/shared_memory_server.h>
       MH_LAZY_INIT               = 1 << 6,
         // the shared library init routine is to be run lazily via
         // catching memory faults to its writable segments (obsolete)
       MH_TWOLEVEL                = 1 << 7,
         // the image is using two-level namespace bindings
       MH_FORCE_FLAT              = 1 << 8,
         // the executable is forcing all images to use flat namespace
         // bindings.
       MH_NOMULTIDEFS             = 1 << 9,
         // this umbrella guarantees no multiple definitions of symbols
         // in its sub-images so the two-level namespace hints can
         // always be used.
       MH_NOFIXPREBINDING         = 1 << 10,
         // do not have dyld notify the prebinding agent about this
         // executable.
       MH_PREBINDABLE             = 1 << 11,
         // the binary is not prebound but can have its prebinding
         // redone.  only used when MH_PREBOUND is not set.
       MH_ALLMODSBOUND            = 1 << 12,
         // indicates that this binary binds to all two-level namespace
         // modules of its dependent libraries.  Only used when
         // MH_PREBINDABLE and MH_TWOLEVEL are both set.
       MH_SUBSECTIONS_VIA_SYMBOLS = 1 << 13,
         // safe to divide up the sections into sub-sections via symbols
         // for dead code stripping.
       MH_CANONICAL               = 1 << 14,
         // the binary has been canonicalized via the unprebind operation
       MH_WEAK_DEFINES            = 1 << 15,
         // the final linked image contains external weak symbols
       MH_BINDS_TO_WEAK           = 1 << 16,
         // the final linked image uses weak symbols
       MH_ALLOW_STACK_EXECUTION   = 1 << 17
         // When this bit is set, all stacks in the task will be given
         // stack execution privilege.  Only used in MH_EXECUTE filetype
};
211 MachOHeader() : magic(0), filetype(0), ncmds(0), sizeofcmds(0), flags(0),
214 /// cmdSize - This routine returns the size of the MachOSection as written
215 /// to disk, depending on whether the destination is a 64 bit Mach-O file.
216 unsigned cmdSize(bool is64Bit) const {
218 return 8 * sizeof(uint32_t);
220 return 7 * sizeof(uint32_t);
223 /// setMagic - This routine sets the appropriate value for the 'magic'
224 /// field based on pointer size and endianness.
225 void setMagic(bool isLittleEndian, bool is64Bit) {
227 if (is64Bit) magic = 0xcffaedfe;
228 else magic = 0xcefaedfe;
230 if (is64Bit) magic = 0xfeedfacf;
231 else magic = 0xfeedface;
235 /// Header - An instance of MachOHeader that we will update while we build
236 /// the file, and then emit during finalization.
/// MachOSegment - This struct contains the necessary information to
/// emit the load commands for each section in the file.
struct MachOSegment {
  uint32_t    cmd;      // LC_SEGMENT or LC_SEGMENT_64
  uint32_t    cmdsize;  // Total size of this struct and section commands
  std::string segname;  // segment name
  uint64_t    vmaddr;   // address of this segment
  uint64_t    vmsize;   // size of this segment, may be larger than filesize
  uint64_t    fileoff;  // offset in file
  uint64_t    filesize; // amount to read from file
  uint32_t    maxprot;  // maximum VM protection
  uint32_t    initprot; // initial VM protection
  uint32_t    nsects;   // number of sections in this segment
  uint32_t    flags;    // flags

  // The following constants are getting pulled in by one of the
  // system headers, which creates a neat clash with the enum.
  // Each one is only defined here when no earlier header provided it.
#if !defined(VM_PROT_NONE)
#define VM_PROT_NONE    0x00
#endif
#if !defined(VM_PROT_READ)
#define VM_PROT_READ    0x01
#endif
#if !defined(VM_PROT_WRITE)
#define VM_PROT_WRITE   0x02
#endif
#if !defined(VM_PROT_EXECUTE)
#define VM_PROT_EXECUTE 0x04
#endif
#if !defined(VM_PROT_ALL)
#define VM_PROT_ALL     0x07
#endif

  // Constants for the vm protection fields
  // see <mach/vm_prot.h>
  enum { SEG_VM_PROT_NONE     = VM_PROT_NONE,
         SEG_VM_PROT_READ     = VM_PROT_READ,  // read permission
         SEG_VM_PROT_WRITE    = VM_PROT_WRITE, // write permission
         SEG_VM_PROT_EXECUTE  = VM_PROT_EXECUTE,
         SEG_VM_PROT_ALL      = VM_PROT_ALL
  };

  // Constants for the cmd field
  // see <mach-o/loader.h>
  enum { LC_SEGMENT    = 0x01,  // segment of this file to be mapped
         LC_SEGMENT_64 = 0x19   // 64-bit segment of this file to be mapped
  };

  /// cmdSize - This routine returns the size of the segment load command as
  /// written to disk (excluding the section commands that follow it),
  /// depending on whether the destination is a 64 bit Mach-O file.  The 16
  /// accounts for the fixed-width segment name.
  unsigned cmdSize(bool is64Bit) const {
    if (is64Bit)
      return 6 * sizeof(uint32_t) + 4 * sizeof(uint64_t) + 16;
    else
      return 10 * sizeof(uint32_t) + 16;  // addresses only 32 bits
  }

  /// MachOSegment - Create an empty segment named 'seg' with full
  /// (read/write/execute) maximum and initial protection.  The load command
  /// kind is chosen from 'is64Bit'.
  MachOSegment(const std::string &seg, bool is64Bit)
    : cmd(is64Bit ? LC_SEGMENT_64 : LC_SEGMENT), cmdsize(0), segname(seg),
      vmaddr(0), vmsize(0), fileoff(0), filesize(0), maxprot(VM_PROT_ALL),
      initprot(VM_PROT_ALL), nsects(0), flags(0) { }
};
302 /// MachOSection - This struct contains information about each section in a
303 /// particular segment that is emitted to the file. This is eventually
304 /// turned into the SectionCommand in the load command for a particlar
306 struct MachOSection {
307 std::string sectname; // name of this section,
308 std::string segname; // segment this section goes in
309 uint64_t addr; // memory address of this section
310 uint64_t size; // size in bytes of this section
311 uint32_t offset; // file offset of this section
312 uint32_t align; // section alignment (power of 2)
313 uint32_t reloff; // file offset of relocation entries
314 uint32_t nreloc; // number of relocation entries
315 uint32_t flags; // flags (section type and attributes)
316 uint32_t reserved1; // reserved (for offset or index)
317 uint32_t reserved2; // reserved (for count or sizeof)
318 uint32_t reserved3; // reserved (64 bit only)
320 /// A unique number for this section, which will be used to match symbols
321 /// to the correct section.
324 /// SectionData - The actual data for this section which we are building
325 /// up for emission to the file.
326 DataBuffer SectionData;
328 /// RelocBuffer - A buffer to hold the mach-o relocations before we write
329 /// them out at the appropriate location in the file.
330 DataBuffer RelocBuffer;
332 /// Relocations - The relocations that we have encountered so far in this
333 /// section that we will need to convert to MachORelocation entries when
334 /// the file is written.
335 std::vector<MachineRelocation> Relocations;
337 // Constants for the section types (low 8 bits of flags field)
338 // see <mach-o/loader.h>
339 enum { S_REGULAR = 0,
342 // zero fill on demand section
343 S_CSTRING_LITERALS = 2,
344 // section with only literal C strings
345 S_4BYTE_LITERALS = 3,
346 // section with only 4 byte literals
347 S_8BYTE_LITERALS = 4,
348 // section with only 8 byte literals
349 S_LITERAL_POINTERS = 5,
350 // section with only pointers to literals
351 S_NON_LAZY_SYMBOL_POINTERS = 6,
352 // section with only non-lazy symbol pointers
353 S_LAZY_SYMBOL_POINTERS = 7,
354 // section with only lazy symbol pointers
356 // section with only symbol stubs
357 // byte size of stub in the reserved2 field
358 S_MOD_INIT_FUNC_POINTERS = 9,
359 // section with only function pointers for initialization
360 S_MOD_TERM_FUNC_POINTERS = 10,
361 // section with only function pointers for termination
363 // section contains symbols that are coalesced
365 // zero fill on demand section (that can be larger than 4GB)
367 // section with only pairs of function pointers for interposing
368 S_16BYTE_LITERALS = 14
369 // section with only 16 byte literals
372 // Constants for the section flags (high 24 bits of flags field)
373 // see <mach-o/loader.h>
374 enum { S_ATTR_PURE_INSTRUCTIONS = 1 << 31,
375 // section contains only true machine instructions
376 S_ATTR_NO_TOC = 1 << 30,
377 // section contains coalesced symbols that are not to be in a
378 // ranlib table of contents
379 S_ATTR_STRIP_STATIC_SYMS = 1 << 29,
380 // ok to strip static symbols in this section in files with the
382 S_ATTR_NO_DEAD_STRIP = 1 << 28,
384 S_ATTR_LIVE_SUPPORT = 1 << 27,
385 // blocks are live if they reference live blocks
386 S_ATTR_SELF_MODIFYING_CODE = 1 << 26,
387 // used with i386 code stubs written on by dyld
388 S_ATTR_DEBUG = 1 << 25,
390 S_ATTR_SOME_INSTRUCTIONS = 1 << 10,
391 // section contains some machine instructions
392 S_ATTR_EXT_RELOC = 1 << 9,
393 // section has external relocation entries
394 S_ATTR_LOC_RELOC = 1 << 8
395 // section has local relocation entries
398 /// cmdSize - This routine returns the size of the MachOSection as written
399 /// to disk, depending on whether the destination is a 64 bit Mach-O file.
400 unsigned cmdSize(bool is64Bit) const {
402 return 7 * sizeof(uint32_t) + 2 * sizeof(uint64_t) + 32;
404 return 9 * sizeof(uint32_t) + 32; // addresses only 32 bits
407 MachOSection(const std::string &seg, const std::string §)
408 : sectname(sect), segname(seg), addr(0), size(0), offset(0), align(2),
409 reloff(0), nreloc(0), flags(0), reserved1(0), reserved2(0),
415 /// SectionList - This is the list of sections that we have emitted to the
416 /// file. Once the file has been completely built, the segment load command
417 /// SectionCommands are constructed from this info.
418 std::vector<MachOSection*> SectionList;
420 /// SectionLookup - This is a mapping from section name to SectionList entry
421 std::map<std::string, MachOSection*> SectionLookup;
423 /// GVSection - This is a mapping from a GlobalValue to a MachOSection,
424 /// to aid in emitting relocations.
425 std::map<GlobalValue*, MachOSection*> GVSection;
427 /// GVOffset - This is a mapping from a GlobalValue to an offset from the
428 /// start of the section in which the GV resides, to aid in emitting relocations.
430 std::map<GlobalValue*, intptr_t> GVOffset;
432 /// getSection - Return the section with the specified name, creating a new
433 /// section if one does not already exist.
434 MachOSection *getSection(const std::string &seg, const std::string §,
435 unsigned Flags = 0) {
436 MachOSection *MOS = SectionLookup[seg+sect];
439 MOS = new MachOSection(seg, sect);
440 SectionList.push_back(MOS);
441 MOS->Index = SectionList.size();
442 MOS->flags = MachOSection::S_REGULAR | Flags;
443 SectionLookup[seg+sect] = MOS;
446 MachOSection *getTextSection(bool isCode = true) {
448 return getSection("__TEXT", "__text",
449 MachOSection::S_ATTR_PURE_INSTRUCTIONS |
450 MachOSection::S_ATTR_SOME_INSTRUCTIONS);
452 return getSection("__TEXT", "__text");
454 MachOSection *getBSSSection() {
455 return getSection("__DATA", "__bss", MachOSection::S_ZEROFILL);
457 MachOSection *getDataSection() {
458 return getSection("__DATA", "__data");
460 MachOSection *getConstSection(Constant *C) {
461 const ConstantArray *CVA = dyn_cast<ConstantArray>(C);
462 if (CVA && CVA->isCString())
463 return getSection("__TEXT", "__cstring",
464 MachOSection::S_CSTRING_LITERALS);
466 const Type *Ty = C->getType();
467 if (Ty->isPrimitiveType() || Ty->isInteger()) {
468 unsigned Size = TM.getTargetData()->getTypeSize(Ty);
470 default: break; // Fall through to __TEXT,__const
472 return getSection("__TEXT", "__literal4",
473 MachOSection::S_4BYTE_LITERALS);
475 return getSection("__TEXT", "__literal8",
476 MachOSection::S_8BYTE_LITERALS);
478 return getSection("__TEXT", "__literal16",
479 MachOSection::S_16BYTE_LITERALS);
482 return getSection("__TEXT", "__const");
484 MachOSection *getJumpTableSection() {
485 if (TM.getRelocationModel() == Reloc::PIC_)
486 return getTextSection(false);
488 return getSection("__TEXT", "__const");
/// MachOSymTab - This struct contains information about the offsets and
/// size of symbol table information.
struct MachOSymTab {
  uint32_t cmd;     // LC_SYMTAB
  uint32_t cmdsize; // sizeof( MachOSymTab )
  uint32_t symoff;  // symbol table offset
  uint32_t nsyms;   // number of symbol table entries
  uint32_t stroff;  // string table offset
  uint32_t strsize; // string table size in bytes

  // Constants for the cmd field
  // see <mach-o/loader.h>
  enum { LC_SYMTAB = 0x02  // link-edit stab symbol table info
  };

  /// MachOSymTab - Create an empty symbol table command.  cmdsize is fixed
  /// at the six 32 bit words declared above; all offsets and counts start
  /// at zero.
  MachOSymTab() : cmd(LC_SYMTAB), cmdsize(6 * sizeof(uint32_t)), symoff(0),
    nsyms(0), stroff(0), strsize(0) { }
};
/// MachODySymTab - This struct contains information about the offsets and
/// sizes of the dynamic symbol table information (the LC_DYSYMTAB load
/// command).
struct MachODySymTab {
  uint32_t cmd;             // LC_DYSYMTAB
  uint32_t cmdsize;         // sizeof( MachODySymTab )
  uint32_t ilocalsym;       // index to local symbols
  uint32_t nlocalsym;       // number of local symbols
  uint32_t iextdefsym;      // index to externally defined symbols
  uint32_t nextdefsym;      // number of externally defined symbols
  uint32_t iundefsym;       // index to undefined symbols
  uint32_t nundefsym;       // number of undefined symbols
  uint32_t tocoff;          // file offset to table of contents
  uint32_t ntoc;            // number of entries in table of contents
  uint32_t modtaboff;       // file offset to module table
  uint32_t nmodtab;         // number of module table entries
  uint32_t extrefsymoff;    // offset to referenced symbol table
  uint32_t nextrefsyms;     // number of referenced symbol table entries
  uint32_t indirectsymoff;  // file offset to the indirect symbol table
  uint32_t nindirectsyms;   // number of indirect symbol table entries
  uint32_t extreloff;       // offset to external relocation entries
  uint32_t nextrel;         // number of external relocation entries
  uint32_t locreloff;       // offset to local relocation entries
  uint32_t nlocrel;         // number of local relocation entries

  // Constants for the cmd field
  // see <mach-o/loader.h>
  enum { LC_DYSYMTAB = 0x0B  // dynamic link-edit symbol table info
  };

  /// MachODySymTab - Create an empty dynamic symbol table command.  cmdsize
  /// is fixed at the twenty 32 bit words declared above; every index,
  /// offset and count starts at zero.
  MachODySymTab() : cmd(LC_DYSYMTAB), cmdsize(20 * sizeof(uint32_t)),
    ilocalsym(0), nlocalsym(0), iextdefsym(0), nextdefsym(0),
    iundefsym(0), nundefsym(0), tocoff(0), ntoc(0), modtaboff(0),
    nmodtab(0), extrefsymoff(0), nextrefsyms(0), indirectsymoff(0),
    nindirectsyms(0), extreloff(0), nextrel(0), locreloff(0), nlocrel(0) { }
};
548 /// SymTab - The "stab" style symbol table information
550 /// DySymTab - symbol table info for the dynamic link editor
551 MachODySymTab DySymTab;
554 // FIXME: this does not appear to be sorting 'f' after 'F'
555 bool operator()(const MachOSym &LHS, const MachOSym &RHS) {
556 return LHS.GVName < RHS.GVName;
560 /// PartitionByLocal - Simple boolean predicate that returns true if Sym is
561 /// a local symbol rather than an external symbol.
562 static bool PartitionByLocal(const MachOSym &Sym);
564 /// PartitionByDefined - Simple boolean predicate that returns true if Sym
565 /// is defined in this module.
566 static bool PartitionByDefined(const MachOSym &Sym);
570 /// SymbolTable - This is the list of symbols we have emitted to the file.
571 /// This actually gets rearranged before emission to the file (to put the
572 /// local symbols first in the list).
573 std::vector<MachOSym> SymbolTable;
575 /// SymT - A buffer to hold the symbol table before we write it out at the
576 /// appropriate location in the file.
579 /// StrT - A buffer to hold the string table before we write it out at the
580 /// appropriate location in the file.
583 /// PendingGlobals - This is a list of externally defined symbols that we have
584 /// been asked to emit, but have not seen a reference to. When a reference
585 /// is seen, the symbol will move from this list to the SymbolTable.
586 std::vector<GlobalValue*> PendingGlobals;
588 /// DynamicSymbolTable - This is just a vector of indices into
589 /// SymbolTable to aid in emitting the DYSYMTAB load command.
590 std::vector<unsigned> DynamicSymbolTable;
592 static void InitMem(const Constant *C, void *Addr, intptr_t Offset,
593 const TargetData *TD,
594 std::vector<MachineRelocation> &MRs);
597 void AddSymbolToSection(MachOSection *MOS, GlobalVariable *GV);
598 void EmitGlobal(GlobalVariable *GV);
599 void EmitHeaderAndLoadCommands();
601 void BufferSymbolAndStringTable();
602 void CalculateRelocations(MachOSection &MOS);
604 MachineRelocation GetJTRelocation(unsigned Offset,
605 MachineBasicBlock *MBB) const {
606 return TM.getMachOWriterInfo()->GetJTRelocation(Offset, MBB);
609 /// GetTargetRelocation - Returns the number of relocations.
610 unsigned GetTargetRelocation(MachineRelocation &MR,
614 OutputBuffer &RelocOut,
615 OutputBuffer &SecOut,
618 return TM.getMachOWriterInfo()->GetTargetRelocation(MR, FromIdx, ToAddr,