1 //===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "llvm/ADT/StringMap.h"
11 #include "llvm/ADT/Twine.h"
12 #include "llvm/MC/MCAssembler.h"
13 #include "llvm/MC/MCAsmLayout.h"
14 #include "llvm/MC/MCExpr.h"
15 #include "llvm/MC/MCObjectWriter.h"
16 #include "llvm/MC/MCSectionMachO.h"
17 #include "llvm/MC/MCSymbol.h"
18 #include "llvm/MC/MCMachOSymbolFlags.h"
19 #include "llvm/MC/MCValue.h"
20 #include "llvm/Object/MachOFormat.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Target/TargetAsmBackend.h"
25 #include "../Target/X86/X86FixupKinds.h"
29 using namespace llvm::object;
31 // FIXME: this has been copied from (or to) X86AsmBackend.cpp
32 static unsigned getFixupKindLog2Size(unsigned Kind) {
34 default: llvm_unreachable("invalid fixup kind!");
35 case X86::reloc_pcrel_1byte:
36 case FK_Data_1: return 0;
37 case X86::reloc_pcrel_2byte:
38 case FK_Data_2: return 1;
39 case X86::reloc_pcrel_4byte:
40 case X86::reloc_riprel_4byte:
41 case X86::reloc_riprel_4byte_movq_load:
42 case X86::reloc_signed_4byte:
43 case FK_Data_4: return 2;
44 case FK_Data_8: return 3;
48 static bool isFixupKindPCRel(unsigned Kind) {
52 case X86::reloc_pcrel_1byte:
53 case X86::reloc_pcrel_2byte:
54 case X86::reloc_pcrel_4byte:
55 case X86::reloc_riprel_4byte:
56 case X86::reloc_riprel_4byte_movq_load:
61 static bool isFixupKindRIPRel(unsigned Kind) {
62 return Kind == X86::reloc_riprel_4byte ||
63 Kind == X86::reloc_riprel_4byte_movq_load;
66 static bool doesSymbolRequireExternRelocation(MCSymbolData *SD) {
67 // Undefined symbols are always extern.
68 if (SD->Symbol->isUndefined())
71 // References to weak definitions require external relocation entries; the
72 // definition may not always be the one in the same object file.
73 if (SD->getFlags() & SF_WeakDefinition)
76 // Otherwise, we can use an internal relocation.
/// isScatteredFixupFullyResolved - Decide whether a fixup against Target needs
/// no relocation, given the atom (BaseSymbol) the fixup is relative to.
///
/// For symbol A and symbol B of Target (each only when present) the containing
/// atom is looked up with Asm.getAtom(). The two visible results are
/// "A_Base == B_Base" and "!B_Base && BaseSymbol == A_Base".
///
/// NOTE(review): this listing skips embedded lines 81, 98, 101-103, 108,
/// 111-113 and 116-117, so the declaration of Target, the bodies of the guards
/// and the condition choosing between the two returns are not visible here --
/// confirm against the complete file.
80 static bool isScatteredFixupFullyResolved(const MCAssembler &Asm,
82 const MCSymbolData *BaseSymbol) {
83 // The effective fixup address is
84 // addr(atom(A)) + offset(A)
85 // - addr(atom(B)) - offset(B)
86 // - addr(BaseSymbol) + <fixup offset from base symbol>
87 // and the offsets are not relocatable, so the fixup is fully resolved when
88 // addr(atom(A)) - addr(atom(B)) - addr(BaseSymbol) == 0.
90 // Note that "false" is almost always conservatively correct (it means we emit
91 // a relocation which is unnecessary), except when it would force us to emit a
92 // relocation which the target cannot encode.
// Atoms containing A and B; both stay null when the symbol is absent.
94 const MCSymbolData *A_Base = 0, *B_Base = 0;
95 if (const MCSymbolRefExpr *A = Target.getSymA()) {
96 // Modified symbol references cannot be resolved.
97 if (A->getKind() != MCSymbolRefExpr::VK_None)
100 A_Base = Asm.getAtom(&Asm.getSymbolData(A->getSymbol()));
105 if (const MCSymbolRefExpr *B = Target.getSymB()) {
106 // Modified symbol references cannot be resolved.
107 if (B->getKind() != MCSymbolRefExpr::VK_None)
110 B_Base = Asm.getAtom(&Asm.getSymbolData(B->getSymbol()));
115 // If there is no base, A and B have to be the same atom for this fixup to be
118 return A_Base == B_Base;
120 // Otherwise, B must be missing and A must be the base.
121 return !B_Base && BaseSymbol == A_Base;
/// isScatteredFixupFullyResolvedSimple - Section-based variant of the "is this
/// fixup fully resolved" check.
///
/// Visible logic: one path returns Target.isAbsolute(); the other rejects
/// absolute targets and differences (a B symbol present), then tests that
/// symbol A is a temporary, is in a section, and that section is BaseSection.
///
/// NOTE(review): embedded lines 144, 150 and 155-158 are skipped in this
/// listing, so the condition guarding the first return and the values
/// returned by the later guards are not visible -- confirm against the
/// complete file.
124 static bool isScatteredFixupFullyResolvedSimple(const MCAssembler &Asm,
125 const MCValue Target,
126 const MCSection *BaseSection) {
127 // The effective fixup address is
128 // addr(atom(A)) + offset(A)
129 // - addr(atom(B)) - offset(B)
130 // - addr(<base symbol>) + <fixup offset from base symbol>
131 // and the offsets are not relocatable, so the fixup is fully resolved when
132 // addr(atom(A)) - addr(atom(B)) - addr(<base symbol>)) == 0.
134 // The simple (Darwin, except on x86_64) way of dealing with this was to
135 // assume that any reference to a temporary symbol *must* be a temporary
136 // symbol in the same atom, unless the sections differ. Therefore, any PCrel
137 // relocation to a temporary symbol (in the same section) is fully
138 // resolved. This also works in conjunction with absolutized .set, which
139 // requires the compiler to use .set to absolutize the differences between
140 // symbols which the compiler knows to be assembly time constants, so we don't
141 // need to worry about considering symbol differences fully resolved.
143 // Non-relative fixups are only resolved if constant.
145 return Target.isAbsolute();
147 // Otherwise, relative fixups are only resolved if not a difference and the
148 // target is a temporary in the same section.
149 if (Target.isAbsolute() || Target.getSymB())
// Past this point Target has an A symbol and no B symbol.
152 const MCSymbol *A = &Target.getSymA()->getSymbol();
153 if (!A->isTemporary() || !A->isInSection() ||
154 &A->getSection() != BaseSection)
// MachObjectWriter - MCObjectWriter subclass. The methods visible in this
// listing write the Mach-O header, load commands, section headers and nlist
// entries, and record relocation entries per section.
162 class MachObjectWriter : public MCObjectWriter {
163 /// MachSymbolData - Helper struct for containing some precomputed information
165 struct MachSymbolData {
// The symbol this entry describes.
166 MCSymbolData *SymbolData;
// Offset of the symbol name in StringTable (assigned in ComputeSymbolTable).
167 uint64_t StringIndex;
// Section index from ComputeSymbolTable's section map; 0 is stored for
// undefined and absolute symbols.
168 uint8_t SectionIndex;
170 // Support lexicographic sorting.
171 bool operator<(const MachSymbolData &RHS) const {
172 return SymbolData->getSymbol().getName() <
173 RHS.SymbolData->getSymbol().getName();
177 /// @name Relocation Data
// NOTE(review): the members of MachRelocationEntry (embedded lines 181-183)
// are not visible here; the Record*Relocation methods assign Word0 and Word1.
180 struct MachRelocationEntry {
// Relocation entries recorded so far, keyed by the section they apply to.
185 llvm::DenseMap<const MCSectionData*,
186 std::vector<MachRelocationEntry> > Relocations;
// First indirect symbol index for each section that has indirect symbols;
// filled by BindIndirectSymbols and written as 'reserved1' by WriteSection.
187 llvm::DenseMap<const MCSectionData*, unsigned> IndirectSymBase;
190 /// @name Symbol Table Data
// Symbol names, each followed by a NUL byte; offset 0 holds the empty string.
193 SmallString<256> StringTable;
// Symbol table entries, split into the three groups ComputeSymbolTable builds.
194 std::vector<MachSymbolData> LocalSymbolData;
195 std::vector<MachSymbolData> ExternalSymbolData;
196 std::vector<MachSymbolData> UndefinedSymbolData;
// Selects the 64-bit forms of the header, segment, section and nlist structs.
200 unsigned Is64Bit : 1;
/// Construct a writer that emits to \p _OS.
///
/// \p _IsLittleEndian is forwarded to the MCObjectWriter base together with
/// the stream; \p _Is64Bit, \p _CPUType and \p _CPUSubtype are stored in the
/// corresponding members.
206 MachObjectWriter(raw_ostream &_OS,
207 bool _Is64Bit, uint32_t _CPUType, uint32_t _CPUSubtype,
208 bool _IsLittleEndian)
209 : MCObjectWriter(_OS, _IsLittleEndian),
210 Is64Bit(_Is64Bit), CPUType(_CPUType), CPUSubtype(_CPUSubtype) {
213 void WriteHeader(unsigned NumLoadCommands, unsigned LoadCommandsSize,
214 bool SubsectionsViaSymbols) {
217 if (SubsectionsViaSymbols)
218 Flags |= macho::HF_SubsectionsViaSymbols;
220 // struct mach_header (28 bytes) or
221 // struct mach_header_64 (32 bytes)
223 uint64_t Start = OS.tell();
226 Write32(Is64Bit ? macho::HM_Object64 : macho::HM_Object32);
231 Write32(macho::HFT_Object);
232 Write32(NumLoadCommands); // Object files have a single load command, the
234 Write32(LoadCommandsSize);
237 Write32(0); // reserved
239 assert(OS.tell() - Start == Is64Bit ?
240 macho::Header64Size : macho::Header32Size);
243 /// WriteSegmentLoadCommand - Write a segment load command.
245 /// \arg NumSections - The number of sections in this segment.
246 /// \arg SectionDataSize - The total size of the sections.
/// \arg SectionDataStartOffset - Written as the segment's file offset.
///
/// NOTE(review): VMSize is written below but its declaration (embedded line
/// 248) is not visible in this listing -- presumably a parameter; confirm
/// against the complete file. Embedded lines 264-265, 270 and 279 are also
/// skipped, so some fields of the command are not visible here.
247 void WriteSegmentLoadCommand(unsigned NumSections,
249 uint64_t SectionDataStartOffset,
250 uint64_t SectionDataSize) {
251 // struct segment_command (56 bytes) or
252 // struct segment_command_64 (72 bytes)
// Stream position before the command; compared against the size at the end.
254 uint64_t Start = OS.tell();
257 unsigned SegmentLoadCommandSize = Is64Bit ? macho::SegmentLoadCommand64Size:
258 macho::SegmentLoadCommand32Size;
259 Write32(Is64Bit ? macho::LCT_Segment64 : macho::LCT_Segment);
// Command size: the segment command plus one section header per section.
260 Write32(SegmentLoadCommandSize +
261 NumSections * (Is64Bit ? macho::Section64Size :
262 macho::Section32Size));
// The same four fields appear in a 64-bit wide and a 32-bit wide form.
266 Write64(0); // vmaddr
267 Write64(VMSize); // vmsize
268 Write64(SectionDataStartOffset); // file offset
269 Write64(SectionDataSize); // file size
271 Write32(0); // vmaddr
272 Write32(VMSize); // vmsize
273 Write32(SectionDataStartOffset); // file offset
274 Write32(SectionDataSize); // file size
276 Write32(0x7); // maxprot
277 Write32(0x7); // initprot
278 Write32(NumSections);
// Check that exactly the fixed-size part of the command was written.
281 assert(OS.tell() - Start == SegmentLoadCommandSize);
284 void WriteSection(const MCAssembler &Asm, const MCAsmLayout &Layout,
285 const MCSectionData &SD, uint64_t FileOffset,
286 uint64_t RelocationsStart, unsigned NumRelocations) {
287 uint64_t SectionSize = Layout.getSectionSize(&SD);
289 // The offset is unused for virtual sections.
290 if (SD.getSection().isVirtualSection()) {
291 assert(Layout.getSectionFileSize(&SD) == 0 && "Invalid file size!");
295 // struct section (68 bytes) or
296 // struct section_64 (80 bytes)
298 uint64_t Start = OS.tell();
301 const MCSectionMachO &Section = cast<MCSectionMachO>(SD.getSection());
302 WriteBytes(Section.getSectionName(), 16);
303 WriteBytes(Section.getSegmentName(), 16);
305 Write64(Layout.getSectionAddress(&SD)); // address
306 Write64(SectionSize); // size
308 Write32(Layout.getSectionAddress(&SD)); // address
309 Write32(SectionSize); // size
313 unsigned Flags = Section.getTypeAndAttributes();
314 if (SD.hasInstructions())
315 Flags |= MCSectionMachO::S_ATTR_SOME_INSTRUCTIONS;
317 assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!");
318 Write32(Log2_32(SD.getAlignment()));
319 Write32(NumRelocations ? RelocationsStart : 0);
320 Write32(NumRelocations);
322 Write32(IndirectSymBase.lookup(&SD)); // reserved1
323 Write32(Section.getStubSize()); // reserved2
325 Write32(0); // reserved3
327 assert(OS.tell() - Start == Is64Bit ? macho::Section64Size :
328 macho::Section32Size);
/// WriteSymtabLoadCommand - Write the LCT_Symtab load command.
///
/// \arg SymbolOffset - Written after the command size.
/// \arg NumSymbols - NOTE(review): no write of this value is visible; embedded
/// line 342 is skipped in this listing and presumably emits it -- confirm.
/// \arg StringTableOffset - Written after the symbol fields.
/// \arg StringTableSize - Written last.
331 void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols,
332 uint32_t StringTableOffset,
333 uint32_t StringTableSize) {
334 // struct symtab_command (24 bytes)
// Stream position before the command; used by the size check at the end.
336 uint64_t Start = OS.tell();
339 Write32(macho::LCT_Symtab);
340 Write32(macho::SymtabLoadCommandSize);
341 Write32(SymbolOffset);
343 Write32(StringTableOffset);
344 Write32(StringTableSize);
// Check that the bytes written match the declared command size.
346 assert(OS.tell() - Start == macho::SymtabLoadCommandSize);
/// WriteDysymtabLoadCommand - Write the LCT_Dysymtab load command.
///
/// The first index and count of the local, external and undefined symbol
/// groups are written in that order, followed by the indirect symbol table
/// offset and count. The fields labelled tocoff, modtaboff, nmodtab,
/// extrefsymoff, nextrefsyms, extreloff, nextrel, locreloff and nlocrel are
/// written as 0.
///
/// NOTE(review): embedded line 371 (between tocoff and modtaboff) is skipped
/// in this listing.
349 void WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol,
350 uint32_t NumLocalSymbols,
351 uint32_t FirstExternalSymbol,
352 uint32_t NumExternalSymbols,
353 uint32_t FirstUndefinedSymbol,
354 uint32_t NumUndefinedSymbols,
355 uint32_t IndirectSymbolOffset,
356 uint32_t NumIndirectSymbols) {
357 // struct dysymtab_command (80 bytes)
// Stream position before the command; used by the size check at the end.
359 uint64_t Start = OS.tell();
362 Write32(macho::LCT_Dysymtab);
363 Write32(macho::DysymtabLoadCommandSize);
364 Write32(FirstLocalSymbol);
365 Write32(NumLocalSymbols);
366 Write32(FirstExternalSymbol);
367 Write32(NumExternalSymbols);
368 Write32(FirstUndefinedSymbol);
369 Write32(NumUndefinedSymbols);
370 Write32(0); // tocoff
372 Write32(0); // modtaboff
373 Write32(0); // nmodtab
374 Write32(0); // extrefsymoff
375 Write32(0); // nextrefsyms
376 Write32(IndirectSymbolOffset);
377 Write32(NumIndirectSymbols);
378 Write32(0); // extreloff
379 Write32(0); // nextrel
380 Write32(0); // locreloff
381 Write32(0); // nlocrel
// Check that the bytes written match the declared command size.
383 assert(OS.tell() - Start == macho::DysymtabLoadCommandSize);
/// WriteNlist - Write the symbol table (nlist) entry for \p MSD.
///
/// Computes the type bits (undefined / absolute / section, plus the
/// private-extern and external flags) and the address field, then writes the
/// string index and section index. For common symbols the address field holds
/// the common size and the log2 alignment is packed into bits 8-11 of Flags.
///
/// NOTE(review): the declaration of Type and the tail of this method (embedded
/// lines 394, 439 and 443-449) are not visible in this listing.
386 void WriteNlist(MachSymbolData &MSD, const MCAsmLayout &Layout) {
387 MCSymbolData &Data = *MSD.SymbolData;
388 const MCSymbol &Symbol = Data.getSymbol();
390 uint16_t Flags = Data.getFlags();
391 uint32_t Address = 0;
393 // Set the N_TYPE bits. See <mach-o/nlist.h>.
395 // FIXME: Are the prebound or indirect fields possible here?
396 if (Symbol.isUndefined())
397 Type = macho::STT_Undefined;
398 else if (Symbol.isAbsolute())
399 Type = macho::STT_Absolute;
401 Type = macho::STT_Section;
403 // FIXME: Set STAB bits.
405 if (Data.isPrivateExtern())
406 Type |= macho::STF_PrivateExtern;
// Undefined symbols are marked external as well.
409 if (Data.isExternal() || Symbol.isUndefined())
410 Type |= macho::STF_External;
412 // Compute the symbol address.
413 if (Symbol.isDefined()) {
414 if (Symbol.isAbsolute()) {
// An absolute symbol's address is the constant it was assigned.
415 Address = cast<MCConstantExpr>(Symbol.getVariableValue())->getValue();
417 Address = Layout.getSymbolAddress(&Data);
419 } else if (Data.isCommon()) {
420 // Common symbols are encoded with the size in the address
421 // field, and their alignment in the flags.
422 Address = Data.getCommonSize();
424 // Common alignment is packed into the 'desc' bits.
425 if (unsigned Align = Data.getCommonAlignment()) {
426 unsigned Log2Size = Log2_32(Align);
427 assert((1U << Log2Size) == Align && "Invalid 'common' alignment!");
// NOTE(review): the condition guarding this error (embedded line 428) is not
// visible in this listing.
429 report_fatal_error("invalid 'common' alignment '" +
431 // FIXME: Keep this mask with the SymbolFlags enumeration.
// Clears bits 8-11 of Flags and stores Log2Size there.
432 Flags = (Flags & 0xF0FF) | (Log2Size << 8);
436 // struct nlist (12 bytes)
438 Write32(MSD.StringIndex);
440 Write8(MSD.SectionIndex);
442 // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
451 // FIXME: We really need to improve the relocation validation. Basically, we
452 // want to implement a separate computation which evaluates the relocation
453 // entry as the linker would, and verifies that the resultant fixup value is
454 // exactly what the encoder wanted. This will catch several classes of
457 // - Relocation entry bugs, the two algorithms are unlikely to have the same
460 // - Relaxation issues, where we forget to relax something.
462 // - Input errors, where something cannot be correctly encoded. 'as' allows
463 // these through in many cases.
/// RecordX86_64Relocation - Record the x86_64 relocation entry (or entries)
/// for \p Fixup and append them to Relocations for the fragment's section.
///
/// Three shapes of \p Target are distinguished: an absolute constant, a
/// difference "A - B + constant" (an Unsigned entry for A is pushed first,
/// then the entry for B is typed Subtractor), and a single symbol plus
/// constant. In the single-symbol case the relocation type depends on the
/// symbol modifier (GOTPCREL, GOT, TLVP or none) and on whether the fixup is
/// pc-relative / rip-relative. Unsupported combinations are reported with
/// report_fatal_error.
///
/// NOTE(review): many embedded lines are skipped in this listing (among them
/// the declarations of Value, Index and Type, most if/else headers, and the
/// statement that stores into FixedValue), so the exact control flow cannot be
/// read from here -- confirm against the complete file.
465 void RecordX86_64Relocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
466 const MCFragment *Fragment,
467 const MCFixup &Fixup, MCValue Target,
468 uint64_t &FixedValue) {
469 unsigned IsPCRel = isFixupKindPCRel(Fixup.getKind());
470 unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind());
471 unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
// Offset of the fixup within its section, and its address in the layout.
474 uint32_t FixupOffset =
475 Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
476 uint32_t FixupAddress =
477 Layout.getFragmentAddress(Fragment) + Fixup.getOffset();
480 unsigned IsExtern = 0;
483 Value = Target.getConstant();
486 // Compensate for the relocation offset, Darwin x86_64 relocations only
487 // have the addend and appear to have attempted to define it to be the
488 // actual expression addend without the PCrel bias. However, instructions
489 // with data following the relocation are not accommodated for (see comment
490 // below regarding SIGNED{1,2,4}), so it isn't exactly that either.
491 Value += 1LL << Log2Size;
494 if (Target.isAbsolute()) { // constant
495 // SymbolNum of 0 indicates the absolute section.
496 Type = macho::RIT_X86_64_Unsigned;
499 // FIXME: I believe this is broken, I don't think the linker can
500 // understand it. I think it would require a local relocation, but I'm not
501 // sure if that would work either. The official way to get an absolute
502 // PCrel relocation is to use an absolute symbol (which we don't support
506 Type = macho::RIT_X86_64_Branch;
508 } else if (Target.getSymB()) { // A - B + constant
// Look up both symbols and the atoms (base symbols) that contain them.
509 const MCSymbol *A = &Target.getSymA()->getSymbol();
510 MCSymbolData &A_SD = Asm.getSymbolData(*A);
511 const MCSymbolData *A_Base = Asm.getAtom(&A_SD);
513 const MCSymbol *B = &Target.getSymB()->getSymbol();
514 MCSymbolData &B_SD = Asm.getSymbolData(*B);
515 const MCSymbolData *B_Base = Asm.getAtom(&B_SD);
517 // Neither symbol can be modified.
518 if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
519 Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
520 report_fatal_error("unsupported relocation of modified symbol");
522 // We don't support PCrel relocations of differences. Darwin 'as' doesn't
523 // implement most of these correctly.
525 report_fatal_error("unsupported pc-relative relocation of difference");
527 // The support for the situation where one or both of the symbols would
528 // require a local relocation is handled just like if the symbols were
529 // external. This is certainly used in the case of debug sections where
530 // the section has only temporary symbols and thus the symbols don't have
531 // base symbols. This is encoded using the section ordinal and
532 // non-extern relocation entries.
534 // Darwin 'as' doesn't emit correct relocations for this (it ends up with
535 // a single SIGNED relocation); reject it for now. Except the case where
536 // both symbols don't have a base, equal but both NULL.
537 if (A_Base == B_Base && A_Base)
538 report_fatal_error("unsupported relocation with identical base");
// Fold each symbol's offset from its base (or its full address when it has no
// base) into the addend.
540 Value += Layout.getSymbolAddress(&A_SD) -
541 (A_Base == NULL ? 0 : Layout.getSymbolAddress(A_Base));
542 Value -= Layout.getSymbolAddress(&B_SD) -
543 (B_Base == NULL ? 0 : Layout.getSymbolAddress(B_Base));
546 Index = A_Base->getIndex();
550 Index = A_SD.getFragment()->getParent()->getOrdinal() + 1;
553 Type = macho::RIT_X86_64_Unsigned;
555 MachRelocationEntry MRE;
556 MRE.Word0 = FixupOffset;
557 MRE.Word1 = ((Index << 0) |
562 Relocations[Fragment->getParent()].push_back(MRE);
565 Index = B_Base->getIndex();
569 Index = B_SD.getFragment()->getParent()->getOrdinal() + 1;
572 Type = macho::RIT_X86_64_Subtractor;
574 const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
575 MCSymbolData &SD = Asm.getSymbolData(*Symbol);
576 const MCSymbolData *Base = Asm.getAtom(&SD);
578 // Relocations inside debug sections always use local relocations when
579 // possible. This seems to be done because the debugger doesn't fully
580 // understand x86_64 relocation entries, and expects to find values that
581 // have already been fixed up.
582 if (Symbol->isInSection()) {
583 const MCSectionMachO &Section = static_cast<const MCSectionMachO&>(
584 Fragment->getParent()->getSection());
585 if (Section.hasAttribute(MCSectionMachO::S_ATTR_DEBUG))
589 // x86_64 almost always uses external relocations, except when there is no
590 // symbol to use as a base address (a local symbol with no preceding
591 // non-local symbol).
593 Index = Base->getIndex();
596 // Add the local offset, if needed.
598 Value += Layout.getSymbolAddress(&SD) - Layout.getSymbolAddress(Base);
599 } else if (Symbol->isInSection()) {
600 // The index is the section ordinal (1-based).
601 Index = SD.getFragment()->getParent()->getOrdinal() + 1;
603 Value += Layout.getSymbolAddress(&SD);
606 Value -= FixupAddress + (1 << Log2Size);
608 report_fatal_error("unsupported relocation of undefined symbol '" +
609 Symbol->getName() + "'");
612 MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind();
615 if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
616 // x86_64 distinguishes movq foo@GOTPCREL so that the linker can
617 // rewrite the movq to an leaq at link time if the symbol ends up in
618 // the same linkage unit.
619 if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load)
620 Type = macho::RIT_X86_64_GOTLoad;
622 Type = macho::RIT_X86_64_GOT;
623 } else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
624 Type = macho::RIT_X86_64_TLV;
625 } else if (Modifier != MCSymbolRefExpr::VK_None) {
626 report_fatal_error("unsupported symbol modifier in relocation");
628 Type = macho::RIT_X86_64_Signed;
630 // The Darwin x86_64 relocation format has a problem where it cannot
631 // encode an address (L<foo> + <constant>) which is outside the atom
632 // containing L<foo>. Generally, this shouldn't occur but it does
633 // happen when we have a RIPrel instruction with data following the
634 // relocation entry (e.g., movb $012, L0(%rip)). Even with the PCrel
635 // adjustment Darwin x86_64 uses, the offset is still negative and
636 // the linker has no way to recognize this.
638 // To work around this, Darwin uses several special relocation types
639 // to indicate the offsets. However, the specification or
640 // implementation of these seems to also be incomplete; they should
641 // adjust the addend as well based on the actual encoded instruction
642 // (the additional bias), but instead appear to just look at the
// A biased addend of -1, -2 or -4 selects the Signed1/2/4 relocation types.
644 switch (-(Target.getConstant() + (1LL << Log2Size))) {
645 case 1: Type = macho::RIT_X86_64_Signed1; break;
646 case 2: Type = macho::RIT_X86_64_Signed2; break;
647 case 4: Type = macho::RIT_X86_64_Signed4; break;
651 if (Modifier != MCSymbolRefExpr::VK_None)
652 report_fatal_error("unsupported symbol modifier in branch "
655 Type = macho::RIT_X86_64_Branch;
658 if (Modifier == MCSymbolRefExpr::VK_GOT) {
659 Type = macho::RIT_X86_64_GOT;
660 } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
661 // GOTPCREL is allowed as a modifier on non-PCrel instructions, in
662 // which case all we do is set the PCrel bit in the relocation entry;
663 // this is used with exception handling, for example. The source is
664 // required to include any necessary offset directly.
665 Type = macho::RIT_X86_64_GOT;
667 } else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
668 report_fatal_error("TLVP symbol modifier should have been rip-rel");
669 } else if (Modifier != MCSymbolRefExpr::VK_None)
670 report_fatal_error("unsupported symbol modifier in relocation");
672 Type = macho::RIT_X86_64_Unsigned;
676 // x86_64 always writes custom values into the fixups.
679 // struct relocation_info (8 bytes)
680 MachRelocationEntry MRE;
681 MRE.Word0 = FixupOffset;
682 MRE.Word1 = ((Index << 0) |
687 Relocations[Fragment->getParent()].push_back(MRE);
/// RecordScatteredRelocation - Record a scattered relocation entry for
/// \p Fixup against symbol A of \p Target, plus a preceding RIT_Pair entry
/// when \p Target is a difference (a B symbol is present).
///
/// Both symbols must have a fragment; otherwise a fatal error is reported.
/// The entries are appended to Relocations for the fragment's section.
///
/// NOTE(review): the declaration of Value2 and several Word0/Word1 operands
/// (embedded lines 699, 733-734, 736, 742-744, 746) are skipped in this
/// listing.
690 void RecordScatteredRelocation(const MCAssembler &Asm,
691 const MCAsmLayout &Layout,
692 const MCFragment *Fragment,
693 const MCFixup &Fixup, MCValue Target,
694 uint64_t &FixedValue) {
695 uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
696 unsigned IsPCRel = isFixupKindPCRel(Fixup.getKind());
697 unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
// Default type; replaced below when the target is a difference.
698 unsigned Type = macho::RIT_Vanilla;
701 const MCSymbol *A = &Target.getSymA()->getSymbol();
702 MCSymbolData *A_SD = &Asm.getSymbolData(*A);
704 if (!A_SD->getFragment())
705 report_fatal_error("symbol '" + A->getName() +
706 "' can not be undefined in a subtraction expression");
708 uint32_t Value = Layout.getSymbolAddress(A_SD);
711 if (const MCSymbolRefExpr *B = Target.getSymB()) {
712 MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
714 if (!B_SD->getFragment())
715 report_fatal_error("symbol '" + B->getSymbol().getName() +
716 "' can not be undefined in a subtraction expression");
718 // Select the appropriate difference relocation type.
720 // Note that there is no longer any semantic difference between these two
721 // relocation types from the linkers point of view, this is done solely
722 // for pedantic compatibility with 'as'.
723 Type = A_SD->isExternal() ? macho::RIT_Difference :
724 macho::RIT_LocalDifference;
725 Value2 = Layout.getSymbolAddress(B_SD);
728 // Relocations are written out in reverse order, so the PAIR comes first.
729 if (Type == macho::RIT_Difference || Type == macho::RIT_LocalDifference) {
730 MachRelocationEntry MRE;
731 MRE.Word0 = ((0 << 0) |
732 (macho::RIT_Pair << 24) |
735 macho::RF_Scattered);
737 Relocations[Fragment->getParent()].push_back(MRE);
// The entry for the fixup itself.
740 MachRelocationEntry MRE;
741 MRE.Word0 = ((FixupOffset << 0) |
745 macho::RF_Scattered);
747 Relocations[Fragment->getParent()].push_back(MRE);
/// RecordTLVPRelocation - Record an external RIT_TLV relocation entry for a
/// fixup whose A symbol carries the VK_TLVP modifier.
///
/// The entry's symbol index is the index of symbol A. When \p Target also has
/// a B symbol, FixedValue is set to the fixup address minus B's address plus
/// the constant, plus the fixup size in bytes (1 << Log2Size).
///
/// NOTE(review): the first half of the assert condition, the else branch and
/// parts of the Word0/Word1 assignments (embedded lines 756, 770, 776,
/// 780-782, 786, 788-789) are skipped in this listing.
750 void RecordTLVPRelocation(const MCAssembler &Asm,
751 const MCAsmLayout &Layout,
752 const MCFragment *Fragment,
753 const MCFixup &Fixup, MCValue Target,
754 uint64_t &FixedValue) {
755 assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP &&
757 "Should only be called with a 32-bit TLVP relocation!");
759 unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
// Offset of the fixup within its section.
760 uint32_t Value = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
761 unsigned IsPCRel = 0;
763 // Get the symbol data.
764 MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol());
765 unsigned Index = SD_A->getIndex();
767 // We're only going to have a second symbol in pic mode and it'll be a
768 // subtraction from the picbase. For 32-bit pic the addend is the difference
769 // between the picbase and the next address. For 32-bit static the addend
771 if (Target.getSymB()) {
772 // If this is a subtraction then we're pcrel.
773 uint32_t FixupAddress =
774 Layout.getFragmentAddress(Fragment) + Fixup.getOffset();
775 MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol());
777 FixedValue = (FixupAddress - Layout.getSymbolAddress(SD_B) +
778 Target.getConstant());
779 FixedValue += 1ULL << Log2Size;
784 // struct relocation_info (8 bytes)
785 MachRelocationEntry MRE;
787 MRE.Word1 = ((Index << 0) |
790 (1 << 27) | // Extern
791 (macho::RIT_TLV << 28)); // Type
792 Relocations[Fragment->getParent()].push_back(MRE);
/// RecordRelocation - Record the relocation entry for \p Fixup, dispatching to
/// the specialized recorders where they apply.
///
/// Visible dispatch, in order: RecordX86_64Relocation; RecordTLVPRelocation
/// when symbol A carries VK_TLVP; RecordScatteredRelocation for differences
/// and for internal relocations with a non-zero offset. Otherwise a plain
/// RIT_Vanilla entry is built here, indexed either by symbol (external) or by
/// 1-based section ordinal (internal).
///
/// NOTE(review): the conditions guarding the x86_64 dispatch and the pcrel
/// offset adjustment, and the declarations of Index and Type (embedded lines
/// 798, 828, 836, 838), are skipped in this listing.
795 void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
796 const MCFragment *Fragment, const MCFixup &Fixup,
797 MCValue Target, uint64_t &FixedValue) {
799 RecordX86_64Relocation(Asm, Layout, Fragment, Fixup, Target, FixedValue);
803 unsigned IsPCRel = isFixupKindPCRel(Fixup.getKind());
804 unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
806 // If this is a 32-bit TLVP reloc it's handled a bit differently.
807 if (Target.getSymA() &&
808 Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) {
809 RecordTLVPRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue);
813 // If this is a difference or a defined symbol plus an offset, then we need
814 // a scattered relocation entry.
815 // Differences always require scattered relocations.
816 if (Target.getSymB())
817 return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup,
820 // Get the symbol data, if any.
821 MCSymbolData *SD = 0;
822 if (Target.getSymA())
823 SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
825 // If this is an internal relocation with an offset, it also needs a
826 // scattered relocation entry.
827 uint32_t Offset = Target.getConstant();
829 Offset += 1 << Log2Size;
830 if (Offset && SD && !doesSymbolRequireExternRelocation(SD))
831 return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup,
// Offset of the fixup within its section.
835 uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
837 unsigned IsExtern = 0;
840 if (Target.isAbsolute()) { // constant
841 // SymbolNum of 0 indicates the absolute section.
843 // FIXME: Currently, these are never generated (see code below). I cannot
844 // find a case where they are actually emitted.
845 Type = macho::RIT_Vanilla;
847 // Check whether we need an external or internal relocation.
848 if (doesSymbolRequireExternRelocation(SD)) {
850 Index = SD->getIndex();
851 // For external relocations, make sure to offset the fixup value to
852 // compensate for the addend of the symbol address, if it was
853 // undefined. This occurs with weak definitions, for example.
854 if (!SD->Symbol->isUndefined())
855 FixedValue -= Layout.getSymbolAddress(SD);
857 // The index is the section ordinal (1-based).
858 Index = SD->getFragment()->getParent()->getOrdinal() + 1;
861 Type = macho::RIT_Vanilla;
864 // struct relocation_info (8 bytes)
865 MachRelocationEntry MRE;
866 MRE.Word0 = FixupOffset;
867 MRE.Word1 = ((Index << 0) |
872 Relocations[Fragment->getParent()].push_back(MRE);
/// BindIndirectSymbols - Make sure symbol data exists for every indirect
/// symbol and record, per section, the index of its first indirect symbol.
///
/// Two passes are made over the assembler's indirect symbols: the first
/// handles S_NON_LAZY_SYMBOL_POINTERS sections, the second handles
/// S_LAZY_SYMBOL_POINTERS and S_SYMBOL_STUBS sections. Each pass stores the
/// running index into IndirectSymBase the first time a section is seen.
///
/// NOTE(review): the statements taken when the section type does not match,
/// the reset of IndirectIndex between the passes and the declaration and test
/// of Created (embedded lines 891, 901, 909, 918, 920) are skipped in this
/// listing.
875 void BindIndirectSymbols(MCAssembler &Asm) {
876 // This is the point where 'as' creates actual symbols for indirect symbols
877 // (in the following two passes). It would be easier for us to do this
878 // sooner when we see the attribute, but that makes getting the order in the
879 // symbol table much more complicated than it is worth.
881 // FIXME: Revisit this when the dust settles.
883 // Bind non lazy symbol pointers first.
884 unsigned IndirectIndex = 0;
885 for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
886 ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
887 const MCSectionMachO &Section =
888 cast<MCSectionMachO>(it->SectionData->getSection());
890 if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS)
893 // Initialize the section indirect symbol base, if necessary.
894 if (!IndirectSymBase.count(it->SectionData))
895 IndirectSymBase[it->SectionData] = IndirectIndex;
// Called for its side effect only; the returned reference is unused.
897 Asm.getOrCreateSymbolData(*it->Symbol);
900 // Then lazy symbol pointers and symbol stubs.
902 for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
903 ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
904 const MCSectionMachO &Section =
905 cast<MCSectionMachO>(it->SectionData->getSection());
907 if (Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS &&
908 Section.getType() != MCSectionMachO::S_SYMBOL_STUBS)
911 // Initialize the section indirect symbol base, if necessary.
912 if (!IndirectSymBase.count(it->SectionData))
913 IndirectSymBase[it->SectionData] = IndirectIndex;
915 // Set the symbol type to undefined lazy, but only on construction.
917 // FIXME: Do not hardcode.
919 MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created);
// Sets the low flag bit (0x0001) on the symbol.
921 Entry.setFlags(Entry.getFlags() | 0x0001);
925 /// ComputeSymbolTable - Compute the symbol table data
927 /// \param StringTable [out] - The string table data.
928 /// \param StringIndexMap [out] - Map from symbol names to offsets in the
/// \param LocalSymbolData [out] - Entries for linker-visible symbols that are
/// neither external nor undefined.
/// \param ExternalSymbolData [out] - Entries for defined external symbols,
/// sorted by name.
/// \param UndefinedSymbolData [out] - Entries for undefined symbols, sorted by
/// name.
///
/// Symbol indices are assigned in the order local, external, undefined, and
/// the string table is padded with NUL bytes to a multiple of 4.
///
/// NOTE(review): the declarations of Index and MSD and several guard
/// statements (embedded lines 936, 959, 962, 965, 971-972, 995, 998, 1001,
/// 1007, 1026) are skipped in this listing.
930 void ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
931 std::vector<MachSymbolData> &LocalSymbolData,
932 std::vector<MachSymbolData> &ExternalSymbolData,
933 std::vector<MachSymbolData> &UndefinedSymbolData) {
934 // Build section lookup table.
935 DenseMap<const MCSection*, uint8_t> SectionIndexMap;
937 for (MCAssembler::iterator it = Asm.begin(),
938 ie = Asm.end(); it != ie; ++it, ++Index)
939 SectionIndexMap[&it->getSection()] = Index;
// Section indices are stored in a uint8_t.
940 assert(Index <= 256 && "Too many sections!");
942 // Index 0 is always the empty string.
943 StringMap<uint64_t> StringIndexMap;
944 StringTable += '\x00';
946 // Build the symbol arrays and the string table, but only for non-local
949 // The particular order that we collect the symbols and create the string
950 // table, then sort the symbols is chosen to match 'as'. Even though it
951 // doesn't matter for correctness, this is important for letting us diff .o
953 for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
954 ie = Asm.symbol_end(); it != ie; ++it) {
955 const MCSymbol &Symbol = it->getSymbol();
957 // Ignore non-linker visible symbols.
958 if (!Asm.isSymbolLinkerVisible(it->getSymbol()))
// This pass handles external and undefined symbols only.
961 if (!it->isExternal() && !Symbol.isUndefined())
// Entry is the name's offset in the string table; the name is appended at the
// current end of the table.
964 uint64_t &Entry = StringIndexMap[Symbol.getName()];
966 Entry = StringTable.size();
967 StringTable += Symbol.getName();
968 StringTable += '\x00';
973 MSD.StringIndex = Entry;
975 if (Symbol.isUndefined()) {
976 MSD.SectionIndex = 0;
977 UndefinedSymbolData.push_back(MSD);
978 } else if (Symbol.isAbsolute()) {
979 MSD.SectionIndex = 0;
980 ExternalSymbolData.push_back(MSD);
982 MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
983 assert(MSD.SectionIndex && "Invalid section index!");
984 ExternalSymbolData.push_back(MSD);
988 // Now add the data for local symbols.
989 for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
990 ie = Asm.symbol_end(); it != ie; ++it) {
991 const MCSymbol &Symbol = it->getSymbol();
993 // Ignore non-linker visible symbols.
994 if (!Asm.isSymbolLinkerVisible(it->getSymbol()))
// This pass handles the symbols the first pass did not.
997 if (it->isExternal() || Symbol.isUndefined())
1000 uint64_t &Entry = StringIndexMap[Symbol.getName()];
1002 Entry = StringTable.size();
1003 StringTable += Symbol.getName();
1004 StringTable += '\x00';
1008 MSD.SymbolData = it;
1009 MSD.StringIndex = Entry;
1011 if (Symbol.isAbsolute()) {
1012 MSD.SectionIndex = 0;
1013 LocalSymbolData.push_back(MSD);
1015 MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
1016 assert(MSD.SectionIndex && "Invalid section index!");
1017 LocalSymbolData.push_back(MSD);
1021 // External and undefined symbols are required to be in lexicographic order.
1022 std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
1023 std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());
1025 // Set the symbol indices.
1027 for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
1028 LocalSymbolData[i].SymbolData->setIndex(Index++);
1029 for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
1030 ExternalSymbolData[i].SymbolData->setIndex(Index++);
1031 for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
1032 UndefinedSymbolData[i].SymbolData->setIndex(Index++);
1034 // The string table is padded to a multiple of 4.
1035 while (StringTable.size() % 4)
1036 StringTable += '\x00';
/// ExecutePostLayoutBinding - Bind indirect symbols, then compute the symbol
/// table into this writer's StringTable and Local/External/Undefined symbol
/// vectors. BindIndirectSymbols runs first because it may create symbol data.
1039 void ExecutePostLayoutBinding(MCAssembler &Asm) {
1040 // Create symbol data for any indirect symbols.
1041 BindIndirectSymbols(Asm);
1043 // Compute symbol table information and bind symbol indices.
1044 ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData,
1045 UndefinedSymbolData);
// Decide whether the value of a fixup can be treated as fully resolved.
//
// The answer is chosen according to what the target backend reports:
//   - no scattered symbols: no further analysis is required;
//   - no reliable symbol difference: defer to the "simple" check,
//     isScatteredFixupFullyResolvedSimple, which is keyed on a base section;
//   - otherwise: defer to isScatteredFixupFullyResolved, which is keyed on a
//     base atom (symbol).
//
// Asm    - assembler whose backend capabilities are queried; also forwarded
//          to the helper checks.
// Target - the fixup target value, forwarded unchanged to the helper checks.
// DF     - fragment from which the base section (through its parent) or the
//          base atom is taken; presumably the fragment that holds the fixup --
//          confirm against the caller.
//
// Returns the verdict of whichever check was selected.
1049 bool IsFixupFullyResolved(const MCAssembler &Asm,
1050 const MCValue Target,
1052 const MCFragment *DF) const {
1053 // If we aren't using scattered symbols, the fixup is fully resolved.
1054 if (!Asm.getBackend().hasScatteredSymbols())
1057 // Otherwise, determine whether this value is actually resolved; scattering
1058 // may cause atoms to move.
1060 // Check if we are using the "simple" resolution algorithm (e.g.,
1062 if (!Asm.getBackend().hasReliableSymbolDifference()) {
// A null BaseSection / BaseSymbol is what the helpers receive when no base
// is assigned below.
1063 const MCSection *BaseSection = 0;
1065 BaseSection = &DF->getParent()->getSection();
1067 return isScatteredFixupFullyResolvedSimple(Asm, Target, BaseSection);
1070 // Otherwise, compute the proper answer as reliably as possible.
1072 // If this is a PCrel relocation, find the base atom (identified by its
1073 // symbol) that the fixup value is relative to.
1074 const MCSymbolData *BaseSymbol = 0;
1076 BaseSymbol = DF->getAtom();
1081 return isScatteredFixupFullyResolved(Asm, Target, BaseSymbol);
// Emit the complete Mach-O object for Asm, using the section addresses and
// sizes recorded in Layout. Output is produced in this order:
//   1. the file header and the segment load command,
//   2. one section header per section,
//   3. the symtab and dysymtab load commands (when the symbol table is used),
//   4. the contents of every section, then zero padding to a 4-byte boundary,
//   5. each section's relocation entries,
//   6. the symbol table data (when used): the indirect symbol entries, the
//      nlist entries (locals, then externals, then undefined symbols) and
//      finally the string table.
// Every file offset stored in the headers is computed up front from the
// counts and sizes gathered here; as the FIXME further down notes, those
// offsets are not cross-checked against what is actually written.
//
// Asm    - assembler supplying the sections, indirect symbols and symbol data.
// Layout - supplies each section's address, size and file size, and is passed
//          on to the section and nlist writers.
1084 void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) {
1085 unsigned NumSections = Asm.size();
1087 // The section data starts after the header, the segment load command (and
1088 // section headers) and the symbol table.
1089 unsigned NumLoadCommands = 1;
1090 uint64_t LoadCommandsSize = Is64Bit ?
1091 macho::SegmentLoadCommand64Size + NumSections * macho::Section64Size :
1092 macho::SegmentLoadCommand32Size + NumSections * macho::Section32Size;
1094 // Add the symbol table load command sizes, if used.
1095 unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() +
1096 UndefinedSymbolData.size();
1098 NumLoadCommands += 2;
1099 LoadCommandsSize += (macho::SymtabLoadCommandSize +
1100 macho::DysymtabLoadCommandSize);
1103 // Compute the total size of the section data, as well as its file size and
// Section contents begin immediately after the file header and all of the
// load commands.
1105 uint64_t SectionDataStart = (Is64Bit ? macho::Header64Size :
1106 macho::Header32Size) + LoadCommandsSize;
1107 uint64_t SectionDataSize = 0;
1108 uint64_t SectionDataFileSize = 0;
1109 uint64_t VMSize = 0;
1110 for (MCAssembler::const_iterator it = Asm.begin(),
1111 ie = Asm.end(); it != ie; ++it) {
1112 const MCSectionData &SD = *it;
1113 uint64_t Address = Layout.getSectionAddress(&SD);
1114 uint64_t Size = Layout.getSectionSize(&SD);
1115 uint64_t FileSize = Layout.getSectionFileSize(&SD);
// VMSize is updated ahead of the virtual-section test, so it spans every
// section regardless of that test's outcome.
1117 VMSize = std::max(VMSize, Address + Size);
1119 if (SD.getSection().isVirtualSection())
1122 SectionDataSize = std::max(SectionDataSize, Address + Size);
1123 SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize);
1126 // The section data is padded to 4 bytes.
1128 // FIXME: Is this machine dependent?
1129 unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4);
1130 SectionDataFileSize += SectionDataPadding;
1132 // Write the prolog, starting with the header and load command...
1133 WriteHeader(NumLoadCommands, LoadCommandsSize,
1134 Asm.getSubsectionsViaSymbols());
1135 WriteSegmentLoadCommand(NumSections, VMSize,
1136 SectionDataStart, SectionDataSize);
1138 // ... and then the section headers.
// Relocation entries are placed right after the padded section data.
// RelocTableEnd is the running offset at which the current section's entries
// go; it is advanced past them after each section header, so once the loop
// finishes it marks the end of all relocation entries.
1139 uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
1140 for (MCAssembler::const_iterator it = Asm.begin(),
1141 ie = Asm.end(); it != ie; ++it) {
1142 std::vector<MachRelocationEntry> &Relocs = Relocations[it];
1143 unsigned NumRelocs = Relocs.size();
1144 uint64_t SectionStart = SectionDataStart + Layout.getSectionAddress(it);
1145 WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs);
1146 RelocTableEnd += NumRelocs * macho::RelocationInfoSize;
1149 // Write the symbol table load command, if used.
// The three symbol groups are numbered back to back: locals from index 0,
// then externals, then undefined symbols.
1151 unsigned FirstLocalSymbol = 0;
1152 unsigned NumLocalSymbols = LocalSymbolData.size();
1153 unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols;
1154 unsigned NumExternalSymbols = ExternalSymbolData.size();
1155 unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols;
1156 unsigned NumUndefinedSymbols = UndefinedSymbolData.size();
1157 unsigned NumIndirectSymbols = Asm.indirect_symbol_size();
1158 unsigned NumSymTabSymbols =
1159 NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols;
// Four bytes are budgeted per indirect symbol entry.
1160 uint64_t IndirectSymbolSize = NumIndirectSymbols * 4;
1161 uint64_t IndirectSymbolOffset = 0;
1163 // If used, the indirect symbols are written after the section data.
1164 if (NumIndirectSymbols)
1165 IndirectSymbolOffset = RelocTableEnd;
1167 // The symbol table is written after the indirect symbol data.
1168 uint64_t SymbolTableOffset = RelocTableEnd + IndirectSymbolSize;
1170 // The string table is written after symbol table.
1171 uint64_t StringTableOffset =
1172 SymbolTableOffset + NumSymTabSymbols * (Is64Bit ? macho::Nlist64Size :
1173 macho::Nlist32Size);
1174 WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
1175 StringTableOffset, StringTable.size());
1177 WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
1178 FirstExternalSymbol, NumExternalSymbols,
1179 FirstUndefinedSymbol, NumUndefinedSymbols,
1180 IndirectSymbolOffset, NumIndirectSymbols);
1183 // Write the actual section data.
1184 for (MCAssembler::const_iterator it = Asm.begin(),
1185 ie = Asm.end(); it != ie; ++it)
1186 Asm.WriteSectionData(it, Layout, this);
1188 // Write the extra padding.
1189 WriteZeros(SectionDataPadding);
1191 // Write the relocation entries.
1192 for (MCAssembler::const_iterator it = Asm.begin(),
1193 ie = Asm.end(); it != ie; ++it) {
1194 // Write the section relocation entries, in reverse order to match 'as'
1195 // (approximately, the exact algorithm is more complicated than this).
1196 std::vector<MachRelocationEntry> &Relocs = Relocations[it];
// Index e - i - 1 walks the vector from its last entry to its first; each
// entry is emitted as two 32-bit words.
1197 for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
1198 Write32(Relocs[e - i - 1].Word0);
1199 Write32(Relocs[e - i - 1].Word1);
1203 // Write the symbol table data, if used.
1205 // Write the indirect symbol entries.
1206 for (MCAssembler::const_indirect_symbol_iterator
1207 it = Asm.indirect_symbol_begin(),
1208 ie = Asm.indirect_symbol_end(); it != ie; ++it) {
1209 // Indirect symbols in the non lazy symbol pointer section have some
1210 // special handling.
1211 const MCSectionMachO &Section =
1212 static_cast<const MCSectionMachO&>(it->SectionData->getSection());
1213 if (Section.getType() == MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) {
1214 // If this symbol is defined and internal, mark it as such.
1215 if (it->Symbol->isDefined() &&
1216 !Asm.getSymbolData(*it->Symbol).isExternal()) {
// NOTE(review): presumably these flag bits are what gets written for such an
// entry, in place of a symbol index -- confirm.
1217 uint32_t Flags = macho::ISF_Local;
1218 if (it->Symbol->isAbsolute())
1219 Flags |= macho::ISF_Absolute;
// The entry written here is the symbol's index in the symbol table.
1225 Write32(Asm.getSymbolData(*it->Symbol).getIndex());
1228 // FIXME: Check that offsets match computed ones.
1230 // Write the symbol table entries.
// Same group order as the First*Symbol indices computed for the dysymtab load
// command: locals, externals, undefined.
1231 for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
1232 WriteNlist(LocalSymbolData[i], Layout);
1233 for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
1234 WriteNlist(ExternalSymbolData[i], Layout);
1235 for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
1236 WriteNlist(UndefinedSymbolData[i], Layout);
1238 // Write the string table.
1239 OS << StringTable.str();
// Factory for the Mach-O object writer: allocates a MachObjectWriter with
// `new`, forwarding the output stream, the 64-bit flag, the CPU type and
// subtype, and the endianness flag to its constructor, and returns it through
// the MCObjectWriter base pointer.
// NOTE(review): nothing here releases the allocation, so ownership presumably
// passes to the caller -- confirm.
1246 MCObjectWriter *llvm::createMachObjectWriter(raw_ostream &OS, bool is64Bit,
1248 uint32_t CPUSubtype,
1249 bool IsLittleEndian) {
1250 return new MachObjectWriter(OS, is64Bit, CPUType, CPUSubtype, IsLittleEndian);