X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FMC%2FMachObjectWriter.cpp;h=5e9e86f18a030088b9c3df6eb6568202f2699112;hb=a8432640e8fd7b86fec7e9bfee98433d8070bfcf;hp=c8fe40e2f437ba120024ba8efc81f5968e677f91;hpb=4010dd72b81b760daaa0361084de6dca8ed86fa1;p=oota-llvm.git diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index c8fe40e2f43..5e9e86f18a0 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -8,51 +8,41 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCMachObjectWriter.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Twine.h" -#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCMachOSymbolFlags.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCMachOSymbolFlags.h" #include "llvm/MC/MCValue.h" -#include "llvm/Object/MachOFormat.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetAsmBackend.h" - -// FIXME: Gross. -#include "../Target/ARM/ARMFixupKinds.h" -#include "../Target/X86/X86FixupKinds.h" - +#include "llvm/Support/MachO.h" +#include "llvm/Support/raw_ostream.h" #include using namespace llvm; -using namespace llvm::object; - -// FIXME: this has been copied from (or to) X86AsmBackend.cpp -static unsigned getFixupKindLog2Size(unsigned Kind) { - switch (Kind) { - default: - llvm_unreachable("invalid fixup kind!"); - case FK_PCRel_1: - case FK_Data_1: return 0; - case FK_PCRel_2: - case FK_Data_2: return 1; - case FK_PCRel_4: - // FIXME: Remove these!!! - case X86::reloc_riprel_4byte: - case X86::reloc_riprel_4byte_movq_load: - case X86::reloc_signed_4byte: - case FK_Data_4: return 2; - case FK_Data_8: return 3; - } + +#define DEBUG_TYPE "mc" + +void MachObjectWriter::reset() { + Relocations.clear(); + IndirectSymBase.clear(); + StringTable.clear(); + LocalSymbolData.clear(); + ExternalSymbolData.clear(); + UndefinedSymbolData.clear(); + MCObjectWriter::reset(); } -static bool doesSymbolRequireExternRelocation(MCSymbolData *SD) { +bool MachObjectWriter:: +doesSymbolRequireExternRelocation(const MCSymbolData *SD) { // Undefined symbols are always extern. - if (SD->Symbol->isUndefined()) + if (SD->getSymbol().isUndefined()) return true; // References to weak definitions require external relocation entries; the @@ -64,1502 +54,976 @@ static bool doesSymbolRequireExternRelocation(MCSymbolData *SD) { return false; } -static bool isScatteredFixupFullyResolved(const MCAssembler &Asm, - const MCValue Target, - const MCSymbolData *BaseSymbol) { - // The effective fixup address is - // addr(atom(A)) + offset(A) - // - addr(atom(B)) - offset(B) - // - addr(BaseSymbol) + - // and the offsets are not relocatable, so the fixup is fully resolved when - // addr(atom(A)) - addr(atom(B)) - addr(BaseSymbol) == 0. - // - // Note that "false" is almost always conservatively correct (it means we emit - // a relocation which is unnecessary), except when it would force us to emit a - // relocation which the target cannot encode. - - const MCSymbolData *A_Base = 0, *B_Base = 0; - if (const MCSymbolRefExpr *A = Target.getSymA()) { - // Modified symbol references cannot be resolved. - if (A->getKind() != MCSymbolRefExpr::VK_None) - return false; - - A_Base = Asm.getAtom(&Asm.getSymbolData(A->getSymbol())); - if (!A_Base) - return false; - } - - if (const MCSymbolRefExpr *B = Target.getSymB()) { - // Modified symbol references cannot be resolved. - if (B->getKind() != MCSymbolRefExpr::VK_None) - return false; +bool MachObjectWriter:: +MachSymbolData::operator<(const MachSymbolData &RHS) const { + return SymbolData->getSymbol().getName() < + RHS.SymbolData->getSymbol().getName(); +} - B_Base = Asm.getAtom(&Asm.getSymbolData(B->getSymbol())); - if (!B_Base) - return false; - } +bool MachObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) { + const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo( + (MCFixupKind) Kind); - // If there is no base, A and B have to be the same atom for this fixup to be - // fully resolved. - if (!BaseSymbol) - return A_Base == B_Base; + return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel; +} - // Otherwise, B must be missing and A must be the base. - return !B_Base && BaseSymbol == A_Base; +uint64_t MachObjectWriter::getFragmentAddress(const MCFragment *Fragment, + const MCAsmLayout &Layout) const { + return getSectionAddress(Fragment->getParent()) + + Layout.getFragmentOffset(Fragment); } -static bool isScatteredFixupFullyResolvedSimple(const MCAssembler &Asm, - const MCValue Target, - const MCSection *BaseSection) { - // The effective fixup address is - // addr(atom(A)) + offset(A) - // - addr(atom(B)) - offset(B) - // - addr() + - // and the offsets are not relocatable, so the fixup is fully resolved when - // addr(atom(A)) - addr(atom(B)) - addr()) == 0. - // - // The simple (Darwin, except on x86_64) way of dealing with this was to - // assume that any reference to a temporary symbol *must* be a temporary - // symbol in the same atom, unless the sections differ. Therefore, any PCrel - // relocation to a temporary symbol (in the same section) is fully - // resolved. This also works in conjunction with absolutized .set, which - // requires the compiler to use .set to absolutize the differences between - // symbols which the compiler knows to be assembly time constants, so we don't - // need to worry about considering symbol differences fully resolved. - - // Non-relative fixups are only resolved if constant. - if (!BaseSection) - return Target.isAbsolute(); - - // Otherwise, relative fixups are only resolved if not a difference and the - // target is a temporary in the same section. - if (Target.isAbsolute() || Target.getSymB()) - return false; +uint64_t MachObjectWriter::getSymbolAddress(const MCSymbolData* SD, + const MCAsmLayout &Layout) const { + const MCSymbol &S = SD->getSymbol(); - const MCSymbol *A = &Target.getSymA()->getSymbol(); - if (!A->isTemporary() || !A->isInSection() || - &A->getSection() != BaseSection) - return false; + // If this is a variable, then recursively evaluate now. + if (S.isVariable()) { + if (const MCConstantExpr *C = + dyn_cast(S.getVariableValue())) + return C->getValue(); - return true; -} -namespace { + MCValue Target; + if (!S.getVariableValue()->EvaluateAsRelocatable(Target, &Layout, nullptr)) + report_fatal_error("unable to evaluate offset for variable '" + + S.getName() + "'"); -class MachObjectWriter : public MCObjectWriter { - /// MachSymbolData - Helper struct for containing some precomputed information - /// on symbols. - struct MachSymbolData { - MCSymbolData *SymbolData; - uint64_t StringIndex; - uint8_t SectionIndex; + // Verify that any used symbols are defined. + if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined()) + report_fatal_error("unable to evaluate offset to undefined symbol '" + + Target.getSymA()->getSymbol().getName() + "'"); + if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined()) + report_fatal_error("unable to evaluate offset to undefined symbol '" + + Target.getSymB()->getSymbol().getName() + "'"); - // Support lexicographic sorting. - bool operator<(const MachSymbolData &RHS) const { - return SymbolData->getSymbol().getName() < - RHS.SymbolData->getSymbol().getName(); - } - }; + uint64_t Address = Target.getConstant(); + if (Target.getSymA()) + Address += getSymbolAddress(&Layout.getAssembler().getSymbolData( + Target.getSymA()->getSymbol()), Layout); + if (Target.getSymB()) + Address += getSymbolAddress(&Layout.getAssembler().getSymbolData( + Target.getSymB()->getSymbol()), Layout); + return Address; + } - /// The target specific Mach-O writer instance. - llvm::OwningPtr TargetObjectWriter; + return getSectionAddress(SD->getFragment()->getParent()) + + Layout.getSymbolOffset(SD); +} - /// @name Relocation Data - /// @{ +uint64_t MachObjectWriter::getPaddingSize(const MCSectionData *SD, + const MCAsmLayout &Layout) const { + uint64_t EndAddr = getSectionAddress(SD) + Layout.getSectionAddressSize(SD); + unsigned Next = SD->getLayoutOrder() + 1; + if (Next >= Layout.getSectionOrder().size()) + return 0; + + const MCSectionData &NextSD = *Layout.getSectionOrder()[Next]; + if (NextSD.getSection().isVirtualSection()) + return 0; + return OffsetToAlignment(EndAddr, NextSD.getAlignment()); +} - llvm::DenseMap > Relocations; - llvm::DenseMap IndirectSymBase; +void MachObjectWriter::WriteHeader(unsigned NumLoadCommands, + unsigned LoadCommandsSize, + bool SubsectionsViaSymbols) { + uint32_t Flags = 0; - /// @} - /// @name Symbol Table Data - /// @{ + if (SubsectionsViaSymbols) + Flags |= MachO::MH_SUBSECTIONS_VIA_SYMBOLS; - SmallString<256> StringTable; - std::vector LocalSymbolData; - std::vector ExternalSymbolData; - std::vector UndefinedSymbolData; + // struct mach_header (28 bytes) or + // struct mach_header_64 (32 bytes) - /// @} + uint64_t Start = OS.tell(); + (void) Start; -private: - /// @name Utility Methods - /// @{ + Write32(is64Bit() ? MachO::MH_MAGIC_64 : MachO::MH_MAGIC); - bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) { - const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo( - (MCFixupKind) Kind); + Write32(TargetObjectWriter->getCPUType()); + Write32(TargetObjectWriter->getCPUSubtype()); - return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel; - } + Write32(MachO::MH_OBJECT); + Write32(NumLoadCommands); + Write32(LoadCommandsSize); + Write32(Flags); + if (is64Bit()) + Write32(0); // reserved - /// @} + assert(OS.tell() - Start == + (is64Bit()?sizeof(MachO::mach_header_64): sizeof(MachO::mach_header))); +} - SectionAddrMap SectionAddress; - uint64_t getSectionAddress(const MCSectionData* SD) const { - return SectionAddress.lookup(SD); - } - uint64_t getSymbolAddress(const MCSymbolData* SD, - const MCAsmLayout &Layout) const { - return getSectionAddress(SD->getFragment()->getParent()) + - Layout.getSymbolOffset(SD); - } - uint64_t getFragmentAddress(const MCFragment *Fragment, - const MCAsmLayout &Layout) const { - return getSectionAddress(Fragment->getParent()) + - Layout.getFragmentOffset(Fragment); +/// WriteSegmentLoadCommand - Write a segment load command. +/// +/// \param NumSections The number of sections in this segment. +/// \param SectionDataSize The total size of the sections. +void MachObjectWriter::WriteSegmentLoadCommand(unsigned NumSections, + uint64_t VMSize, + uint64_t SectionDataStartOffset, + uint64_t SectionDataSize) { + // struct segment_command (56 bytes) or + // struct segment_command_64 (72 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + unsigned SegmentLoadCommandSize = + is64Bit() ? sizeof(MachO::segment_command_64): + sizeof(MachO::segment_command); + Write32(is64Bit() ? MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT); + Write32(SegmentLoadCommandSize + + NumSections * (is64Bit() ? sizeof(MachO::section_64) : + sizeof(MachO::section))); + + WriteBytes("", 16); + if (is64Bit()) { + Write64(0); // vmaddr + Write64(VMSize); // vmsize + Write64(SectionDataStartOffset); // file offset + Write64(SectionDataSize); // file size + } else { + Write32(0); // vmaddr + Write32(VMSize); // vmsize + Write32(SectionDataStartOffset); // file offset + Write32(SectionDataSize); // file size } + // maxprot + Write32(MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE); + // initprot + Write32(MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE); + Write32(NumSections); + Write32(0); // flags + + assert(OS.tell() - Start == SegmentLoadCommandSize); +} - uint64_t getPaddingSize(const MCSectionData *SD, - const MCAsmLayout &Layout) const { - uint64_t EndAddr = getSectionAddress(SD) + Layout.getSectionAddressSize(SD); - unsigned Next = SD->getLayoutOrder() + 1; - if (Next >= Layout.getSectionOrder().size()) - return 0; - - const MCSectionData &NextSD = *Layout.getSectionOrder()[Next]; - if (NextSD.getSection().isVirtualSection()) - return 0; - return OffsetToAlignment(EndAddr, NextSD.getAlignment()); +void MachObjectWriter::WriteSection(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCSectionData &SD, + uint64_t FileOffset, + uint64_t RelocationsStart, + unsigned NumRelocations) { + uint64_t SectionSize = Layout.getSectionAddressSize(&SD); + + // The offset is unused for virtual sections. + if (SD.getSection().isVirtualSection()) { + assert(Layout.getSectionFileSize(&SD) == 0 && "Invalid file size!"); + FileOffset = 0; } -public: - MachObjectWriter(MCMachObjectTargetWriter *MOTW, raw_ostream &_OS, - bool _IsLittleEndian) - : MCObjectWriter(_OS, _IsLittleEndian), TargetObjectWriter(MOTW) { + // struct section (68 bytes) or + // struct section_64 (80 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + const MCSectionMachO &Section = cast(SD.getSection()); + WriteBytes(Section.getSectionName(), 16); + WriteBytes(Section.getSegmentName(), 16); + if (is64Bit()) { + Write64(getSectionAddress(&SD)); // address + Write64(SectionSize); // size + } else { + Write32(getSectionAddress(&SD)); // address + Write32(SectionSize); // size } + Write32(FileOffset); + + unsigned Flags = Section.getTypeAndAttributes(); + if (SD.hasInstructions()) + Flags |= MachO::S_ATTR_SOME_INSTRUCTIONS; + + assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!"); + Write32(Log2_32(SD.getAlignment())); + Write32(NumRelocations ? RelocationsStart : 0); + Write32(NumRelocations); + Write32(Flags); + Write32(IndirectSymBase.lookup(&SD)); // reserved1 + Write32(Section.getStubSize()); // reserved2 + if (is64Bit()) + Write32(0); // reserved3 + + assert(OS.tell() - Start == (is64Bit() ? sizeof(MachO::section_64) : + sizeof(MachO::section))); +} - /// @name Target Writer Proxy Accessors - /// @{ - - bool is64Bit() const { return TargetObjectWriter->is64Bit(); } - bool isARM() const { - uint32_t CPUType = TargetObjectWriter->getCPUType() & ~mach::CTFM_ArchMask; - return CPUType == mach::CTM_ARM; - } +void MachObjectWriter::WriteSymtabLoadCommand(uint32_t SymbolOffset, + uint32_t NumSymbols, + uint32_t StringTableOffset, + uint32_t StringTableSize) { + // struct symtab_command (24 bytes) - /// @} + uint64_t Start = OS.tell(); + (void) Start; - void WriteHeader(unsigned NumLoadCommands, unsigned LoadCommandsSize, - bool SubsectionsViaSymbols) { - uint32_t Flags = 0; + Write32(MachO::LC_SYMTAB); + Write32(sizeof(MachO::symtab_command)); + Write32(SymbolOffset); + Write32(NumSymbols); + Write32(StringTableOffset); + Write32(StringTableSize); - if (SubsectionsViaSymbols) - Flags |= macho::HF_SubsectionsViaSymbols; + assert(OS.tell() - Start == sizeof(MachO::symtab_command)); +} - // struct mach_header (28 bytes) or - // struct mach_header_64 (32 bytes) +void MachObjectWriter::WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol, + uint32_t NumLocalSymbols, + uint32_t FirstExternalSymbol, + uint32_t NumExternalSymbols, + uint32_t FirstUndefinedSymbol, + uint32_t NumUndefinedSymbols, + uint32_t IndirectSymbolOffset, + uint32_t NumIndirectSymbols) { + // struct dysymtab_command (80 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + Write32(MachO::LC_DYSYMTAB); + Write32(sizeof(MachO::dysymtab_command)); + Write32(FirstLocalSymbol); + Write32(NumLocalSymbols); + Write32(FirstExternalSymbol); + Write32(NumExternalSymbols); + Write32(FirstUndefinedSymbol); + Write32(NumUndefinedSymbols); + Write32(0); // tocoff + Write32(0); // ntoc + Write32(0); // modtaboff + Write32(0); // nmodtab + Write32(0); // extrefsymoff + Write32(0); // nextrefsyms + Write32(IndirectSymbolOffset); + Write32(NumIndirectSymbols); + Write32(0); // extreloff + Write32(0); // nextrel + Write32(0); // locreloff + Write32(0); // nlocrel + + assert(OS.tell() - Start == sizeof(MachO::dysymtab_command)); +} - uint64_t Start = OS.tell(); - (void) Start; +MachObjectWriter::MachSymbolData * +MachObjectWriter::findSymbolData(const MCSymbol &Sym) { + for (auto &Entry : LocalSymbolData) + if (&Entry.SymbolData->getSymbol() == &Sym) + return &Entry; - Write32(is64Bit() ? macho::HM_Object64 : macho::HM_Object32); + for (auto &Entry : ExternalSymbolData) + if (&Entry.SymbolData->getSymbol() == &Sym) + return &Entry; - Write32(TargetObjectWriter->getCPUType()); - Write32(TargetObjectWriter->getCPUSubtype()); + for (auto &Entry : UndefinedSymbolData) + if (&Entry.SymbolData->getSymbol() == &Sym) + return &Entry; - Write32(macho::HFT_Object); - Write32(NumLoadCommands); - Write32(LoadCommandsSize); - Write32(Flags); - if (is64Bit()) - Write32(0); // reserved + return nullptr; +} - assert(OS.tell() - Start == is64Bit() ? - macho::Header64Size : macho::Header32Size); +void MachObjectWriter::WriteNlist(MachSymbolData &MSD, + const MCAsmLayout &Layout) { + MCSymbolData &Data = *MSD.SymbolData; + const MCSymbol *Symbol = &Data.getSymbol(); + const MCSymbol *AliasedSymbol = &Symbol->AliasedSymbol(); + uint8_t SectionIndex = MSD.SectionIndex; + uint8_t Type = 0; + uint16_t Flags = Data.getFlags(); + uint64_t Address = 0; + bool IsAlias = Symbol != AliasedSymbol; + + MachSymbolData *AliaseeInfo; + if (IsAlias) { + AliaseeInfo = findSymbolData(*AliasedSymbol); + if (AliaseeInfo) + SectionIndex = AliaseeInfo->SectionIndex; + Symbol = AliasedSymbol; } - /// WriteSegmentLoadCommand - Write a segment load command. - /// - /// \arg NumSections - The number of sections in this segment. - /// \arg SectionDataSize - The total size of the sections. - void WriteSegmentLoadCommand(unsigned NumSections, - uint64_t VMSize, - uint64_t SectionDataStartOffset, - uint64_t SectionDataSize) { - // struct segment_command (56 bytes) or - // struct segment_command_64 (72 bytes) - - uint64_t Start = OS.tell(); - (void) Start; - - unsigned SegmentLoadCommandSize = - is64Bit() ? macho::SegmentLoadCommand64Size: - macho::SegmentLoadCommand32Size; - Write32(is64Bit() ? macho::LCT_Segment64 : macho::LCT_Segment); - Write32(SegmentLoadCommandSize + - NumSections * (is64Bit() ? macho::Section64Size : - macho::Section32Size)); - - WriteBytes("", 16); - if (is64Bit()) { - Write64(0); // vmaddr - Write64(VMSize); // vmsize - Write64(SectionDataStartOffset); // file offset - Write64(SectionDataSize); // file size - } else { - Write32(0); // vmaddr - Write32(VMSize); // vmsize - Write32(SectionDataStartOffset); // file offset - Write32(SectionDataSize); // file size + // Set the N_TYPE bits. See . + // + // FIXME: Are the prebound or indirect fields possible here? + if (IsAlias && Symbol->isUndefined()) + Type = MachO::N_INDR; + else if (Symbol->isUndefined()) + Type = MachO::N_UNDF; + else if (Symbol->isAbsolute()) + Type = MachO::N_ABS; + else + Type = MachO::N_SECT; + + // FIXME: Set STAB bits. + + if (Data.isPrivateExtern()) + Type |= MachO::N_PEXT; + + // Set external bit. + if (Data.isExternal() || (!IsAlias && Symbol->isUndefined())) + Type |= MachO::N_EXT; + + // Compute the symbol address. + if (IsAlias && Symbol->isUndefined()) + Address = AliaseeInfo->StringIndex; + else if (Symbol->isDefined()) + Address = getSymbolAddress(&Data, Layout); + else if (Data.isCommon()) { + // Common symbols are encoded with the size in the address + // field, and their alignment in the flags. + Address = Data.getCommonSize(); + + // Common alignment is packed into the 'desc' bits. + if (unsigned Align = Data.getCommonAlignment()) { + unsigned Log2Size = Log2_32(Align); + assert((1U << Log2Size) == Align && "Invalid 'common' alignment!"); + if (Log2Size > 15) + report_fatal_error("invalid 'common' alignment '" + + Twine(Align) + "' for '" + Symbol->getName() + "'", + false); + // FIXME: Keep this mask with the SymbolFlags enumeration. + Flags = (Flags & 0xF0FF) | (Log2Size << 8); } - Write32(0x7); // maxprot - Write32(0x7); // initprot - Write32(NumSections); - Write32(0); // flags - - assert(OS.tell() - Start == SegmentLoadCommandSize); } - void WriteSection(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCSectionData &SD, uint64_t FileOffset, - uint64_t RelocationsStart, unsigned NumRelocations) { - uint64_t SectionSize = Layout.getSectionAddressSize(&SD); - - // The offset is unused for virtual sections. - if (SD.getSection().isVirtualSection()) { - assert(Layout.getSectionFileSize(&SD) == 0 && "Invalid file size!"); - FileOffset = 0; - } + if (Layout.getAssembler().isThumbFunc(Symbol)) + Flags |= SF_ThumbFunc; - // struct section (68 bytes) or - // struct section_64 (80 bytes) + // struct nlist (12 bytes) - uint64_t Start = OS.tell(); - (void) Start; + Write32(MSD.StringIndex); + Write8(Type); + Write8(SectionIndex); - const MCSectionMachO &Section = cast(SD.getSection()); - WriteBytes(Section.getSectionName(), 16); - WriteBytes(Section.getSegmentName(), 16); - if (is64Bit()) { - Write64(getSectionAddress(&SD)); // address - Write64(SectionSize); // size - } else { - Write32(getSectionAddress(&SD)); // address - Write32(SectionSize); // size - } - Write32(FileOffset); - - unsigned Flags = Section.getTypeAndAttributes(); - if (SD.hasInstructions()) - Flags |= MCSectionMachO::S_ATTR_SOME_INSTRUCTIONS; - - assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!"); - Write32(Log2_32(SD.getAlignment())); - Write32(NumRelocations ? RelocationsStart : 0); - Write32(NumRelocations); - Write32(Flags); - Write32(IndirectSymBase.lookup(&SD)); // reserved1 - Write32(Section.getStubSize()); // reserved2 - if (is64Bit()) - Write32(0); // reserved3 - - assert(OS.tell() - Start == is64Bit() ? macho::Section64Size : - macho::Section32Size); - } + // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc' + // value. + Write16(Flags); + if (is64Bit()) + Write64(Address); + else + Write32(Address); +} - void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols, - uint32_t StringTableOffset, - uint32_t StringTableSize) { - // struct symtab_command (24 bytes) +void MachObjectWriter::WriteLinkeditLoadCommand(uint32_t Type, + uint32_t DataOffset, + uint32_t DataSize) { + uint64_t Start = OS.tell(); + (void) Start; - uint64_t Start = OS.tell(); - (void) Start; + Write32(Type); + Write32(sizeof(MachO::linkedit_data_command)); + Write32(DataOffset); + Write32(DataSize); - Write32(macho::LCT_Symtab); - Write32(macho::SymtabLoadCommandSize); - Write32(SymbolOffset); - Write32(NumSymbols); - Write32(StringTableOffset); - Write32(StringTableSize); + assert(OS.tell() - Start == sizeof(MachO::linkedit_data_command)); +} - assert(OS.tell() - Start == macho::SymtabLoadCommandSize); - } +static unsigned ComputeLinkerOptionsLoadCommandSize( + const std::vector &Options, bool is64Bit) +{ + unsigned Size = sizeof(MachO::linker_option_command); + for (unsigned i = 0, e = Options.size(); i != e; ++i) + Size += Options[i].size() + 1; + return RoundUpToAlignment(Size, is64Bit ? 8 : 4); +} - void WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol, - uint32_t NumLocalSymbols, - uint32_t FirstExternalSymbol, - uint32_t NumExternalSymbols, - uint32_t FirstUndefinedSymbol, - uint32_t NumUndefinedSymbols, - uint32_t IndirectSymbolOffset, - uint32_t NumIndirectSymbols) { - // struct dysymtab_command (80 bytes) - - uint64_t Start = OS.tell(); - (void) Start; - - Write32(macho::LCT_Dysymtab); - Write32(macho::DysymtabLoadCommandSize); - Write32(FirstLocalSymbol); - Write32(NumLocalSymbols); - Write32(FirstExternalSymbol); - Write32(NumExternalSymbols); - Write32(FirstUndefinedSymbol); - Write32(NumUndefinedSymbols); - Write32(0); // tocoff - Write32(0); // ntoc - Write32(0); // modtaboff - Write32(0); // nmodtab - Write32(0); // extrefsymoff - Write32(0); // nextrefsyms - Write32(IndirectSymbolOffset); - Write32(NumIndirectSymbols); - Write32(0); // extreloff - Write32(0); // nextrel - Write32(0); // locreloff - Write32(0); // nlocrel - - assert(OS.tell() - Start == macho::DysymtabLoadCommandSize); +void MachObjectWriter::WriteLinkerOptionsLoadCommand( + const std::vector &Options) +{ + unsigned Size = ComputeLinkerOptionsLoadCommandSize(Options, is64Bit()); + uint64_t Start = OS.tell(); + (void) Start; + + Write32(MachO::LC_LINKER_OPTION); + Write32(Size); + Write32(Options.size()); + uint64_t BytesWritten = sizeof(MachO::linker_option_command); + for (unsigned i = 0, e = Options.size(); i != e; ++i) { + // Write each string, including the null byte. + const std::string &Option = Options[i]; + WriteBytes(Option.c_str(), Option.size() + 1); + BytesWritten += Option.size() + 1; } - void WriteNlist(MachSymbolData &MSD, const MCAsmLayout &Layout) { - MCSymbolData &Data = *MSD.SymbolData; - const MCSymbol &Symbol = Data.getSymbol(); - uint8_t Type = 0; - uint16_t Flags = Data.getFlags(); - uint32_t Address = 0; - - // Set the N_TYPE bits. See . - // - // FIXME: Are the prebound or indirect fields possible here? - if (Symbol.isUndefined()) - Type = macho::STT_Undefined; - else if (Symbol.isAbsolute()) - Type = macho::STT_Absolute; - else - Type = macho::STT_Section; - - // FIXME: Set STAB bits. - - if (Data.isPrivateExtern()) - Type |= macho::STF_PrivateExtern; - - // Set external bit. - if (Data.isExternal() || Symbol.isUndefined()) - Type |= macho::STF_External; - - // Compute the symbol address. - if (Symbol.isDefined()) { - if (Symbol.isAbsolute()) { - Address = cast(Symbol.getVariableValue())->getValue(); - } else { - Address = getSymbolAddress(&Data, Layout); - } - } else if (Data.isCommon()) { - // Common symbols are encoded with the size in the address - // field, and their alignment in the flags. - Address = Data.getCommonSize(); - - // Common alignment is packed into the 'desc' bits. - if (unsigned Align = Data.getCommonAlignment()) { - unsigned Log2Size = Log2_32(Align); - assert((1U << Log2Size) == Align && "Invalid 'common' alignment!"); - if (Log2Size > 15) - report_fatal_error("invalid 'common' alignment '" + - Twine(Align) + "'"); - // FIXME: Keep this mask with the SymbolFlags enumeration. - Flags = (Flags & 0xF0FF) | (Log2Size << 8); - } - } - - // struct nlist (12 bytes) + // Pad to a multiple of the pointer size. + WriteBytes("", OffsetToAlignment(BytesWritten, is64Bit() ? 8 : 4)); - Write32(MSD.StringIndex); - Write8(Type); - Write8(MSD.SectionIndex); + assert(OS.tell() - Start == Size); +} - // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc' - // value. - Write16(Flags); - if (is64Bit()) - Write64(Address); - else - Write32(Address); - } +void MachObjectWriter::RecordRelocation(MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + bool &IsPCRel, uint64_t &FixedValue) { + TargetObjectWriter->RecordRelocation(this, Asm, Layout, Fragment, Fixup, + Target, FixedValue); +} - // FIXME: We really need to improve the relocation validation. Basically, we - // want to implement a separate computation which evaluates the relocation - // entry as the linker would, and verifies that the resultant fixup value is - // exactly what the encoder wanted. This will catch several classes of - // problems: - // - // - Relocation entry bugs, the two algorithms are unlikely to have the same - // exact bug. +void MachObjectWriter::BindIndirectSymbols(MCAssembler &Asm) { + // This is the point where 'as' creates actual symbols for indirect symbols + // (in the following two passes). It would be easier for us to do this sooner + // when we see the attribute, but that makes getting the order in the symbol + // table much more complicated than it is worth. // - // - Relaxation issues, where we forget to relax something. - // - // - Input errors, where something cannot be correctly encoded. 'as' allows - // these through in many cases. - - static bool isFixupKindRIPRel(unsigned Kind) { - return Kind == X86::reloc_riprel_4byte || - Kind == X86::reloc_riprel_4byte_movq_load; - } - void RecordX86_64Relocation(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, - uint64_t &FixedValue) { - unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); - unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind()); - unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); - - // See . - uint32_t FixupOffset = - Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); - uint32_t FixupAddress = - getFragmentAddress(Fragment, Layout) + Fixup.getOffset(); - int64_t Value = 0; - unsigned Index = 0; - unsigned IsExtern = 0; - unsigned Type = 0; - - Value = Target.getConstant(); - - if (IsPCRel) { - // Compensate for the relocation offset, Darwin x86_64 relocations only - // have the addend and appear to have attempted to define it to be the - // actual expression addend without the PCrel bias. However, instructions - // with data following the relocation are not accomodated for (see comment - // below regarding SIGNED{1,2,4}), so it isn't exactly that either. - Value += 1LL << Log2Size; + // FIXME: Revisit this when the dust settles. + + // Report errors for use of .indirect_symbol not in a symbol pointer section + // or stub section. + for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), + ie = Asm.indirect_symbol_end(); it != ie; ++it) { + const MCSectionMachO &Section = + cast(it->SectionData->getSection()); + + if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS && + Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS && + Section.getType() != MachO::S_SYMBOL_STUBS) { + MCSymbol &Symbol = *it->Symbol; + report_fatal_error("indirect symbol '" + Symbol.getName() + + "' not in a symbol pointer or stub section"); } + } - if (Target.isAbsolute()) { // constant - // SymbolNum of 0 indicates the absolute section. - Type = macho::RIT_X86_64_Unsigned; - Index = 0; - - // FIXME: I believe this is broken, I don't think the linker can - // understand it. I think it would require a local relocation, but I'm not - // sure if that would work either. The official way to get an absolute - // PCrel relocation is to use an absolute symbol (which we don't support - // yet). - if (IsPCRel) { - IsExtern = 1; - Type = macho::RIT_X86_64_Branch; - } - } else if (Target.getSymB()) { // A - B + constant - const MCSymbol *A = &Target.getSymA()->getSymbol(); - MCSymbolData &A_SD = Asm.getSymbolData(*A); - const MCSymbolData *A_Base = Asm.getAtom(&A_SD); - - const MCSymbol *B = &Target.getSymB()->getSymbol(); - MCSymbolData &B_SD = Asm.getSymbolData(*B); - const MCSymbolData *B_Base = Asm.getAtom(&B_SD); - - // Neither symbol can be modified. - if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None || - Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) - report_fatal_error("unsupported relocation of modified symbol"); - - // We don't support PCrel relocations of differences. Darwin 'as' doesn't - // implement most of these correctly. - if (IsPCRel) - report_fatal_error("unsupported pc-relative relocation of difference"); - - // The support for the situation where one or both of the symbols would - // require a local relocation is handled just like if the symbols were - // external. This is certainly used in the case of debug sections where - // the section has only temporary symbols and thus the symbols don't have - // base symbols. This is encoded using the section ordinal and - // non-extern relocation entries. - - // Darwin 'as' doesn't emit correct relocations for this (it ends up with - // a single SIGNED relocation); reject it for now. Except the case where - // both symbols don't have a base, equal but both NULL. - if (A_Base == B_Base && A_Base) - report_fatal_error("unsupported relocation with identical base"); - - Value += getSymbolAddress(&A_SD, Layout) - - (A_Base == NULL ? 0 : getSymbolAddress(A_Base, Layout)); - Value -= getSymbolAddress(&B_SD, Layout) - - (B_Base == NULL ? 0 : getSymbolAddress(B_Base, Layout)); - - if (A_Base) { - Index = A_Base->getIndex(); - IsExtern = 1; - } - else { - Index = A_SD.getFragment()->getParent()->getOrdinal() + 1; - IsExtern = 0; - } - Type = macho::RIT_X86_64_Unsigned; - - macho::RelocationEntry MRE; - MRE.Word0 = FixupOffset; - MRE.Word1 = ((Index << 0) | - (IsPCRel << 24) | - (Log2Size << 25) | - (IsExtern << 27) | - (Type << 28)); - Relocations[Fragment->getParent()].push_back(MRE); - - if (B_Base) { - Index = B_Base->getIndex(); - IsExtern = 1; - } - else { - Index = B_SD.getFragment()->getParent()->getOrdinal() + 1; - IsExtern = 0; - } - Type = macho::RIT_X86_64_Subtractor; - } else { - const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); - MCSymbolData &SD = Asm.getSymbolData(*Symbol); - const MCSymbolData *Base = Asm.getAtom(&SD); - - // Relocations inside debug sections always use local relocations when - // possible. This seems to be done because the debugger doesn't fully - // understand x86_64 relocation entries, and expects to find values that - // have already been fixed up. - if (Symbol->isInSection()) { - const MCSectionMachO &Section = static_cast( - Fragment->getParent()->getSection()); - if (Section.hasAttribute(MCSectionMachO::S_ATTR_DEBUG)) - Base = 0; - } + // Bind non-lazy symbol pointers first. + unsigned IndirectIndex = 0; + for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), + ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) { + const MCSectionMachO &Section = + cast(it->SectionData->getSection()); - // x86_64 almost always uses external relocations, except when there is no - // symbol to use as a base address (a local symbol with no preceeding - // non-local symbol). - if (Base) { - Index = Base->getIndex(); - IsExtern = 1; - - // Add the local offset, if needed. - if (Base != &SD) - Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base); - } else if (Symbol->isInSection()) { - // The index is the section ordinal (1-based). - Index = SD.getFragment()->getParent()->getOrdinal() + 1; - IsExtern = 0; - Value += getSymbolAddress(&SD, Layout); - - if (IsPCRel) - Value -= FixupAddress + (1 << Log2Size); - } else if (Symbol->isVariable()) { - const MCExpr *Value = Symbol->getVariableValue(); - int64_t Res; - bool isAbs = Value->EvaluateAsAbsolute(Res, Layout, SectionAddress); - if (isAbs) { - FixedValue = Res; - return; - } else { - report_fatal_error("unsupported relocation of variable '" + - Symbol->getName() + "'"); - } - } else { - report_fatal_error("unsupported relocation of undefined symbol '" + - Symbol->getName() + "'"); - } + if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS) + continue; - MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind(); - if (IsPCRel) { - if (IsRIPRel) { - if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) { - // x86_64 distinguishes movq foo@GOTPCREL so that the linker can - // rewrite the movq to an leaq at link time if the symbol ends up in - // the same linkage unit. - if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load) - Type = macho::RIT_X86_64_GOTLoad; - else - Type = macho::RIT_X86_64_GOT; - } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { - Type = macho::RIT_X86_64_TLV; - } else if (Modifier != MCSymbolRefExpr::VK_None) { - report_fatal_error("unsupported symbol modifier in relocation"); - } else { - Type = macho::RIT_X86_64_Signed; - - // The Darwin x86_64 relocation format has a problem where it cannot - // encode an address (L + ) which is outside the atom - // containing L. Generally, this shouldn't occur but it does - // happen when we have a RIPrel instruction with data following the - // relocation entry (e.g., movb $012, L0(%rip)). Even with the PCrel - // adjustment Darwin x86_64 uses, the offset is still negative and - // the linker has no way to recognize this. - // - // To work around this, Darwin uses several special relocation types - // to indicate the offsets. However, the specification or - // implementation of these seems to also be incomplete; they should - // adjust the addend as well based on the actual encoded instruction - // (the additional bias), but instead appear to just look at the - // final offset. - switch (-(Target.getConstant() + (1LL << Log2Size))) { - case 1: Type = macho::RIT_X86_64_Signed1; break; - case 2: Type = macho::RIT_X86_64_Signed2; break; - case 4: Type = macho::RIT_X86_64_Signed4; break; - } - } - } else { - if (Modifier != MCSymbolRefExpr::VK_None) - report_fatal_error("unsupported symbol modifier in branch " - "relocation"); - - Type = macho::RIT_X86_64_Branch; - } - } else { - if (Modifier == MCSymbolRefExpr::VK_GOT) { - Type = macho::RIT_X86_64_GOT; - } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) { - // GOTPCREL is allowed as a modifier on non-PCrel instructions, in - // which case all we do is set the PCrel bit in the relocation entry; - // this is used with exception handling, for example. The source is - // required to include any necessary offset directly. - Type = macho::RIT_X86_64_GOT; - IsPCRel = 1; - } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { - report_fatal_error("TLVP symbol modifier should have been rip-rel"); - } else if (Modifier != MCSymbolRefExpr::VK_None) - report_fatal_error("unsupported symbol modifier in relocation"); - else - Type = macho::RIT_X86_64_Unsigned; - } - } + // Initialize the section indirect symbol base, if necessary. + IndirectSymBase.insert(std::make_pair(it->SectionData, IndirectIndex)); - // x86_64 always writes custom values into the fixups. - FixedValue = Value; - - // struct relocation_info (8 bytes) - macho::RelocationEntry MRE; - MRE.Word0 = FixupOffset; - MRE.Word1 = ((Index << 0) | - (IsPCRel << 24) | - (Log2Size << 25) | - (IsExtern << 27) | - (Type << 28)); - Relocations[Fragment->getParent()].push_back(MRE); + Asm.getOrCreateSymbolData(*it->Symbol); } - void RecordScatteredRelocation(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, - uint64_t &FixedValue) { - uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); - unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); - unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); - unsigned Type = macho::RIT_Vanilla; - - // See . - const MCSymbol *A = &Target.getSymA()->getSymbol(); - MCSymbolData *A_SD = &Asm.getSymbolData(*A); - - if (!A_SD->getFragment()) - report_fatal_error("symbol '" + A->getName() + - "' can not be undefined in a subtraction expression"); - - uint32_t Value = getSymbolAddress(A_SD, Layout); - uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent()); - FixedValue += SecAddr; - uint32_t Value2 = 0; - - if (const MCSymbolRefExpr *B = Target.getSymB()) { - MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); - - if (!B_SD->getFragment()) - report_fatal_error("symbol '" + B->getSymbol().getName() + - "' can not be undefined in a subtraction expression"); - - // Select the appropriate difference relocation type. - // - // Note that there is no longer any semantic difference between these two - // relocation types from the linkers point of view, this is done solely - // for pedantic compatibility with 'as'. - Type = A_SD->isExternal() ? (unsigned)macho::RIT_Difference : - (unsigned)macho::RIT_Generic_LocalDifference; - Value2 = getSymbolAddress(B_SD, Layout); - FixedValue -= getSectionAddress(B_SD->getFragment()->getParent()); - } + // Then lazy symbol pointers and symbol stubs. + IndirectIndex = 0; + for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), + ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) { + const MCSectionMachO &Section = + cast(it->SectionData->getSection()); - // Relocations are written out in reverse order, so the PAIR comes first. - if (Type == macho::RIT_Difference || - Type == macho::RIT_Generic_LocalDifference) { - macho::RelocationEntry MRE; - MRE.Word0 = ((0 << 0) | - (macho::RIT_Pair << 24) | - (Log2Size << 28) | - (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value2; - Relocations[Fragment->getParent()].push_back(MRE); - } + if (Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS && + Section.getType() != MachO::S_SYMBOL_STUBS) + continue; - macho::RelocationEntry MRE; - MRE.Word0 = ((FixupOffset << 0) | - (Type << 24) | - (Log2Size << 28) | - (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value; - Relocations[Fragment->getParent()].push_back(MRE); - } - - void RecordARMScatteredRelocation(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, - uint64_t &FixedValue) { - uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); - unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); - unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); - unsigned Type = macho::RIT_Vanilla; - - // See . - const MCSymbol *A = &Target.getSymA()->getSymbol(); - MCSymbolData *A_SD = &Asm.getSymbolData(*A); - - if (!A_SD->getFragment()) - report_fatal_error("symbol '" + A->getName() + - "' can not be undefined in a subtraction expression"); - - uint32_t Value = getSymbolAddress(A_SD, Layout); - uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent()); - FixedValue += SecAddr; - uint32_t Value2 = 0; - - if (const MCSymbolRefExpr *B = Target.getSymB()) { - MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); - - if (!B_SD->getFragment()) - report_fatal_error("symbol '" + B->getSymbol().getName() + - "' can not be undefined in a subtraction expression"); - - // Select the appropriate difference relocation type. - Type = macho::RIT_Difference; - Value2 = getSymbolAddress(B_SD, Layout); - FixedValue -= getSectionAddress(B_SD->getFragment()->getParent()); - } + // Initialize the section indirect symbol base, if necessary. + IndirectSymBase.insert(std::make_pair(it->SectionData, IndirectIndex)); - // Relocations are written out in reverse order, so the PAIR comes first. - if (Type == macho::RIT_Difference || - Type == macho::RIT_Generic_LocalDifference) { - macho::RelocationEntry MRE; - MRE.Word0 = ((0 << 0) | - (macho::RIT_Pair << 24) | - (Log2Size << 28) | - (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value2; - Relocations[Fragment->getParent()].push_back(MRE); - } + // Set the symbol type to undefined lazy, but only on construction. + // + // FIXME: Do not hardcode. + bool Created; + MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created); + if (Created) + Entry.setFlags(Entry.getFlags() | 0x0001); + } +} - macho::RelocationEntry MRE; - MRE.Word0 = ((FixupOffset << 0) | - (Type << 24) | - (Log2Size << 28) | - (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value; - Relocations[Fragment->getParent()].push_back(MRE); +/// ComputeSymbolTable - Compute the symbol table data +void MachObjectWriter::ComputeSymbolTable( + MCAssembler &Asm, std::vector &LocalSymbolData, + std::vector &ExternalSymbolData, + std::vector &UndefinedSymbolData) { + // Build section lookup table. + DenseMap SectionIndexMap; + unsigned Index = 1; + for (MCAssembler::iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it, ++Index) + SectionIndexMap[&it->getSection()] = Index; + assert(Index <= 256 && "Too many sections!"); + + // Build the string table. + for (MCSymbolData &SD : Asm.symbols()) { + const MCSymbol &Symbol = SD.getSymbol(); + if (!Asm.isSymbolLinkerVisible(Symbol)) + continue; + + StringTable.add(Symbol.getName()); } + StringTable.finalize(StringTableBuilder::MachO); - void RecordTLVPRelocation(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, - uint64_t &FixedValue) { - assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP && - !is64Bit() && - "Should only be called with a 32-bit TLVP relocation!"); - - unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); - uint32_t Value = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); - unsigned IsPCRel = 0; - - // Get the symbol data. - MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol()); - unsigned Index = SD_A->getIndex(); - - // We're only going to have a second symbol in pic mode and it'll be a - // subtraction from the picbase. For 32-bit pic the addend is the difference - // between the picbase and the next address. For 32-bit static the addend - // is zero. - if (Target.getSymB()) { - // If this is a subtraction then we're pcrel. - uint32_t FixupAddress = - getFragmentAddress(Fragment, Layout) + Fixup.getOffset(); - MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol()); - IsPCRel = 1; - FixedValue = (FixupAddress - getSymbolAddress(SD_B, Layout) + - Target.getConstant()); - FixedValue += 1ULL << Log2Size; + // Build the symbol arrays but only for non-local symbols. + // + // The particular order that we collect and then sort the symbols is chosen to + // match 'as'. Even though it doesn't matter for correctness, this is + // important for letting us diff .o files. + for (MCSymbolData &SD : Asm.symbols()) { + const MCSymbol &Symbol = SD.getSymbol(); + + // Ignore non-linker visible symbols. + if (!Asm.isSymbolLinkerVisible(Symbol)) + continue; + + if (!SD.isExternal() && !Symbol.isUndefined()) + continue; + + MachSymbolData MSD; + MSD.SymbolData = &SD; + MSD.StringIndex = StringTable.getOffset(Symbol.getName()); + + if (Symbol.isUndefined()) { + MSD.SectionIndex = 0; + UndefinedSymbolData.push_back(MSD); + } else if (Symbol.isAbsolute()) { + MSD.SectionIndex = 0; + ExternalSymbolData.push_back(MSD); } else { - FixedValue = 0; + MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); + assert(MSD.SectionIndex && "Invalid section index!"); + ExternalSymbolData.push_back(MSD); } - - // struct relocation_info (8 bytes) - macho::RelocationEntry MRE; - MRE.Word0 = Value; - MRE.Word1 = ((Index << 0) | - (IsPCRel << 24) | - (Log2Size << 25) | - (1 << 27) | // Extern - (macho::RIT_Generic_TLV << 28)); // Type - Relocations[Fragment->getParent()].push_back(MRE); } - static bool getARMFixupKindMachOInfo(unsigned Kind, bool &Is24BitBranch, - unsigned &Log2Size) { - Is24BitBranch = false; - Log2Size = ~0U; - - switch (Kind) { - default: - return false; + // Now add the data for local symbols. + for (MCSymbolData &SD : Asm.symbols()) { + const MCSymbol &Symbol = SD.getSymbol(); - case FK_Data_1: - Log2Size = llvm::Log2_32(1); - return true; - case FK_Data_2: - Log2Size = llvm::Log2_32(2); - return true; - case FK_Data_4: - Log2Size = llvm::Log2_32(4); - return true; - case FK_Data_8: - Log2Size = llvm::Log2_32(8); - return true; - - // Handle 24-bit branch kinds. - case ARM::fixup_arm_ldst_pcrel_12: - case ARM::fixup_arm_pcrel_10: - case ARM::fixup_arm_adr_pcrel_12: - case ARM::fixup_arm_branch: - Is24BitBranch = true; - // Report as 'long', even though that is not quite accurate. - Log2Size = llvm::Log2_32(4); - return true; - - // Handle Thumb branches. - case ARM::fixup_arm_thumb_br: - Log2Size = llvm::Log2_32(2); - return true; - - case ARM::fixup_arm_thumb_bl: - Log2Size = llvm::Log2_32(4); - return true; - - case ARM::fixup_arm_thumb_blx: - // Report as 'long', even though that is not quite accurate. - Log2Size = llvm::Log2_32(4); - return true; - } - } - void RecordARMRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFragment *Fragment, const MCFixup &Fixup, - MCValue Target, uint64_t &FixedValue) { - unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); - unsigned Log2Size; - bool Is24BitBranch; - if (!getARMFixupKindMachOInfo(Fixup.getKind(), Is24BitBranch, Log2Size)) { - report_fatal_error("unknown ARM fixup kind!"); - return; - } - - // If this is a difference or a defined symbol plus an offset, then we need - // a scattered relocation entry. Differences always require scattered - // relocations. - if (Target.getSymB()) - return RecordARMScatteredRelocation(Asm, Layout, Fragment, Fixup, - Target, FixedValue); + // Ignore non-linker visible symbols. + if (!Asm.isSymbolLinkerVisible(Symbol)) + continue; - // Get the symbol data, if any. - MCSymbolData *SD = 0; - if (Target.getSymA()) - SD = &Asm.getSymbolData(Target.getSymA()->getSymbol()); + if (SD.isExternal() || Symbol.isUndefined()) + continue; - // FIXME: For other platforms, we need to use scattered relocations for - // internal relocations with offsets. If this is an internal relocation - // with an offset, it also needs a scattered relocation entry. - // - // Is this right for ARM? - uint32_t Offset = Target.getConstant(); - if (IsPCRel && !Is24BitBranch) - Offset += 1 << Log2Size; - if (Offset && SD && !doesSymbolRequireExternRelocation(SD)) - return RecordARMScatteredRelocation(Asm, Layout, Fragment, Fixup, - Target, FixedValue); - - // See . - uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); - unsigned Index = 0; - unsigned IsExtern = 0; - unsigned Type = 0; - - if (Target.isAbsolute()) { // constant - // FIXME! - report_fatal_error("FIXME: relocations to absolute targets " - "not yet implemented"); - } else if (SD->getSymbol().isVariable()) { - int64_t Res; - if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute( - Res, Layout, SectionAddress)) { - FixedValue = Res; - return; - } + MachSymbolData MSD; + MSD.SymbolData = &SD; + MSD.StringIndex = StringTable.getOffset(Symbol.getName()); - report_fatal_error("unsupported relocation of variable '" + - SD->getSymbol().getName() + "'"); + if (Symbol.isAbsolute()) { + MSD.SectionIndex = 0; + LocalSymbolData.push_back(MSD); } else { - // Check whether we need an external or internal relocation. - if (doesSymbolRequireExternRelocation(SD)) { - IsExtern = 1; - Index = SD->getIndex(); - // For external relocations, make sure to offset the fixup value to - // compensate for the addend of the symbol address, if it was - // undefined. This occurs with weak definitions, for example. - if (!SD->Symbol->isUndefined()) - FixedValue -= Layout.getSymbolOffset(SD); - } else { - // The index is the section ordinal (1-based). - Index = SD->getFragment()->getParent()->getOrdinal() + 1; - FixedValue += getSectionAddress(SD->getFragment()->getParent()); - } - if (IsPCRel) - FixedValue -= getSectionAddress(Fragment->getParent()); - - // Determine the appropriate type based on the fixup kind. - Type = Is24BitBranch ? (unsigned)macho::RIT_ARM_Branch24Bit : - (unsigned)macho::RIT_Vanilla; + MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); + assert(MSD.SectionIndex && "Invalid section index!"); + LocalSymbolData.push_back(MSD); } - - // struct relocation_info (8 bytes) - macho::RelocationEntry MRE; - MRE.Word0 = FixupOffset; - MRE.Word1 = ((Index << 0) | - (IsPCRel << 24) | - (Log2Size << 25) | - (IsExtern << 27) | - (Type << 28)); - Relocations[Fragment->getParent()].push_back(MRE); } - void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFragment *Fragment, const MCFixup &Fixup, - MCValue Target, uint64_t &FixedValue) { - // FIXME: These needs to be factored into the target Mach-O writer. - if (isARM()) { - RecordARMRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue); - return; - } - if (is64Bit()) { - RecordX86_64Relocation(Asm, Layout, Fragment, Fixup, Target, FixedValue); - return; - } - - unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); - unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); + // External and undefined symbols are required to be in lexicographic order. + std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end()); + std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end()); + + // Set the symbol indices. + Index = 0; + for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) + LocalSymbolData[i].SymbolData->setIndex(Index++); + for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) + ExternalSymbolData[i].SymbolData->setIndex(Index++); + for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) + UndefinedSymbolData[i].SymbolData->setIndex(Index++); + + for (const MCSectionData &SD : Asm) { + std::vector &Relocs = Relocations[&SD]; + for (RelAndSymbol &Rel : Relocs) { + if (!Rel.Sym) + continue; - // If this is a 32-bit TLVP reloc it's handled a bit differently. - if (Target.getSymA() && - Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) { - RecordTLVPRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue); - return; + // Set the Index and the IsExtern bit. + unsigned Index = Rel.Sym->getIndex(); + assert(isInt<24>(Index)); + if (IsLittleEndian) + Rel.MRE.r_word1 = (Rel.MRE.r_word1 & (-1 << 24)) | Index | (1 << 27); + else + Rel.MRE.r_word1 = (Rel.MRE.r_word1 & 0xff) | Index << 8 | (1 << 4); } + } +} - // If this is a difference or a defined symbol plus an offset, then we need - // a scattered relocation entry. - // Differences always require scattered relocations. - if (Target.getSymB()) - return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup, - Target, FixedValue); - - // Get the symbol data, if any. - MCSymbolData *SD = 0; - if (Target.getSymA()) - SD = &Asm.getSymbolData(Target.getSymA()->getSymbol()); - - // If this is an internal relocation with an offset, it also needs a - // scattered relocation entry. - uint32_t Offset = Target.getConstant(); - if (IsPCRel) - Offset += 1 << Log2Size; - if (Offset && SD && !doesSymbolRequireExternRelocation(SD)) - return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup, - Target, FixedValue); - - // See . - uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); - unsigned Index = 0; - unsigned IsExtern = 0; - unsigned Type = 0; - - if (Target.isAbsolute()) { // constant - // SymbolNum of 0 indicates the absolute section. - // - // FIXME: Currently, these are never generated (see code below). I cannot - // find a case where they are actually emitted. - Type = macho::RIT_Vanilla; - } else if (SD->getSymbol().isVariable()) { - int64_t Res; - if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute( - Res, Layout, SectionAddress)) { - FixedValue = Res; - return; - } - - report_fatal_error("unsupported relocation of variable '" + - SD->getSymbol().getName() + "'"); - } else { - // Check whether we need an external or internal relocation. - if (doesSymbolRequireExternRelocation(SD)) { - IsExtern = 1; - Index = SD->getIndex(); - // For external relocations, make sure to offset the fixup value to - // compensate for the addend of the symbol address, if it was - // undefined. This occurs with weak definitions, for example. - if (!SD->Symbol->isUndefined()) - FixedValue -= Layout.getSymbolOffset(SD); - } else { - // The index is the section ordinal (1-based). - Index = SD->getFragment()->getParent()->getOrdinal() + 1; - FixedValue += getSectionAddress(SD->getFragment()->getParent()); - } - if (IsPCRel) - FixedValue -= getSectionAddress(Fragment->getParent()); +void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm, + const MCAsmLayout &Layout) { + uint64_t StartAddress = 0; + const SmallVectorImpl &Order = Layout.getSectionOrder(); + for (int i = 0, n = Order.size(); i != n ; ++i) { + const MCSectionData *SD = Order[i]; + StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment()); + SectionAddress[SD] = StartAddress; + StartAddress += Layout.getSectionAddressSize(SD); + + // Explicitly pad the section to match the alignment requirements of the + // following one. This is for 'gas' compatibility, it shouldn't + /// strictly be necessary. + StartAddress += getPaddingSize(SD, Layout); + } +} - Type = macho::RIT_Vanilla; +void MachObjectWriter::markAbsoluteVariableSymbols(MCAssembler &Asm, + const MCAsmLayout &Layout) { + for (MCSymbolData &SD : Asm.symbols()) { + if (!SD.getSymbol().isVariable()) + continue; + + // Is the variable is a symbol difference (SA - SB + C) expression, + // and neither symbol is external, mark the variable as absolute. + const MCExpr *Expr = SD.getSymbol().getVariableValue(); + MCValue Value; + if (Expr->EvaluateAsRelocatable(Value, &Layout, nullptr)) { + if (Value.getSymA() && Value.getSymB()) + const_cast(&SD.getSymbol())->setAbsolute(); } - - // struct relocation_info (8 bytes) - macho::RelocationEntry MRE; - MRE.Word0 = FixupOffset; - MRE.Word1 = ((Index << 0) | - (IsPCRel << 24) | - (Log2Size << 25) | - (IsExtern << 27) | - (Type << 28)); - Relocations[Fragment->getParent()].push_back(MRE); } +} - void BindIndirectSymbols(MCAssembler &Asm) { - // This is the point where 'as' creates actual symbols for indirect symbols - // (in the following two passes). It would be easier for us to do this - // sooner when we see the attribute, but that makes getting the order in the - // symbol table much more complicated than it is worth. - // - // FIXME: Revisit this when the dust settles. +void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm, + const MCAsmLayout &Layout) { + computeSectionAddresses(Asm, Layout); - // Bind non lazy symbol pointers first. - unsigned IndirectIndex = 0; - for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), - ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) { - const MCSectionMachO &Section = - cast(it->SectionData->getSection()); + // Create symbol data for any indirect symbols. + BindIndirectSymbols(Asm); - if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) - continue; + // Mark symbol difference expressions in variables (from .set or = directives) + // as absolute. + markAbsoluteVariableSymbols(Asm, Layout); +} - // Initialize the section indirect symbol base, if necessary. - if (!IndirectSymBase.count(it->SectionData)) - IndirectSymBase[it->SectionData] = IndirectIndex; +bool MachObjectWriter:: +IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, + const MCSymbolData &DataA, + const MCFragment &FB, + bool InSet, + bool IsPCRel) const { + if (InSet) + return true; - Asm.getOrCreateSymbolData(*it->Symbol); + // The effective address is + // addr(atom(A)) + offset(A) + // - addr(atom(B)) - offset(B) + // and the offsets are not relocatable, so the fixup is fully resolved when + // addr(atom(A)) - addr(atom(B)) == 0. + const MCSymbolData *A_Base = nullptr, *B_Base = nullptr; + + const MCSymbol &SA = DataA.getSymbol().AliasedSymbol(); + const MCSection &SecA = SA.getSection(); + const MCSection &SecB = FB.getParent()->getSection(); + + if (IsPCRel) { + // The simple (Darwin, except on x86_64) way of dealing with this was to + // assume that any reference to a temporary symbol *must* be a temporary + // symbol in the same atom, unless the sections differ. Therefore, any PCrel + // relocation to a temporary symbol (in the same section) is fully + // resolved. This also works in conjunction with absolutized .set, which + // requires the compiler to use .set to absolutize the differences between + // symbols which the compiler knows to be assembly time constants, so we + // don't need to worry about considering symbol differences fully resolved. + // + // If the file isn't using sub-sections-via-symbols, we can make the + // same assumptions about any symbol that we normally make about + // assembler locals. + + bool hasReliableSymbolDifference = isX86_64(); + if (!hasReliableSymbolDifference) { + if (!SA.isInSection() || &SecA != &SecB || + (!SA.isTemporary() && + FB.getAtom() != Asm.getSymbolData(SA).getFragment()->getAtom() && + Asm.getSubsectionsViaSymbols())) + return false; + return true; } - - // Then lazy symbol pointers and symbol stubs. - IndirectIndex = 0; - for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), - ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) { - const MCSectionMachO &Section = - cast(it->SectionData->getSection()); - - if (Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS && - Section.getType() != MCSectionMachO::S_SYMBOL_STUBS) - continue; - - // Initialize the section indirect symbol base, if necessary. - if (!IndirectSymBase.count(it->SectionData)) - IndirectSymBase[it->SectionData] = IndirectIndex; - - // Set the symbol type to undefined lazy, but only on construction. - // - // FIXME: Do not hardcode. - bool Created; - MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created); - if (Created) - Entry.setFlags(Entry.getFlags() | 0x0001); + // For Darwin x86_64, there is one special case when the reference IsPCRel. + // If the fragment with the reference does not have a base symbol but meets + // the simple way of dealing with this, in that it is a temporary symbol in + // the same atom then it is assumed to be fully resolved. This is needed so + // a relocation entry is not created and so the static linker does not + // mess up the reference later. + else if(!FB.getAtom() && + SA.isTemporary() && SA.isInSection() && &SecA == &SecB){ + return true; } + } else { + if (!TargetObjectWriter->useAggressiveSymbolFolding()) + return false; } - /// ComputeSymbolTable - Compute the symbol table data - /// - /// \param StringTable [out] - The string table data. - /// \param StringIndexMap [out] - Map from symbol names to offsets in the - /// string table. - void ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable, - std::vector &LocalSymbolData, - std::vector &ExternalSymbolData, - std::vector &UndefinedSymbolData) { - // Build section lookup table. - DenseMap SectionIndexMap; - unsigned Index = 1; - for (MCAssembler::iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it, ++Index) - SectionIndexMap[&it->getSection()] = Index; - assert(Index <= 256 && "Too many sections!"); - - // Index 0 is always the empty string. - StringMap StringIndexMap; - StringTable += '\x00'; - - // Build the symbol arrays and the string table, but only for non-local - // symbols. - // - // The particular order that we collect the symbols and create the string - // table, then sort the symbols is chosen to match 'as'. Even though it - // doesn't matter for correctness, this is important for letting us diff .o - // files. - for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), - ie = Asm.symbol_end(); it != ie; ++it) { - const MCSymbol &Symbol = it->getSymbol(); - - // Ignore non-linker visible symbols. - if (!Asm.isSymbolLinkerVisible(it->getSymbol())) - continue; - - if (!it->isExternal() && !Symbol.isUndefined()) - continue; - - uint64_t &Entry = StringIndexMap[Symbol.getName()]; - if (!Entry) { - Entry = StringTable.size(); - StringTable += Symbol.getName(); - StringTable += '\x00'; - } - - MachSymbolData MSD; - MSD.SymbolData = it; - MSD.StringIndex = Entry; - - if (Symbol.isUndefined()) { - MSD.SectionIndex = 0; - UndefinedSymbolData.push_back(MSD); - } else if (Symbol.isAbsolute()) { - MSD.SectionIndex = 0; - ExternalSymbolData.push_back(MSD); - } else { - MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); - assert(MSD.SectionIndex && "Invalid section index!"); - ExternalSymbolData.push_back(MSD); - } - } - - // Now add the data for local symbols. - for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), - ie = Asm.symbol_end(); it != ie; ++it) { - const MCSymbol &Symbol = it->getSymbol(); - - // Ignore non-linker visible symbols. - if (!Asm.isSymbolLinkerVisible(it->getSymbol())) - continue; + // If they are not in the same section, we can't compute the diff. + if (&SecA != &SecB) + return false; - if (it->isExternal() || Symbol.isUndefined()) - continue; + const MCFragment *FA = Asm.getSymbolData(SA).getFragment(); - uint64_t &Entry = StringIndexMap[Symbol.getName()]; - if (!Entry) { - Entry = StringTable.size(); - StringTable += Symbol.getName(); - StringTable += '\x00'; - } + // Bail if the symbol has no fragment. + if (!FA) + return false; - MachSymbolData MSD; - MSD.SymbolData = it; - MSD.StringIndex = Entry; - - if (Symbol.isAbsolute()) { - MSD.SectionIndex = 0; - LocalSymbolData.push_back(MSD); - } else { - MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); - assert(MSD.SectionIndex && "Invalid section index!"); - LocalSymbolData.push_back(MSD); - } - } + A_Base = FA->getAtom(); + B_Base = FB.getAtom(); - // External and undefined symbols are required to be in lexicographic order. - std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end()); - std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end()); + // If the atoms are the same, they are guaranteed to have the same address. + if (A_Base == B_Base) + return true; - // Set the symbol indices. - Index = 0; - for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) - LocalSymbolData[i].SymbolData->setIndex(Index++); - for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) - ExternalSymbolData[i].SymbolData->setIndex(Index++); - for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) - UndefinedSymbolData[i].SymbolData->setIndex(Index++); + // Otherwise, we can't prove this is fully resolved. + return false; +} - // The string table is padded to a multiple of 4. - while (StringTable.size() % 4) - StringTable += '\x00'; +void MachObjectWriter::WriteObject(MCAssembler &Asm, + const MCAsmLayout &Layout) { + // Compute symbol table information and bind symbol indices. + ComputeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData, + UndefinedSymbolData); + + unsigned NumSections = Asm.size(); + const MCAssembler::VersionMinInfoType &VersionInfo = + Layout.getAssembler().getVersionMinInfo(); + + // The section data starts after the header, the segment load command (and + // section headers) and the symbol table. + unsigned NumLoadCommands = 1; + uint64_t LoadCommandsSize = is64Bit() ? + sizeof(MachO::segment_command_64) + NumSections * sizeof(MachO::section_64): + sizeof(MachO::segment_command) + NumSections * sizeof(MachO::section); + + // Add the deployment target version info load command size, if used. + if (VersionInfo.Major != 0) { + ++NumLoadCommands; + LoadCommandsSize += sizeof(MachO::version_min_command); } - void computeSectionAddresses(const MCAssembler &Asm, - const MCAsmLayout &Layout) { - uint64_t StartAddress = 0; - const SmallVectorImpl &Order = Layout.getSectionOrder(); - for (int i = 0, n = Order.size(); i != n ; ++i) { - const MCSectionData *SD = Order[i]; - StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment()); - SectionAddress[SD] = StartAddress; - StartAddress += Layout.getSectionAddressSize(SD); - // Explicitly pad the section to match the alignment requirements of the - // following one. This is for 'gas' compatibility, it shouldn't - /// strictly be necessary. - StartAddress += getPaddingSize(SD, Layout); - } + // Add the data-in-code load command size, if used. + unsigned NumDataRegions = Asm.getDataRegions().size(); + if (NumDataRegions) { + ++NumLoadCommands; + LoadCommandsSize += sizeof(MachO::linkedit_data_command); } - void ExecutePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) { - computeSectionAddresses(Asm, Layout); - - // Create symbol data for any indirect symbols. - BindIndirectSymbols(Asm); - - // Compute symbol table information and bind symbol indices. - ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData, - UndefinedSymbolData); + // Add the loh load command size, if used. + uint64_t LOHRawSize = Asm.getLOHContainer().getEmitSize(*this, Layout); + uint64_t LOHSize = RoundUpToAlignment(LOHRawSize, is64Bit() ? 8 : 4); + if (LOHSize) { + ++NumLoadCommands; + LoadCommandsSize += sizeof(MachO::linkedit_data_command); } - bool IsSymbolRefDifferenceFullyResolved(const MCAssembler &Asm, - const MCSymbolRefExpr *A, - const MCSymbolRefExpr *B, - bool InSet) const { - if (InSet) - return true; - - if (!TargetObjectWriter->useAggressiveSymbolFolding()) - return false; - - // The effective address is - // addr(atom(A)) + offset(A) - // - addr(atom(B)) - offset(B) - // and the offsets are not relocatable, so the fixup is fully resolved when - // addr(atom(A)) - addr(atom(B)) == 0. - const MCSymbolData *A_Base = 0, *B_Base = 0; - - // Modified symbol references cannot be resolved. - if (A->getKind() != MCSymbolRefExpr::VK_None || - B->getKind() != MCSymbolRefExpr::VK_None) - return false; - - A_Base = Asm.getAtom(&Asm.getSymbolData(A->getSymbol())); - if (!A_Base) - return false; - - B_Base = Asm.getAtom(&Asm.getSymbolData(B->getSymbol())); - if (!B_Base) - return false; - - // If the atoms are the same, they are guaranteed to have the same address. - if (A_Base == B_Base) - return true; - - // Otherwise, we can't prove this is fully resolved. - return false; + // Add the symbol table load command sizes, if used. + unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() + + UndefinedSymbolData.size(); + if (NumSymbols) { + NumLoadCommands += 2; + LoadCommandsSize += (sizeof(MachO::symtab_command) + + sizeof(MachO::dysymtab_command)); } - bool IsFixupFullyResolved(const MCAssembler &Asm, - const MCValue Target, - bool IsPCRel, - const MCFragment *DF) const { - // Otherwise, determine whether this value is actually resolved; scattering - // may cause atoms to move. - - // Check if we are using the "simple" resolution algorithm (e.g., - // i386). - if (!Asm.getBackend().hasReliableSymbolDifference()) { - const MCSection *BaseSection = 0; - if (IsPCRel) - BaseSection = &DF->getParent()->getSection(); - - return isScatteredFixupFullyResolvedSimple(Asm, Target, BaseSection); - } + // Add the linker option load commands sizes. + const std::vector > &LinkerOptions = + Asm.getLinkerOptions(); + for (unsigned i = 0, e = LinkerOptions.size(); i != e; ++i) { + ++NumLoadCommands; + LoadCommandsSize += ComputeLinkerOptionsLoadCommandSize(LinkerOptions[i], + is64Bit()); + } + + // Compute the total size of the section data, as well as its file size and vm + // size. + uint64_t SectionDataStart = (is64Bit() ? sizeof(MachO::mach_header_64) : + sizeof(MachO::mach_header)) + LoadCommandsSize; + uint64_t SectionDataSize = 0; + uint64_t SectionDataFileSize = 0; + uint64_t VMSize = 0; + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + const MCSectionData &SD = *it; + uint64_t Address = getSectionAddress(&SD); + uint64_t Size = Layout.getSectionAddressSize(&SD); + uint64_t FileSize = Layout.getSectionFileSize(&SD); + FileSize += getPaddingSize(&SD, Layout); + + VMSize = std::max(VMSize, Address + Size); + + if (SD.getSection().isVirtualSection()) + continue; + + SectionDataSize = std::max(SectionDataSize, Address + Size); + SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize); + } - // Otherwise, compute the proper answer as reliably as possible. + // The section data is padded to 4 bytes. + // + // FIXME: Is this machine dependent? + unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4); + SectionDataFileSize += SectionDataPadding; + + // Write the prolog, starting with the header and load command... + WriteHeader(NumLoadCommands, LoadCommandsSize, + Asm.getSubsectionsViaSymbols()); + WriteSegmentLoadCommand(NumSections, VMSize, + SectionDataStart, SectionDataSize); + + // ... and then the section headers. + uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize; + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + std::vector &Relocs = Relocations[it]; + unsigned NumRelocs = Relocs.size(); + uint64_t SectionStart = SectionDataStart + getSectionAddress(it); + WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs); + RelocTableEnd += NumRelocs * sizeof(MachO::any_relocation_info); + } - // If this is a PCrel relocation, find the base atom (identified by its - // symbol) that the fixup value is relative to. - const MCSymbolData *BaseSymbol = 0; - if (IsPCRel) { - BaseSymbol = DF->getAtom(); - if (!BaseSymbol) - return false; - } + // Write out the deployment target information, if it's available. + if (VersionInfo.Major != 0) { + assert(VersionInfo.Update < 256 && "unencodable update target version"); + assert(VersionInfo.Minor < 256 && "unencodable minor target version"); + assert(VersionInfo.Major < 65536 && "unencodable major target version"); + uint32_t EncodedVersion = VersionInfo.Update | (VersionInfo.Minor << 8) | + (VersionInfo.Major << 16); + Write32(VersionInfo.Kind == MCVM_OSXVersionMin ? MachO::LC_VERSION_MIN_MACOSX : + MachO::LC_VERSION_MIN_IPHONEOS); + Write32(sizeof(MachO::version_min_command)); + Write32(EncodedVersion); + Write32(0); // reserved. + } - return isScatteredFixupFullyResolved(Asm, Target, BaseSymbol); + // Write the data-in-code load command, if used. + uint64_t DataInCodeTableEnd = RelocTableEnd + NumDataRegions * 8; + if (NumDataRegions) { + uint64_t DataRegionsOffset = RelocTableEnd; + uint64_t DataRegionsSize = NumDataRegions * 8; + WriteLinkeditLoadCommand(MachO::LC_DATA_IN_CODE, DataRegionsOffset, + DataRegionsSize); } - void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) { - unsigned NumSections = Asm.size(); - - // The section data starts after the header, the segment load command (and - // section headers) and the symbol table. - unsigned NumLoadCommands = 1; - uint64_t LoadCommandsSize = is64Bit() ? - macho::SegmentLoadCommand64Size + NumSections * macho::Section64Size : - macho::SegmentLoadCommand32Size + NumSections * macho::Section32Size; - - // Add the symbol table load command sizes, if used. - unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() + - UndefinedSymbolData.size(); - if (NumSymbols) { - NumLoadCommands += 2; - LoadCommandsSize += (macho::SymtabLoadCommandSize + - macho::DysymtabLoadCommandSize); - } + // Write the loh load command, if used. + uint64_t LOHTableEnd = DataInCodeTableEnd + LOHSize; + if (LOHSize) + WriteLinkeditLoadCommand(MachO::LC_LINKER_OPTIMIZATION_HINT, + DataInCodeTableEnd, LOHSize); + + // Write the symbol table load command, if used. + if (NumSymbols) { + unsigned FirstLocalSymbol = 0; + unsigned NumLocalSymbols = LocalSymbolData.size(); + unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols; + unsigned NumExternalSymbols = ExternalSymbolData.size(); + unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols; + unsigned NumUndefinedSymbols = UndefinedSymbolData.size(); + unsigned NumIndirectSymbols = Asm.indirect_symbol_size(); + unsigned NumSymTabSymbols = + NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols; + uint64_t IndirectSymbolSize = NumIndirectSymbols * 4; + uint64_t IndirectSymbolOffset = 0; + + // If used, the indirect symbols are written after the section data. + if (NumIndirectSymbols) + IndirectSymbolOffset = LOHTableEnd; + + // The symbol table is written after the indirect symbol data. + uint64_t SymbolTableOffset = LOHTableEnd + IndirectSymbolSize; + + // The string table is written after symbol table. + uint64_t StringTableOffset = + SymbolTableOffset + NumSymTabSymbols * (is64Bit() ? + sizeof(MachO::nlist_64) : + sizeof(MachO::nlist)); + WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols, + StringTableOffset, StringTable.data().size()); + + WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols, + FirstExternalSymbol, NumExternalSymbols, + FirstUndefinedSymbol, NumUndefinedSymbols, + IndirectSymbolOffset, NumIndirectSymbols); + } - // Compute the total size of the section data, as well as its file size and - // vm size. - uint64_t SectionDataStart = (is64Bit() ? macho::Header64Size : - macho::Header32Size) + LoadCommandsSize; - uint64_t SectionDataSize = 0; - uint64_t SectionDataFileSize = 0; - uint64_t VMSize = 0; - for (MCAssembler::const_iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it) { - const MCSectionData &SD = *it; - uint64_t Address = getSectionAddress(&SD); - uint64_t Size = Layout.getSectionAddressSize(&SD); - uint64_t FileSize = Layout.getSectionFileSize(&SD); - FileSize += getPaddingSize(&SD, Layout); - - VMSize = std::max(VMSize, Address + Size); - - if (SD.getSection().isVirtualSection()) - continue; + // Write the linker options load commands. + for (unsigned i = 0, e = LinkerOptions.size(); i != e; ++i) { + WriteLinkerOptionsLoadCommand(LinkerOptions[i]); + } - SectionDataSize = std::max(SectionDataSize, Address + Size); - SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize); - } + // Write the actual section data. + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + Asm.writeSectionData(it, Layout); - // The section data is padded to 4 bytes. - // - // FIXME: Is this machine dependent? - unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4); - SectionDataFileSize += SectionDataPadding; - - // Write the prolog, starting with the header and load command... - WriteHeader(NumLoadCommands, LoadCommandsSize, - Asm.getSubsectionsViaSymbols()); - WriteSegmentLoadCommand(NumSections, VMSize, - SectionDataStart, SectionDataSize); - - // ... and then the section headers. - uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize; - for (MCAssembler::const_iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it) { - std::vector &Relocs = Relocations[it]; - unsigned NumRelocs = Relocs.size(); - uint64_t SectionStart = SectionDataStart + getSectionAddress(it); - WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs); - RelocTableEnd += NumRelocs * macho::RelocationInfoSize; - } + uint64_t Pad = getPaddingSize(it, Layout); + for (unsigned int i = 0; i < Pad; ++i) + Write8(0); + } - // Write the symbol table load command, if used. - if (NumSymbols) { - unsigned FirstLocalSymbol = 0; - unsigned NumLocalSymbols = LocalSymbolData.size(); - unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols; - unsigned NumExternalSymbols = ExternalSymbolData.size(); - unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols; - unsigned NumUndefinedSymbols = UndefinedSymbolData.size(); - unsigned NumIndirectSymbols = Asm.indirect_symbol_size(); - unsigned NumSymTabSymbols = - NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols; - uint64_t IndirectSymbolSize = NumIndirectSymbols * 4; - uint64_t IndirectSymbolOffset = 0; - - // If used, the indirect symbols are written after the section data. - if (NumIndirectSymbols) - IndirectSymbolOffset = RelocTableEnd; - - // The symbol table is written after the indirect symbol data. - uint64_t SymbolTableOffset = RelocTableEnd + IndirectSymbolSize; - - // The string table is written after symbol table. - uint64_t StringTableOffset = - SymbolTableOffset + NumSymTabSymbols * (is64Bit() ? macho::Nlist64Size : - macho::Nlist32Size); - WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols, - StringTableOffset, StringTable.size()); - - WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols, - FirstExternalSymbol, NumExternalSymbols, - FirstUndefinedSymbol, NumUndefinedSymbols, - IndirectSymbolOffset, NumIndirectSymbols); + // Write the extra padding. + WriteZeros(SectionDataPadding); + + // Write the relocation entries. + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + // Write the section relocation entries, in reverse order to match 'as' + // (approximately, the exact algorithm is more complicated than this). + std::vector &Relocs = Relocations[it]; + for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { + Write32(Relocs[e - i - 1].MRE.r_word0); + Write32(Relocs[e - i - 1].MRE.r_word1); } + } - // Write the actual section data. - for (MCAssembler::const_iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it) { - Asm.WriteSectionData(it, Layout); - - uint64_t Pad = getPaddingSize(it, Layout); - for (unsigned int i = 0; i < Pad; ++i) - Write8(0); - } + // Write out the data-in-code region payload, if there is one. + for (MCAssembler::const_data_region_iterator + it = Asm.data_region_begin(), ie = Asm.data_region_end(); + it != ie; ++it) { + const DataRegionData *Data = &(*it); + uint64_t Start = + getSymbolAddress(&Layout.getAssembler().getSymbolData(*Data->Start), + Layout); + uint64_t End = + getSymbolAddress(&Layout.getAssembler().getSymbolData(*Data->End), + Layout); + DEBUG(dbgs() << "data in code region-- kind: " << Data->Kind + << " start: " << Start << "(" << Data->Start->getName() << ")" + << " end: " << End << "(" << Data->End->getName() << ")" + << " size: " << End - Start + << "\n"); + Write32(Start); + Write16(End - Start); + Write16(Data->Kind); + } - // Write the extra padding. - WriteZeros(SectionDataPadding); - - // Write the relocation entries. - for (MCAssembler::const_iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it) { - // Write the section relocation entries, in reverse order to match 'as' - // (approximately, the exact algorithm is more complicated than this). - std::vector &Relocs = Relocations[it]; - for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { - Write32(Relocs[e - i - 1].Word0); - Write32(Relocs[e - i - 1].Word1); - } - } + // Write out the loh commands, if there is one. + if (LOHSize) { +#ifndef NDEBUG + unsigned Start = OS.tell(); +#endif + Asm.getLOHContainer().Emit(*this, Layout); + // Pad to a multiple of the pointer size. + WriteBytes("", OffsetToAlignment(LOHRawSize, is64Bit() ? 8 : 4)); + assert(OS.tell() - Start == LOHSize); + } - // Write the symbol table data, if used. - if (NumSymbols) { - // Write the indirect symbol entries. - for (MCAssembler::const_indirect_symbol_iterator - it = Asm.indirect_symbol_begin(), - ie = Asm.indirect_symbol_end(); it != ie; ++it) { - // Indirect symbols in the non lazy symbol pointer section have some - // special handling. - const MCSectionMachO &Section = - static_cast(it->SectionData->getSection()); - if (Section.getType() == MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) { - // If this symbol is defined and internal, mark it as such. - if (it->Symbol->isDefined() && - !Asm.getSymbolData(*it->Symbol).isExternal()) { - uint32_t Flags = macho::ISF_Local; - if (it->Symbol->isAbsolute()) - Flags |= macho::ISF_Absolute; - Write32(Flags); - continue; - } + // Write the symbol table data, if used. + if (NumSymbols) { + // Write the indirect symbol entries. + for (MCAssembler::const_indirect_symbol_iterator + it = Asm.indirect_symbol_begin(), + ie = Asm.indirect_symbol_end(); it != ie; ++it) { + // Indirect symbols in the non-lazy symbol pointer section have some + // special handling. + const MCSectionMachO &Section = + static_cast(it->SectionData->getSection()); + if (Section.getType() == MachO::S_NON_LAZY_SYMBOL_POINTERS) { + // If this symbol is defined and internal, mark it as such. + if (it->Symbol->isDefined() && + !Asm.getSymbolData(*it->Symbol).isExternal()) { + uint32_t Flags = MachO::INDIRECT_SYMBOL_LOCAL; + if (it->Symbol->isAbsolute()) + Flags |= MachO::INDIRECT_SYMBOL_ABS; + Write32(Flags); + continue; } - - Write32(Asm.getSymbolData(*it->Symbol).getIndex()); } - // FIXME: Check that offsets match computed ones. + Write32(Asm.getSymbolData(*it->Symbol).getIndex()); + } - // Write the symbol table entries. - for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) - WriteNlist(LocalSymbolData[i], Layout); - for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) - WriteNlist(ExternalSymbolData[i], Layout); - for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) - WriteNlist(UndefinedSymbolData[i], Layout); + // FIXME: Check that offsets match computed ones. - // Write the string table. - OS << StringTable.str(); - } - } -}; + // Write the symbol table entries. + for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) + WriteNlist(LocalSymbolData[i], Layout); + for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) + WriteNlist(ExternalSymbolData[i], Layout); + for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) + WriteNlist(UndefinedSymbolData[i], Layout); + // Write the string table. + OS << StringTable.data(); + } } MCObjectWriter *llvm::createMachObjectWriter(MCMachObjectTargetWriter *MOTW,