1 //===-- LTOModule.cpp - LLVM Link Time Optimizer --------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the Link Time Optimization library. This library is
11 // intended to be used by linker to optimize code at link time.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/LTO/LTOModule.h"
16 #include "llvm/ADT/Triple.h"
17 #include "llvm/Bitcode/ReaderWriter.h"
18 #include "llvm/CodeGen/Analysis.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/DiagnosticPrinter.h"
21 #include "llvm/IR/LLVMContext.h"
22 #include "llvm/IR/Metadata.h"
23 #include "llvm/IR/Module.h"
24 #include "llvm/MC/MCExpr.h"
25 #include "llvm/MC/MCInst.h"
26 #include "llvm/MC/MCInstrInfo.h"
27 #include "llvm/MC/MCParser/MCAsmParser.h"
28 #include "llvm/MC/MCSection.h"
29 #include "llvm/MC/MCSubtargetInfo.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/MC/MCTargetAsmParser.h"
32 #include "llvm/MC/SubtargetFeature.h"
33 #include "llvm/Object/IRObjectFile.h"
34 #include "llvm/Object/ObjectFile.h"
35 #include "llvm/Support/CommandLine.h"
36 #include "llvm/Support/FileSystem.h"
37 #include "llvm/Support/Host.h"
38 #include "llvm/Support/MemoryBuffer.h"
39 #include "llvm/Support/Path.h"
40 #include "llvm/Support/SourceMgr.h"
41 #include "llvm/Support/TargetRegistry.h"
42 #include "llvm/Support/TargetSelect.h"
43 #include "llvm/Target/TargetLowering.h"
44 #include "llvm/Target/TargetLoweringObjectFile.h"
45 #include "llvm/Target/TargetRegisterInfo.h"
46 #include "llvm/Target/TargetSubtargetInfo.h"
47 #include "llvm/Transforms/Utils/GlobalStatus.h"
48 #include <system_error>
50 using namespace llvm::object;
/// Construct an LTOModule that takes ownership of the IR object file Obj
/// (moved into IRFile) and records the target machine pointer TM in _target.
/// OwnedContext is not named in this initializer list.
52 LTOModule::LTOModule(std::unique_ptr<object::IRObjectFile> Obj,
53 llvm::TargetMachine *TM)
54 : IRFile(std::move(Obj)), _target(TM) {}
/// Construct an LTOModule that additionally takes ownership of Context (moved
/// into OwnedContext), besides moving Obj into IRFile and recording TM in
/// _target.
56 LTOModule::LTOModule(std::unique_ptr<object::IRObjectFile> Obj,
57 llvm::TargetMachine *TM,
58 std::unique_ptr<LLVMContext> Context)
59 : OwnedContext(std::move(Context)), IRFile(std::move(Obj)), _target(TM) {}
/// Destructor with an empty body; no explicit cleanup is performed here.
61 LTOModule::~LTOModule() {}
63 /// isBitcodeFile - Returns 'true' if the file (or memory contents) is LLVM
// NOTE(review): the sentence above is cut off in this listing; presumably it
// ends "... is LLVM bitcode" - confirm against the full file.
// Wraps the Length bytes at Mem in a MemoryBufferRef named "<mem>" and asks
// IRObjectFile::findBitcodeInMemBuffer to locate bitcode inside it.
// NOTE(review): embedded numbering skips 68-70, so the statement that turns
// BCData into the bool result is not visible here.
65 bool LTOModule::isBitcodeFile(const void *Mem, size_t Length) {
66 ErrorOr<MemoryBufferRef> BCData = IRObjectFile::findBitcodeInMemBuffer(
67 MemoryBufferRef(StringRef((const char *)Mem, Length), "<mem>"));
/// Path overload of isBitcodeFile: loads the file at Path with
/// MemoryBuffer::getFile and runs IRObjectFile::findBitcodeInMemBuffer on the
/// loaded buffer.
// NOTE(review): embedded numbering skips 74-76 and stops at 78, so the handling
// of a failed getFile() and the final return are not visible in this listing.
71 bool LTOModule::isBitcodeFile(const char *Path) {
72 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
73 MemoryBuffer::getFile(Path);
77 ErrorOr<MemoryBufferRef> BCData = IRObjectFile::findBitcodeInMemBuffer(
78 BufferOrErr.get()->getMemBufferRef());
/// Returns whether the target triple read from the bitcode located in Buffer
/// starts with TriplePrefix.
// NOTE(review): embedded numbering skips 86-88; the declaration of `Context`
// and any check of BCOrErr before it is dereferenced are not visible here.
82 bool LTOModule::isBitcodeForTarget(MemoryBuffer *Buffer,
83 StringRef TriplePrefix) {
84 ErrorOr<MemoryBufferRef> BCOrErr =
85 IRObjectFile::findBitcodeInMemBuffer(Buffer->getMemBufferRef());
// Extract the triple string from the located bitcode and prefix-match it.
89 std::string Triple = getBitcodeTargetTriple(*BCOrErr, Context);
90 return StringRef(Triple).startswith(TriplePrefix);
/// Create an LTOModule from the file at path, passing the global LLVMContext
/// to makeLTOModule. When reading the file fails, the error_code's message is
/// copied into errMsg.
// NOTE(review): embedded numbering skips 99-100; the return taken on the
// error path is not visible in this listing.
93 LTOModule *LTOModule::createFromFile(const char *path, TargetOptions options,
94 std::string &errMsg) {
95 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
96 MemoryBuffer::getFile(path);
97 if (std::error_code EC = BufferOrErr.getError()) {
98 errMsg = EC.message();
// Buffer is a function-local owner of the file contents; makeLTOModule only
// receives a non-owning MemoryBufferRef to it.
101 std::unique_ptr<MemoryBuffer> Buffer = std::move(BufferOrErr.get());
102 return makeLTOModule(Buffer->getMemBufferRef(), options, errMsg,
103 &getGlobalContext());
/// Create an LTOModule from an already-open file descriptor by forwarding to
/// createFromOpenFileSlice with `size` as the map size and an offset of 0.
106 LTOModule *LTOModule::createFromOpenFile(int fd, const char *path, size_t size,
107 TargetOptions options,
108 std::string &errMsg) {
109 return createFromOpenFileSlice(fd, path, size, 0, options, errMsg);
/// Create an LTOModule from a slice of an open file: map_size bytes starting
/// at offset are obtained through MemoryBuffer::getOpenFileSlice and passed,
/// with the global LLVMContext, to makeLTOModule. When obtaining the slice
/// fails, the error_code's message is copied into errMsg.
// NOTE(review): embedded numbering skips 120-121; the return taken on the
// error path is not visible in this listing.
112 LTOModule *LTOModule::createFromOpenFileSlice(int fd, const char *path,
113 size_t map_size, off_t offset,
114 TargetOptions options,
115 std::string &errMsg) {
116 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
117 MemoryBuffer::getOpenFileSlice(fd, path, map_size, offset);
118 if (std::error_code EC = BufferOrErr.getError()) {
119 errMsg = EC.message();
// Buffer is a function-local owner of the mapped contents; makeLTOModule only
// receives a non-owning MemoryBufferRef to it.
122 std::unique_ptr<MemoryBuffer> Buffer = std::move(BufferOrErr.get());
123 return makeLTOModule(Buffer->getMemBufferRef(), options, errMsg,
124 &getGlobalContext());
/// Create an LTOModule from a memory range by forwarding to createInContext
/// with a pointer to the global LLVMContext.
127 LTOModule *LTOModule::createFromBuffer(const void *mem, size_t length,
128 TargetOptions options,
129 std::string &errMsg, StringRef path) {
130 return createInContext(mem, length, options, errMsg, path,
131 &getGlobalContext());
/// Create an LTOModule from a memory range by forwarding to createInContext
/// with a null LLVMContext pointer.
// NOTE(review): embedded numbering skips 136-137, so the declarations of the
// errMsg and path parameters used below are not visible in this listing.
// Presumably the null context makes makeLTOModule allocate its own - confirm.
134 LTOModule *LTOModule::createInLocalContext(const void *mem, size_t length,
135 TargetOptions options,
138 return createInContext(mem, length, options, errMsg, path, nullptr);
/// Wrap the `length` bytes at `mem` in a MemoryBufferRef labelled with `path`
/// and pass it, together with the caller's Context pointer, to makeLTOModule.
141 LTOModule *LTOModule::createInContext(const void *mem, size_t length,
142 TargetOptions options,
143 std::string &errMsg, StringRef path,
144 LLVMContext *Context) {
// Non-owning view over the caller's memory; no copy is made here.
145 StringRef Data((const char *)mem, length);
146 MemoryBufferRef Buffer(Data, path);
147 return makeLTOModule(Buffer, options, errMsg, Context);
/// File-local helper that locates the bitcode inside Buffer and parses it
/// into a Module in Context. ErrMsg receives the error_code message when no
/// bitcode is found, and is also the string that the diagnostic handler's
/// stream writes into.
// NOTE(review): the embedded numbering has gaps (153-154, 159-161, 166-169,
// 173-178, 182-187). The branch on ShouldBeLazy, the return statements and the
// rest of the handler body are not visible in this listing; the comments below
// describe only the statements that are shown.
150 static Module *parseBitcodeFileImpl(MemoryBufferRef Buffer,
151 LLVMContext &Context, bool ShouldBeLazy,
152 std::string &ErrMsg) {
// Locate the bitcode payload within the buffer.
155 ErrorOr<MemoryBufferRef> MBOrErr =
156 IRObjectFile::findBitcodeInMemBuffer(Buffer);
157 if (std::error_code EC = MBOrErr.getError()) {
158 ErrMsg = EC.message();
// Diagnostic handler: sets up a printer over a raw_string_ostream bound to
// ErrMsg (captured by reference).
162 std::function<void(const DiagnosticInfo &)> DiagnosticHandler =
163 [&ErrMsg](const DiagnosticInfo &DI) {
164 raw_string_ostream Stream(ErrMsg);
165 DiagnosticPrinterRawOStream DP(Stream);
170 // Parse the full file.
171 ErrorOr<Module *> M =
172 parseBitcodeFile(*MBOrErr, Context, DiagnosticHandler);
// Lazy variant: wrap the located bitcode in a MemoryBuffer and move that
// buffer into getLazyBitcodeModule.
179 std::unique_ptr<MemoryBuffer> LightweightBuf =
180 MemoryBuffer::getMemBuffer(*MBOrErr, false);
181 ErrorOr<Module *> M = getLazyBitcodeModule(std::move(LightweightBuf), Context,
/// Build an LTOModule from the bitcode in Buffer. Visible steps: parse the
/// module, resolve the target for its triple, create a TargetMachine, set the
/// module's data layout from it, wrap buffer and module in an IRObjectFile,
/// construct the LTOModule, then run parseSymbols and parseMetadata on it.
// NOTE(review): the embedded numbering has many gaps in this function. The
// conditions guarding context creation, the failure checks after parsing and
// target lookup, the CPU assignments, the selection between the two `new`
// expressions and all return statements are not visible in this listing.
188 LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer,
189 TargetOptions options, std::string &errMsg,
190 LLVMContext *Context) {
// Locally owned context; when it is created, Context is redirected to it.
191 std::unique_ptr<LLVMContext> OwnedContext;
193 OwnedContext = llvm::make_unique<LLVMContext>();
194 Context = OwnedContext.get();
197 // If we own a context, we know this is being used only for symbol
198 // extraction, not linking. Be lazy in that case.
199 std::unique_ptr<Module> M(parseBitcodeFileImpl(
201 /* ShouldBeLazy */ static_cast<bool>(OwnedContext), errMsg));
// Fall back to the host's default triple when the module does not carry one.
205 std::string TripleStr = M->getTargetTriple();
206 if (TripleStr.empty())
207 TripleStr = sys::getDefaultTargetTriple();
208 llvm::Triple Triple(TripleStr);
210 // find machine architecture for this module
211 const Target *march = TargetRegistry::lookupTarget(TripleStr, errMsg);
215 // construct LTOModule, hand over ownership of module and target
216 SubtargetFeatures Features;
217 Features.getDefaultSubtargetFeatures(Triple);
218 std::string FeatureStr = Features.getString();
219 // Set a default CPU for Darwin triples.
// The CPU chosen for each architecture below is on lines this listing omits.
221 if (Triple.isOSDarwin()) {
222 if (Triple.getArch() == llvm::Triple::x86_64)
224 else if (Triple.getArch() == llvm::Triple::x86)
226 else if (Triple.getArch() == llvm::Triple::aarch64)
230 TargetMachine *target = march->createTargetMachine(TripleStr, CPU, FeatureStr,
// Take the data layout from the target's subtarget.
232 M->setDataLayout(target->getSubtargetImpl()->getDataLayout());
// The IRObjectFile takes ownership of the parsed module.
234 std::unique_ptr<object::IRObjectFile> IRObj(
235 new object::IRObjectFile(Buffer, std::move(M)));
// Two construction forms: one hands OwnedContext to the LTOModule, the other
// does not.
239 Ret = new LTOModule(std::move(IRObj), target, std::move(OwnedContext));
241 Ret = new LTOModule(std::move(IRObj), target);
243 if (Ret->parseSymbols(errMsg)) {
248 Ret->parseMetadata();
253 /// Create a MemoryBuffer from a memory range with an optional name.
// The buffer is built over a StringRef of the caller's `length` bytes at `mem`.
// NOTE(review): the trailing `false` is presumably MemoryBuffer's
// RequiresNullTerminator flag - confirm against the MemoryBuffer API.
254 std::unique_ptr<MemoryBuffer>
255 LTOModule::makeBuffer(const void *mem, size_t length, StringRef name) {
256 const char *startPtr = (const char*)mem;
257 return MemoryBuffer::getMemBuffer(StringRef(startPtr, length), name, false);
260 /// objcClassNameFromExpression - Get string that the data pointer points to.
// Visible logic: when c is a ConstantExpr whose operand 0 is a GlobalVariable
// with a ConstantDataArray initializer holding a C string, `name` is set to
// ".objc_class_name_" followed by that string.
// NOTE(review): embedded numbering skips 261 and 270-277; the return type and
// the return statements are not visible in this listing.
262 LTOModule::objcClassNameFromExpression(const Constant *c, std::string &name) {
263 if (const ConstantExpr *ce = dyn_cast<ConstantExpr>(c)) {
264 Constant *op = ce->getOperand(0);
265 if (GlobalVariable *gvn = dyn_cast<GlobalVariable>(op)) {
266 Constant *cn = gvn->getInitializer();
267 if (ConstantDataArray *ca = dyn_cast<ConstantDataArray>(cn)) {
268 if (ca->isCString()) {
269 name = ".objc_class_name_" + ca->getAsCString().str();
278 /// addObjCClass - Parse i386/ppc ObjC class data structure.
// Records the superclass name (operand 1) as an undefined symbol and the class
// name (operand 2) as a defined data symbol.
// NOTE(review): the embedded numbering has gaps (281-282, 286, 293-296, 301,
// 307, 309-311). Any null check on `c` after the dyn_cast and the declaration
// of IterBool are not visible in this listing.
279 void LTOModule::addObjCClass(const GlobalVariable *clgv) {
280 const ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer());
283 // second slot in __OBJC,__class is pointer to superclass name
284 std::string superclassName;
285 if (objcClassNameFromExpression(c->getOperand(1), superclassName)) {
287 _undefines.insert(std::make_pair(superclassName, NameAndAttributes()));
// Fill in the entry only when the insert actually added a new key.
288 if (IterBool.second) {
289 NameAndAttributes &info = IterBool.first->second;
// info.name points at the key characters stored inside the map entry.
290 info.name = IterBool.first->first().data();
291 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
292 info.isFunction = false;
297 // third slot in __OBJC,__class is pointer to class name
298 std::string className;
299 if (objcClassNameFromExpression(c->getOperand(2), className)) {
300 auto Iter = _defines.insert(className).first;
302 NameAndAttributes info;
// info.name points at the key characters stored inside _defines.
303 info.name = Iter->first().data();
304 info.attributes = LTO_SYMBOL_PERMISSIONS_DATA |
305 LTO_SYMBOL_DEFINITION_REGULAR | LTO_SYMBOL_SCOPE_DEFAULT;
306 info.isFunction = false;
308 _symbols.push_back(info);
312 /// addObjCCategory - Parse i386/ppc ObjC category data structure.
// Records the category's target class name (operand 1) as an undefined symbol
// when that name was not already present in _undefines.
// NOTE(review): the embedded numbering has gaps (315-316, 320-322, 324,
// 326-327, 332-334). Any null check on `c`, the early returns and the
// declaration of IterBool are not visible in this listing.
313 void LTOModule::addObjCCategory(const GlobalVariable *clgv) {
314 const ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer());
317 // second slot in __OBJC,__category is pointer to target class name
318 std::string targetclassName;
319 if (!objcClassNameFromExpression(c->getOperand(1), targetclassName))
323 _undefines.insert(std::make_pair(targetclassName, NameAndAttributes()));
// IterBool.second is false when the name was already in the map.
325 if (!IterBool.second)
328 NameAndAttributes &info = IterBool.first->second;
// info.name points at the key characters stored inside the map entry.
329 info.name = IterBool.first->first().data();
330 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
331 info.isFunction = false;
335 /// addObjCClassRef - Parse i386/ppc ObjC class list data structure.
// Derives a class name from clgv's initializer and records it as an undefined
// symbol when that name was not already present in _undefines.
// NOTE(review): the embedded numbering has gaps (339-341, 343, 345-346,
// 351-353). The early returns and the declaration of IterBool are not visible
// in this listing.
336 void LTOModule::addObjCClassRef(const GlobalVariable *clgv) {
337 std::string targetclassName;
338 if (!objcClassNameFromExpression(clgv->getInitializer(), targetclassName))
342 _undefines.insert(std::make_pair(targetclassName, NameAndAttributes()));
// IterBool.second is false when the name was already in the map.
344 if (!IterBool.second)
347 NameAndAttributes &info = IterBool.first->second;
// info.name points at the key characters stored inside the map entry.
348 info.name = IterBool.first->first().data();
349 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
350 info.isFunction = false;
/// Symbol-reference overload: looks up the GlobalValue behind Sym through
/// IRFile->getSymbolGV and forwards it, together with the text held in Buffer,
/// to the (name, value) overload.
// NOTE(review): embedded numbering skips 356 and 358-360; the statement that
// writes the symbol's name through OS into Buffer is not visible here.
354 void LTOModule::addDefinedDataSymbol(const object::BasicSymbolRef &Sym) {
355 SmallString<64> Buffer;
357 raw_svector_ostream OS(Buffer);
361 const GlobalValue *V = IRFile->getSymbolGV(Sym.getRawDataRefImpl());
362 addDefinedDataSymbol(Buffer.c_str(), V);
/// Record Name/v as a defined non-function symbol and, for values placed in
/// one of the legacy "__OBJC,..." sections, inspect them as ObjC data
/// structures.
// NOTE(review): the embedded numbering has gaps (370-371, 396-399, 403-406,
// 410-413). The early return for values without a section and the calls made
// inside each dyn_cast branch are not visible in this listing.
365 void LTOModule::addDefinedDataSymbol(const char *Name, const GlobalValue *v) {
366 // Add to list of defined symbols.
367 addDefinedSymbol(Name, v, false);
369 if (!v->hasSection() /* || !isTargetDarwin */)
372 // Special case i386/ppc ObjC data structures in magic sections:
373 // The issue is that the old ObjC object format did some strange
374 // contortions to avoid real linker symbols. For instance, the
375 // ObjC class data structure is allocated statically in the executable
376 // that defines that class. That data structure contains a pointer to
377 // its superclass. But instead of just initializing that part of the
378 // struct to the address of its superclass, and letting the static and
379 // dynamic linkers do the rest, the runtime works by having that field
380 // instead point to a C-string that is the name of the superclass.
381 // At runtime the objc initialization updates that pointer and sets
382 // it to point to the actual super class. As far as the linker
383 // knows it is just a pointer to a string. But then someone wanted the
384 // linker to issue errors at build time if the superclass was not found.
385 // So they figured out a way in mach-o object format to use an absolute
386 // symbol (.objc_class_name_Foo = 0) and a floating reference
387 // (.reference .objc_class_name_Bar) to make the linker report an error when
388 // a class was missing.
389 // The following synthesizes the implicit .objc_* symbols for the linker
390 // from the ObjC data structures generated by the front end.
392 // special case if this data blob is an ObjC class definition
// Each compare(0, N, literal) == 0 below is a prefix test on the section name;
// N equals the length of the literal.
393 std::string Section = v->getSection();
394 if (Section.compare(0, 15, "__OBJC,__class,") == 0) {
395 if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
400 // special case if this data blob is an ObjC category definition
401 else if (Section.compare(0, 18, "__OBJC,__category,") == 0) {
402 if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
407 // special case if this data blob is the list of referenced classes
408 else if (Section.compare(0, 18, "__OBJC,__cls_refs,") == 0) {
409 if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
/// Symbol-reference overload: casts the GlobalValue behind Sym to a Function
/// and forwards it, together with the text held in Buffer, to the
/// (name, function) overload.
// NOTE(review): embedded numbering skips 417 and 419-422; the statement that
// writes the symbol's name into Buffer and the declaration of `F` that line
// 423 initializes are not visible in this listing.
415 void LTOModule::addDefinedFunctionSymbol(const object::BasicSymbolRef &Sym) {
416 SmallString<64> Buffer;
418 raw_svector_ostream OS(Buffer);
// cast<> (unlike dyn_cast<>) is used, so the value is expected to be a Function.
423 cast<Function>(IRFile->getSymbolGV(Sym.getRawDataRefImpl()));
424 addDefinedFunctionSymbol(Buffer.c_str(), F);
/// Record Name/F as a defined function symbol by calling addDefinedSymbol with
/// isFunction set to true.
427 void LTOModule::addDefinedFunctionSymbol(const char *Name, const Function *F) {
428 // add to list of defined symbols
429 addDefinedSymbol(Name, F, true);
/// Compute the LTO attribute word for the definition `def` (alignment,
/// permissions, definition kind, scope), intern Name in _defines, and append a
/// NameAndAttributes record to _symbols.
// NOTE(review): the embedded numbering has gaps (433, 437, 439, 441, 445,
// 447-448, 454, 456-457, 467, 469, 471, 479-480). The third parameter's
// declaration, the if/else lines that select between the alternatives below
// and possibly further field assignments are not visible in this listing.
432 void LTOModule::addDefinedSymbol(const char *Name, const GlobalValue *def,
434 // set alignment part; log2() can have rounding errors, so count trailing zeros
435 uint32_t align = def->getAlignment();
436 uint32_t attr = align ? countTrailingZeros(align) : 0;
438 // set permissions part
440 attr |= LTO_SYMBOL_PERMISSIONS_CODE;
442 const GlobalVariable *gv = dyn_cast<GlobalVariable>(def);
443 if (gv && gv->isConstant())
444 attr |= LTO_SYMBOL_PERMISSIONS_RODATA;
446 attr |= LTO_SYMBOL_PERMISSIONS_DATA;
449 // set definition part
450 if (def->hasWeakLinkage() || def->hasLinkOnceLinkage())
451 attr |= LTO_SYMBOL_DEFINITION_WEAK;
452 else if (def->hasCommonLinkage())
453 attr |= LTO_SYMBOL_DEFINITION_TENTATIVE;
455 attr |= LTO_SYMBOL_DEFINITION_REGULAR;
// Scope part: local linkage is tested first, then hidden, protected and
// can-be-omitted.
458 if (def->hasLocalLinkage())
459 // Ignore visibility if linkage is local.
460 attr |= LTO_SYMBOL_SCOPE_INTERNAL;
461 else if (def->hasHiddenVisibility())
462 attr |= LTO_SYMBOL_SCOPE_HIDDEN;
463 else if (def->hasProtectedVisibility())
464 attr |= LTO_SYMBOL_SCOPE_PROTECTED;
465 else if (canBeOmittedFromSymbolTable(def))
466 attr |= LTO_SYMBOL_SCOPE_DEFAULT_CAN_BE_HIDDEN;
468 attr |= LTO_SYMBOL_SCOPE_DEFAULT;
470 auto Iter = _defines.insert(Name).first;
472 // fill information structure
473 NameAndAttributes info;
474 StringRef NameRef = Iter->first();
// info.name points at the key characters stored inside _defines; the assert
// checks that the byte right after the key is a NUL terminator.
475 info.name = NameRef.data();
476 assert(info.name[NameRef.size()] == '\0');
477 info.attributes = attr;
478 info.isFunction = isFunction;
481 // add to table of symbols
482 _symbols.push_back(info);
485 /// addAsmGlobalSymbol - Add a global symbol from module-level ASM to the
// NOTE(review): the sentence above is cut off in this listing (line 486 is
// omitted); presumably it names the defined list - confirm.
// Visible behaviour: `name` is interned in _defines. For a newly added name
// with no associated symbol in _undefines, a regular data symbol with the
// given scope is appended to _symbols; otherwise the known symbol is added as
// a function or data definition and its scope bits are replaced by `scope`.
// NOTE(review): further gaps in the embedded numbering (490, 493-494, 496,
// 499, 501, 504-505, 508, 512, 515-518, 520, 522) hide the early returns, the
// left-hand side of the attribute assignment and the branch that chooses
// between the function and data calls.
487 void LTOModule::addAsmGlobalSymbol(const char *name,
488 lto_symbol_attributes scope) {
489 auto IterBool = _defines.insert(name);
491 // only add new define if not already defined
492 if (!IterBool.second)
// Look the name up in _undefines by subscript.
495 NameAndAttributes &info = _undefines[IterBool.first->first().data()];
497 if (info.symbol == nullptr) {
498 // FIXME: This is trying to take care of module ASM like this:
500 // module asm ".zerofill __FOO, __foo, _bar_baz_qux, 0"
502 // but is gross and its mother dresses it funny. Have the ASM parser give us
503 // more details for this type of situation so that we're not guessing so
506 // fill information structure
507 info.name = IterBool.first->first().data();
509 LTO_SYMBOL_PERMISSIONS_DATA | LTO_SYMBOL_DEFINITION_REGULAR | scope;
510 info.isFunction = false;
511 info.symbol = nullptr;
513 // add to table of symbols
514 _symbols.push_back(info);
519 addDefinedFunctionSymbol(info.name, cast<Function>(info.symbol));
521 addDefinedDataSymbol(info.name, info.symbol);
// Replace the scope bits of the most recently appended symbol with `scope`.
523 _symbols.back().attributes &= ~LTO_SYMBOL_SCOPE_MASK;
524 _symbols.back().attributes |= scope;
527 /// addAsmGlobalSymbolUndef - Add a global symbol from module-level ASM to the
// NOTE(review): the sentence above is cut off in this listing (line 528 is
// omitted); presumably it names the undefined list - confirm.
// Visible behaviour: `name` is inserted into _undefines and its stored key is
// appended to _asm_undefines on every call; the attribute fields are then
// filled in as an undefined, default-scope, non-function symbol.
// NOTE(review): embedded numbering skips 536-537, so the statement guarded by
// `if (!IterBool.second)` (presumably an early return) is not visible.
529 void LTOModule::addAsmGlobalSymbolUndef(const char *name) {
530 auto IterBool = _undefines.insert(std::make_pair(name, NameAndAttributes()));
// Runs before the "already present" check below.
532 _asm_undefines.push_back(IterBool.first->first().data());
534 // we already have the symbol
535 if (!IterBool.second)
538 uint32_t attr = LTO_SYMBOL_DEFINITION_UNDEFINED;
539 attr |= LTO_SYMBOL_SCOPE_DEFAULT;
540 NameAndAttributes &info = IterBool.first->second;
// info.name points at the key characters stored inside the map entry.
541 info.name = IterBool.first->first().data();
542 info.attributes = attr;
543 info.isFunction = false;
544 info.symbol = nullptr;
547 /// Add a symbol which isn't defined just yet to a list to be resolved later.
// Visible behaviour: the name held in `name` is inserted into _undefines; for
// the entry, the definition attribute is WEAKUNDEF when the declaration has
// external-weak linkage and UNDEFINED is the other assignment shown, and
// isFunction is taken from isFunc.
// NOTE(review): the embedded numbering has gaps (549, 551, 553-555, 557,
// 560-561, 563, 565, 567, 570, 572, 574-576). The isFunc parameter
// declaration, the statement that writes the symbol's name into `name`, the
// early return and the `else` line are not visible in this listing.
548 void LTOModule::addPotentialUndefinedSymbol(const object::BasicSymbolRef &Sym,
550 SmallString<64> name;
552 raw_svector_ostream OS(name);
556 auto IterBool = _undefines.insert(std::make_pair(name, NameAndAttributes()));
558 // we already have the symbol
559 if (!IterBool.second)
562 NameAndAttributes &info = IterBool.first->second;
// info.name points at the key characters stored inside the map entry.
564 info.name = IterBool.first->first().data();
566 const GlobalValue *decl = IRFile->getSymbolGV(Sym.getRawDataRefImpl());
568 if (decl->hasExternalWeakLinkage())
569 info.attributes = LTO_SYMBOL_DEFINITION_WEAKUNDEF;
571 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
573 info.isFunction = isFunc;
577 /// parseSymbols - Parse the symbols from the module and module-level ASM and add
578 /// them to either the defined or undefined lists.
// NOTE(review): the embedded numbering has gaps throughout this function. The
// conditions that choose between the calls below (for example on GV being
// null, IsUndefined or F), the `continue` statements and the return value are
// not visible in this listing; errMsg is not used in any visible line.
579 bool LTOModule::parseSymbols(std::string &errMsg) {
580 for (auto &Sym : IRFile->symbols()) {
581 const GlobalValue *GV = IRFile->getSymbolGV(Sym.getRawDataRefImpl());
582 uint32_t Flags = Sym.getFlags();
583 if (Flags & object::BasicSymbolRef::SF_FormatSpecific)
586 bool IsUndefined = Flags & object::BasicSymbolRef::SF_Undefined;
// Symbols handled by name only: the addAsmGlobalSymbol* helpers receive just
// the name text.
589 SmallString<64> Buffer;
591 raw_svector_ostream OS(Buffer);
594 const char *Name = Buffer.c_str();
597 addAsmGlobalSymbolUndef(Name);
598 else if (Flags & object::BasicSymbolRef::SF_Global)
599 addAsmGlobalSymbol(Name, LTO_SYMBOL_SCOPE_DEFAULT);
601 addAsmGlobalSymbol(Name, LTO_SYMBOL_SCOPE_INTERNAL);
// Symbols backed by a GlobalValue: dispatch on the kind of value.
605 auto *F = dyn_cast<Function>(GV);
607 addPotentialUndefinedSymbol(Sym, F != nullptr);
612 addDefinedFunctionSymbol(Sym);
616 if (isa<GlobalVariable>(GV)) {
617 addDefinedDataSymbol(Sym);
// Anything reaching this point is asserted to be an alias and is recorded
// through the data-symbol path.
621 assert(isa<GlobalAlias>(GV));
622 addDefinedDataSymbol(Sym);
625 // make symbols for all undefines
626 for (StringMap<NameAndAttributes>::iterator u =_undefines.begin(),
627 e = _undefines.end(); u != e; ++u) {
628 // If this symbol also has a definition, then don't make an undefine because
629 // it is a tentative definition.
630 if (_defines.count(u->getKey())) continue;
// A copy of the map entry's value is appended to _symbols.
631 NameAndAttributes info = u->getValue();
632 _symbols.push_back(info);
638 /// parseMetadata - Parse metadata from the module
// Visible behaviour: reads the "Linker Options" module flag, walks its nested
// MDNode operands (each inner operand cast to MDString), interns every option
// string in _linkeropt_strings, and appends it to _deplibs when the target's
// object-file lowering maps it to a dependent library name, or to _linkeropts
// when the option is otherwise non-empty.
// NOTE(review): embedded numbering skips 640, 649-650 and 660-663; the
// declaration of `Op`, which the expression on line 651 initializes, is not
// visible in this listing.
639 void LTOModule::parseMetadata() {
641 if (Metadata *Val = getModule().getModuleFlag("Linker Options")) {
642 MDNode *LinkerOptions = cast<MDNode>(Val);
643 for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) {
644 MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i));
645 for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) {
646 MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii));
647 // FIXME: Make StringSet::insert match Self-Associative Container
648 // requirements, returning <iter,bool> rather than bool, and use that
// The stored key of the interned string is used, not MDOption's own storage.
651 _linkeropt_strings.insert(MDOption->getString()).first->first();
652 StringRef DepLibName = _target->getSubtargetImpl()
653 ->getTargetLowering()
654 ->getObjFileLowering()
655 .getDepLibFromLinkerOpt(Op);
656 if (!DepLibName.empty())
657 _deplibs.push_back(DepLibName.data());
658 else if (!Op.empty())
659 _linkeropts.push_back(Op.data());
664 // Add other interesting metadata here.