1 //===-- LTOModule.cpp - LLVM Link Time Optimizer --------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the Link Time Optimization library. This library is
11 // intended to be used by the linker to optimize code at link time.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/LTO/LTOModule.h"
16 #include "llvm/ADT/Triple.h"
17 #include "llvm/Bitcode/ReaderWriter.h"
18 #include "llvm/CodeGen/Analysis.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/LLVMContext.h"
21 #include "llvm/IR/Metadata.h"
22 #include "llvm/IR/Module.h"
23 #include "llvm/MC/MCExpr.h"
24 #include "llvm/MC/MCInst.h"
25 #include "llvm/MC/MCInstrInfo.h"
26 #include "llvm/MC/MCParser/MCAsmParser.h"
27 #include "llvm/MC/MCSection.h"
28 #include "llvm/MC/MCSubtargetInfo.h"
29 #include "llvm/MC/MCSymbol.h"
30 #include "llvm/MC/MCTargetAsmParser.h"
31 #include "llvm/MC/SubtargetFeature.h"
32 #include "llvm/Object/IRObjectFile.h"
33 #include "llvm/Object/ObjectFile.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/FileSystem.h"
36 #include "llvm/Support/Host.h"
37 #include "llvm/Support/MemoryBuffer.h"
38 #include "llvm/Support/Path.h"
39 #include "llvm/Support/SourceMgr.h"
40 #include "llvm/Support/TargetRegistry.h"
41 #include "llvm/Support/TargetSelect.h"
42 #include "llvm/Target/TargetLowering.h"
43 #include "llvm/Target/TargetLoweringObjectFile.h"
44 #include "llvm/Target/TargetRegisterInfo.h"
45 #include "llvm/Target/TargetSubtargetInfo.h"
46 #include "llvm/Transforms/Utils/GlobalStatus.h"
47 #include <system_error>
49 using namespace llvm::object;
/// Construct an LTOModule that takes ownership of \p Obj (stored in IRFile)
/// and records \p TM in _target. Only those two members are initialized
/// explicitly by this overload.
51 LTOModule::LTOModule(std::unique_ptr<object::IRObjectFile> Obj,
52 llvm::TargetMachine *TM)
53 : IRFile(std::move(Obj)), _target(TM) {}
/// Construct an LTOModule that takes ownership of \p Obj (stored in IRFile),
/// records \p TM in _target, and additionally takes ownership of \p Context
/// by moving it into OwnedContext.
55 LTOModule::LTOModule(std::unique_ptr<object::IRObjectFile> Obj,
56 llvm::TargetMachine *TM,
57 std::unique_ptr<LLVMContext> Context)
58 : OwnedContext(std::move(Context)), IRFile(std::move(Obj)), _target(TM) {}
/// Destructor. The body is empty, so any cleanup is performed by the
/// destructors of the members themselves.
60 LTOModule::~LTOModule() {}
62 /// isBitcodeFile - Returns 'true' if the file (or memory contents) is LLVM
/// bitcode.
///
/// This overload wraps the raw range of \p Length bytes starting at \p Mem in
/// a MemoryBufferRef named "<mem>" and asks
/// IRObjectFile::findBitcodeInMemBuffer to locate bitcode inside it.
/// NOTE(review): the statement that converts BCData into the boolean result
/// is not visible in this listing.
64 bool LTOModule::isBitcodeFile(const void *Mem, size_t Length) {
65 ErrorOr<MemoryBufferRef> BCData = IRObjectFile::findBitcodeInMemBuffer(
66 MemoryBufferRef(StringRef((const char *)Mem, Length), "<mem>"));
/// Path-based overload: loads the file at \p Path with MemoryBuffer::getFile
/// and runs IRObjectFile::findBitcodeInMemBuffer on the loaded buffer.
/// NOTE(review): the handling of a failed getFile and the final return are
/// not visible in this listing; BufferOrErr.get() below is only valid when
/// the read succeeded, so confirm a guard precedes it.
70 bool LTOModule::isBitcodeFile(const char *Path) {
71 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
72 MemoryBuffer::getFile(Path);
76 ErrorOr<MemoryBufferRef> BCData = IRObjectFile::findBitcodeInMemBuffer(
77 BufferOrErr.get()->getMemBufferRef());
/// Returns whether the target triple of the bitcode found in \p Buffer starts
/// with \p TriplePrefix.
///
/// The bitcode is located with IRObjectFile::findBitcodeInMemBuffer and its
/// triple is read with getBitcodeTargetTriple.
/// NOTE(review): the check of BCOrErr before it is dereferenced and the
/// declaration of `Context` are not visible in this listing.
81 bool LTOModule::isBitcodeForTarget(MemoryBuffer *Buffer,
82 StringRef TriplePrefix) {
83 ErrorOr<MemoryBufferRef> BCOrErr =
84 IRObjectFile::findBitcodeInMemBuffer(Buffer->getMemBufferRef());
88 std::string Triple = getBitcodeTargetTriple(*BCOrErr, Context);
// Prefix match only: any triple beginning with TriplePrefix is accepted.
89 return StringRef(Triple).startswith(TriplePrefix);
/// Create an LTOModule from the file at \p path, using the global
/// LLVMContext.
///
/// The file is read with MemoryBuffer::getFile; if that fails, the error's
/// message is copied into \p errMsg. Otherwise a reference to the loaded
/// buffer is handed to makeLTOModule together with \p options.
/// NOTE(review): the statements that end the error branch are not visible in
/// this listing.
92 LTOModule *LTOModule::createFromFile(const char *path, TargetOptions options,
93 std::string &errMsg) {
94 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
95 MemoryBuffer::getFile(path);
96 if (std::error_code EC = BufferOrErr.getError()) {
97 errMsg = EC.message();
// NOTE(review): Buffer is a local owner, but makeLTOModule only receives a
// non-owning MemoryBufferRef into it - confirm nothing in the returned module
// reads the buffer after this function returns.
100 std::unique_ptr<MemoryBuffer> Buffer = std::move(BufferOrErr.get());
101 return makeLTOModule(Buffer->getMemBufferRef(), options, errMsg,
102 &getGlobalContext());
/// Create an LTOModule from an already-open file descriptor.
///
/// Thin wrapper: forwards all arguments to createFromOpenFileSlice, passing
/// \p size as the map size and 0 as the offset.
105 LTOModule *LTOModule::createFromOpenFile(int fd, const char *path, size_t size,
106 TargetOptions options,
107 std::string &errMsg) {
108 return createFromOpenFileSlice(fd, path, size, 0, options, errMsg);
/// Create an LTOModule from a slice of an already-open file, using the global
/// LLVMContext.
///
/// The slice is obtained with MemoryBuffer::getOpenFileSlice(fd, path,
/// map_size, offset); if that fails, the error's message is copied into
/// \p errMsg. Otherwise a reference to the buffer is handed to makeLTOModule.
/// NOTE(review): the statements that end the error branch are not visible in
/// this listing.
111 LTOModule *LTOModule::createFromOpenFileSlice(int fd, const char *path,
112 size_t map_size, off_t offset,
113 TargetOptions options,
114 std::string &errMsg) {
115 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
116 MemoryBuffer::getOpenFileSlice(fd, path, map_size, offset);
117 if (std::error_code EC = BufferOrErr.getError()) {
118 errMsg = EC.message();
// NOTE(review): Buffer is a local owner, but makeLTOModule only receives a
// non-owning MemoryBufferRef into it - confirm nothing in the returned module
// reads the buffer after this function returns.
121 std::unique_ptr<MemoryBuffer> Buffer = std::move(BufferOrErr.get());
122 return makeLTOModule(Buffer->getMemBufferRef(), options, errMsg,
123 &getGlobalContext());
/// Create an LTOModule from a caller-provided memory range, using the global
/// LLVMContext.
///
/// Thin wrapper: forwards to createInContext with &getGlobalContext() as the
/// context argument.
126 LTOModule *LTOModule::createFromBuffer(const void *mem, size_t length,
127 TargetOptions options,
128 std::string &errMsg, StringRef path) {
129 return createInContext(mem, length, options, errMsg, path,
130 &getGlobalContext());
/// Create an LTOModule from a caller-provided memory range without supplying
/// an LLVMContext.
///
/// Forwards to createInContext with a null context pointer.
/// NOTE(review): part of the parameter list (errMsg and path are used below)
/// is not visible in this listing. Presumably the null context makes
/// makeLTOModule create a context owned by the new module - confirm.
133 LTOModule *LTOModule::createInLocalContext(const void *mem, size_t length,
134 TargetOptions options,
137 return createInContext(mem, length, options, errMsg, path, nullptr);
/// Create an LTOModule from a caller-provided memory range in the given
/// context.
///
/// The range of \p length bytes at \p mem is wrapped, without copying, in a
/// StringRef and then in a MemoryBufferRef identified by \p path; that
/// reference is passed to makeLTOModule along with \p options, \p errMsg and
/// \p Context.
140 LTOModule *LTOModule::createInContext(const void *mem, size_t length,
141 TargetOptions options,
142 std::string &errMsg, StringRef path,
143 LLVMContext *Context) {
144 StringRef Data((const char *)mem, length);
145 MemoryBufferRef Buffer(Data, path);
146 return makeLTOModule(Buffer, options, errMsg, Context);
/// Build an LTOModule from the bitcode contained in \p Buffer.
///
/// Visible steps, in order:
///  - locate the bitcode with IRObjectFile::findBitcodeInMemBuffer;
///  - parse it into a Module in *Context with parseBitcodeFile;
///  - take the module's target triple, falling back to
///    sys::getDefaultTargetTriple() when it is empty;
///  - look up the Target and create a TargetMachine for it;
///  - set the module's data layout from the target machine;
///  - wrap the module in an IRObjectFile and construct the LTOModule;
///  - run parseSymbols and parseMetadata on the result.
/// Error messages from the first two steps are copied into \p errMsg, and
/// TargetRegistry::lookupTarget is also given \p errMsg.
/// NOTE(review): several statements (guards, early returns, the CPU
/// declaration and assignments, the declaration and return of Ret) are not
/// visible in this listing; the comments below describe only what is shown.
149 LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer,
150 TargetOptions options, std::string &errMsg,
151 LLVMContext *Context) {
// Holds a context created by this function; it is moved into the LTOModule
// by the three-argument constructor call further down.
152 std::unique_ptr<LLVMContext> OwnedContext;
// Create a fresh context and use it from here on. The guarding condition is
// not visible; presumably this happens only when Context is null - confirm.
154 OwnedContext = llvm::make_unique<LLVMContext>();
155 Context = OwnedContext.get();
158 ErrorOr<MemoryBufferRef> MBOrErr =
159 IRObjectFile::findBitcodeInMemBuffer(Buffer);
160 if (std::error_code EC = MBOrErr.getError()) {
161 errMsg = EC.message();
164 ErrorOr<Module *> MOrErr = parseBitcodeFile(*MBOrErr, *Context);
165 if (std::error_code EC = MOrErr.getError()) {
166 errMsg = EC.message();
// Take ownership of the raw Module pointer returned by the parser.
169 std::unique_ptr<Module> M(MOrErr.get());
171 std::string TripleStr = M->getTargetTriple();
// A module without a triple is treated as targeting the default triple.
172 if (TripleStr.empty())
173 TripleStr = sys::getDefaultTargetTriple();
174 llvm::Triple Triple(TripleStr);
176 // find machine architecture for this module
177 const Target *march = TargetRegistry::lookupTarget(TripleStr, errMsg);
181 // construct LTOModule, hand over ownership of module and target
182 SubtargetFeatures Features;
183 Features.getDefaultSubtargetFeatures(Triple);
184 std::string FeatureStr = Features.getString();
185 // Set a default CPU for Darwin triples.
// The CPU is chosen per architecture (x86_64, x86, aarch64); the assigned
// values are not visible in this listing.
187 if (Triple.isOSDarwin()) {
188 if (Triple.getArch() == llvm::Triple::x86_64)
190 else if (Triple.getArch() == llvm::Triple::x86)
192 else if (Triple.getArch() == llvm::Triple::aarch64)
196 TargetMachine *target = march->createTargetMachine(TripleStr, CPU, FeatureStr,
// Make the module's data layout the one reported by the target's subtarget.
198 M->setDataLayout(target->getSubtargetImpl()->getDataLayout());
// The IRObjectFile keeps the (non-owning) buffer reference and takes
// ownership of the module.
200 std::unique_ptr<object::IRObjectFile> IRObj(
201 new object::IRObjectFile(Buffer, std::move(M)));
// Two construction paths: the first hands OwnedContext to the new module,
// the second does not. The selecting condition is not visible here.
205 Ret = new LTOModule(std::move(IRObj), target, std::move(OwnedContext));
207 Ret = new LTOModule(std::move(IRObj), target);
// NOTE(review): the body of this branch is not visible; verify which
// parseSymbols result signals failure and how Ret is released in that case.
209 if (Ret->parseSymbols(errMsg)) {
214 Ret->parseMetadata();
219 /// Create a MemoryBuffer from a memory range with an optional name.
///
/// The range of \p length bytes at \p mem is viewed as a StringRef and passed
/// to MemoryBuffer::getMemBuffer together with \p name.
/// NOTE(review): the trailing `false` is presumably getMemBuffer's
/// "requires null terminator" flag - confirm against the MemoryBuffer API.
220 std::unique_ptr<MemoryBuffer>
221 LTOModule::makeBuffer(const void *mem, size_t length, StringRef name) {
222 const char *startPtr = (const char*)mem;
223 return MemoryBuffer::getMemBuffer(StringRef(startPtr, length), name, false);
226 /// objcClassNameFromExpression - Get string that the data pointer points to.
///
/// Visible logic: when \p c is a ConstantExpr whose first operand is a
/// GlobalVariable whose initializer is a ConstantDataArray holding a C
/// string, \p name is set to ".objc_class_name_" followed by that string.
/// NOTE(review): the return type and the return statements are not visible
/// in this listing; callers in this file test the result as a boolean.
228 LTOModule::objcClassNameFromExpression(const Constant *c, std::string &name) {
229 if (const ConstantExpr *ce = dyn_cast<ConstantExpr>(c)) {
// Look through the constant expression to the global it refers to.
230 Constant *op = ce->getOperand(0);
231 if (GlobalVariable *gvn = dyn_cast<GlobalVariable>(op)) {
232 Constant *cn = gvn->getInitializer();
233 if (ConstantDataArray *ca = dyn_cast<ConstantDataArray>(cn)) {
234 if (ca->isCString()) {
235 name = ".objc_class_name_" + ca->getAsCString().str();
244 /// addObjCClass - Parse i386/ppc ObjC class data structure.
///
/// Reads operand 1 (superclass name) and operand 2 (class name) of the
/// ConstantStruct initializer of \p clgv. The superclass name is recorded in
/// _undefines as an undefined, non-function symbol if its entry has no name
/// yet. The class name is looked up/created in _defines and appended to
/// _symbols as a regular, default-scope data definition.
245 void LTOModule::addObjCClass(const GlobalVariable *clgv) {
// NOTE(review): dyn_cast may yield null; the guard that presumably follows
// is not visible in this listing.
246 const ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer());
249 // second slot in __OBJC,__class is pointer to superclass name
250 std::string superclassName;
251 if (objcClassNameFromExpression(c->getOperand(1), superclassName)) {
252 NameAndAttributes info;
253 StringMap<NameAndAttributes>::value_type &entry =
254 _undefines.GetOrCreateValue(superclassName);
// An entry whose name is still null has not been filled in before.
255 if (!entry.getValue().name) {
// Point at the key bytes held by the map entry rather than at the local
// std::string, which goes out of scope with this function.
256 const char *symbolName = entry.getKey().data();
257 info.name = symbolName;
258 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
259 info.isFunction = false;
261 entry.setValue(info);
265 // third slot in __OBJC,__class is pointer to class name
266 std::string className;
267 if (objcClassNameFromExpression(c->getOperand(2), className)) {
268 StringSet::value_type &entry = _defines.GetOrCreateValue(className);
271 NameAndAttributes info;
// As above, the stored name refers to the key owned by the _defines entry.
272 info.name = entry.getKey().data();
273 info.attributes = LTO_SYMBOL_PERMISSIONS_DATA |
274 LTO_SYMBOL_DEFINITION_REGULAR | LTO_SYMBOL_SCOPE_DEFAULT;
275 info.isFunction = false;
277 _symbols.push_back(info);
281 /// addObjCCategory - Parse i386/ppc ObjC category data structure.
///
/// Reads operand 1 (target class name) of the ConstantStruct initializer of
/// \p clgv and records that name in _undefines as an undefined, non-function
/// symbol, unless the entry already has a name.
/// NOTE(review): the statements following the two `if` conditions below
/// (presumably early returns) are not visible in this listing.
282 void LTOModule::addObjCCategory(const GlobalVariable *clgv) {
// NOTE(review): dyn_cast may yield null; the guard that presumably follows
// is not visible in this listing.
283 const ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer());
286 // second slot in __OBJC,__category is pointer to target class name
287 std::string targetclassName;
288 if (!objcClassNameFromExpression(c->getOperand(1), targetclassName))
291 NameAndAttributes info;
292 StringMap<NameAndAttributes>::value_type &entry =
293 _undefines.GetOrCreateValue(targetclassName);
// A non-null name means this symbol was already recorded.
295 if (entry.getValue().name)
// Point at the key bytes held by the map entry rather than at the local
// std::string.
298 const char *symbolName = entry.getKey().data();
299 info.name = symbolName;
300 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
301 info.isFunction = false;
303 entry.setValue(info);
306 /// addObjCClassRef - Parse i386/ppc ObjC class list data structure.
///
/// Extracts a class name directly from the initializer of \p clgv and records
/// it in _undefines as an undefined, non-function symbol, unless the entry
/// already has a name.
/// NOTE(review): the statements following the two `if` conditions below
/// (presumably early returns) are not visible in this listing.
307 void LTOModule::addObjCClassRef(const GlobalVariable *clgv) {
308 std::string targetclassName;
309 if (!objcClassNameFromExpression(clgv->getInitializer(), targetclassName))
312 NameAndAttributes info;
313 StringMap<NameAndAttributes>::value_type &entry =
314 _undefines.GetOrCreateValue(targetclassName);
// A non-null name means this symbol was already recorded.
315 if (entry.getValue().name)
// Point at the key bytes held by the map entry rather than at the local
// std::string.
318 const char *symbolName = entry.getKey().data();
319 info.name = symbolName;
320 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
321 info.isFunction = false;
323 entry.setValue(info);
/// Record a defined data symbol given as an object-file symbol reference.
///
/// Looks up the GlobalValue behind \p Sym in IRFile and forwards it, together
/// with the text held in Buffer, to the (name, value) overload.
/// NOTE(review): the statements that write the symbol's name into Buffer
/// through OS are not visible in this listing.
326 void LTOModule::addDefinedDataSymbol(const object::BasicSymbolRef &Sym) {
327 SmallString<64> Buffer;
329 raw_svector_ostream OS(Buffer);
333 const GlobalValue *V = IRFile->getSymbolGV(Sym.getRawDataRefImpl());
334 addDefinedDataSymbol(Buffer.c_str(), V);
/// Record a defined data symbol by name, then synthesize the implicit ObjC
/// symbols if the value lives in one of the special __OBJC sections.
///
/// \p Name and \p v are first passed to addDefinedSymbol as a non-function.
/// The section name of \p v is then compared by prefix against
/// "__OBJC,__class,", "__OBJC,__category," and "__OBJC,__cls_refs,"; in each
/// case the value is only considered further if it is a GlobalVariable.
/// NOTE(review): the statement following the hasSection() test and the calls
/// made inside the three GlobalVariable branches are not visible in this
/// listing.
337 void LTOModule::addDefinedDataSymbol(const char *Name, const GlobalValue *v) {
338 // Add to list of defined symbols.
339 addDefinedSymbol(Name, v, false);
341 if (!v->hasSection() /* || !isTargetDarwin */)
344 // Special case i386/ppc ObjC data structures in magic sections:
345 // The issue is that the old ObjC object format did some strange
346 // contortions to avoid real linker symbols. For instance, the
347 // ObjC class data structure is allocated statically in the executable
348 // that defines that class. That data structure contains a pointer to
349 // its superclass. But instead of just initializing that part of the
350 // struct to the address of its superclass, and letting the static and
351 // dynamic linkers do the rest, the runtime works by having that field
352 // instead point to a C-string that is the name of the superclass.
353 // At runtime the objc initialization updates that pointer and sets
354 // it to point to the actual super class. As far as the linker
355 // knows it is just a pointer to a string. But then someone wanted the
356 // linker to issue errors at build time if the superclass was not found.
357 // So they figured out a way in mach-o object format to use absolute
358 // symbols (.objc_class_name_Foo = 0) and a floating reference
359 // (.reference .objc_class_name_Bar) to cause the linker into erroring when
360 // a class was missing.
361 // The following synthesizes the implicit .objc_* symbols for the linker
362 // from the ObjC data structures generated by the front end.
364 // special case if this data blob is an ObjC class definition
365 std::string Section = v->getSection();
// The lengths 15 and 18 used below are the character counts of the
// respective prefix literals, so each compare is a "starts with" test.
366 if (Section.compare(0, 15, "__OBJC,__class,") == 0) {
367 if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
372 // special case if this data blob is an ObjC category definition
373 else if (Section.compare(0, 18, "__OBJC,__category,") == 0) {
374 if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
379 // special case if this data blob is the list of referenced classes
380 else if (Section.compare(0, 18, "__OBJC,__cls_refs,") == 0) {
381 if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
/// Record a defined function symbol given as an object-file symbol reference.
///
/// Looks up the GlobalValue behind \p Sym in IRFile, casts it to Function
/// (cast<> asserts on a mismatch rather than returning null), and forwards it
/// with the text held in Buffer to the (name, function) overload.
/// NOTE(review): the statements that write the symbol's name into Buffer
/// through OS, and the declaration of F, are not visible in this listing.
387 void LTOModule::addDefinedFunctionSymbol(const object::BasicSymbolRef &Sym) {
388 SmallString<64> Buffer;
390 raw_svector_ostream OS(Buffer);
395 cast<Function>(IRFile->getSymbolGV(Sym.getRawDataRefImpl()));
396 addDefinedFunctionSymbol(Buffer.c_str(), F);
/// Record a defined function symbol by name.
///
/// Thin wrapper: forwards \p Name and \p F to addDefinedSymbol with the
/// is-function flag set to true.
399 void LTOModule::addDefinedFunctionSymbol(const char *Name, const Function *F) {
400 // add to list of defined symbols
401 addDefinedSymbol(Name, F, true);
/// Compute the LTO attribute word for a defined global and append the symbol
/// to _symbols.
///
/// The attribute word is assembled from four parts: alignment, permissions
/// (code / read-only data / data), definition kind (weak / tentative /
/// regular) and scope (internal / hidden / protected / default-can-be-hidden
/// / default). \p Name is looked up/created in _defines, and the stored
/// symbol name points at that entry's key.
/// NOTE(review): the rest of the parameter list (`isFunction` is used below)
/// and the `if`/`else` lines that choose between some of the alternatives are
/// not visible in this listing.
404 void LTOModule::addDefinedSymbol(const char *Name, const GlobalValue *def,
406 // set alignment part log2() can have rounding errors
407 uint32_t align = def->getAlignment();
// Trailing-zero count of the alignment (its log2 when it is a power of two);
// an alignment of 0 contributes nothing.
408 uint32_t attr = align ? countTrailingZeros(align) : 0;
410 // set permissions part
412 attr |= LTO_SYMBOL_PERMISSIONS_CODE;
414 const GlobalVariable *gv = dyn_cast<GlobalVariable>(def);
// Constant global variables are reported as read-only data.
415 if (gv && gv->isConstant())
416 attr |= LTO_SYMBOL_PERMISSIONS_RODATA;
418 attr |= LTO_SYMBOL_PERMISSIONS_DATA;
421 // set definition part
422 if (def->hasWeakLinkage() || def->hasLinkOnceLinkage())
423 attr |= LTO_SYMBOL_DEFINITION_WEAK;
424 else if (def->hasCommonLinkage())
425 attr |= LTO_SYMBOL_DEFINITION_TENTATIVE;
427 attr |= LTO_SYMBOL_DEFINITION_REGULAR;
// Scope part: local linkage is tested first, then visibility.
430 if (def->hasLocalLinkage())
431 // Ignore visibility if linkage is local.
432 attr |= LTO_SYMBOL_SCOPE_INTERNAL;
433 else if (def->hasHiddenVisibility())
434 attr |= LTO_SYMBOL_SCOPE_HIDDEN;
435 else if (def->hasProtectedVisibility())
436 attr |= LTO_SYMBOL_SCOPE_PROTECTED;
437 else if (canBeOmittedFromSymbolTable(def))
438 attr |= LTO_SYMBOL_SCOPE_DEFAULT_CAN_BE_HIDDEN;
440 attr |= LTO_SYMBOL_SCOPE_DEFAULT;
442 StringSet::value_type &entry = _defines.GetOrCreateValue(Name);
445 // fill information structure
446 NameAndAttributes info;
447 StringRef NameRef = entry.getKey();
// Store a pointer to the key held by the _defines entry, not to Name.
448 info.name = NameRef.data();
// The name is later used as a C string, so it must be NUL-terminated.
449 assert(info.name[NameRef.size()] == '\0');
450 info.attributes = attr;
451 info.isFunction = isFunction;
454 // add to table of symbols
455 _symbols.push_back(info);
458 /// addAsmGlobalSymbol - Add a global symbol from module-level ASM to the
/// list of defined symbols (_defines / _symbols), with the given \p scope.
///
/// Visible logic: \p name is looked up/created in _defines. The matching
/// _undefines record is then consulted: if it has no associated symbol, a
/// data symbol with a regular definition and the requested scope is
/// synthesized and appended to _symbols. The remaining statements add the
/// symbol through addDefinedFunctionSymbol or addDefinedDataSymbol using the
/// record's symbol, then overwrite the scope bits of the last entry in
/// _symbols with \p scope.
/// NOTE(review): the branch structure (the early exit after the
/// already-defined check, the end of the null-symbol branch, and the test
/// choosing function vs. data) is not visible in this listing.
460 void LTOModule::addAsmGlobalSymbol(const char *name,
461 lto_symbol_attributes scope) {
462 StringSet::value_type &entry = _defines.GetOrCreateValue(name);
464 // only add new define if not already defined
465 if (entry.getValue())
// NOTE(review): operator[] presumably inserts a default record into
// _undefines when the name is absent - confirm this side effect is intended.
470 NameAndAttributes &info = _undefines[entry.getKey().data()];
472 if (info.symbol == nullptr) {
473 // FIXME: This is trying to take care of module ASM like this:
475 // module asm ".zerofill __FOO, __foo, _bar_baz_qux, 0"
477 // but is gross and its mother dresses it funny. Have the ASM parser give us
478 // more details for this type of situation so that we're not guessing so
481 // fill information structure
482 info.name = entry.getKey().data();
484 LTO_SYMBOL_PERMISSIONS_DATA | LTO_SYMBOL_DEFINITION_REGULAR | scope;
485 info.isFunction = false;
486 info.symbol = nullptr;
488 // add to table of symbols
489 _symbols.push_back(info);
494 addDefinedFunctionSymbol(info.name, cast<Function>(info.symbol));
496 addDefinedDataSymbol(info.name, info.symbol);
// Replace whatever scope the helper computed with the scope requested by
// the caller: clear the scope bits, then set the new ones.
498 _symbols.back().attributes &= ~LTO_SYMBOL_SCOPE_MASK;
499 _symbols.back().attributes |= scope;
502 /// addAsmGlobalSymbolUndef - Add a global symbol from module-level ASM to the
/// list of undefined symbols (_undefines).
///
/// \p name is looked up/created in _undefines and its key is always appended
/// to _asm_undefines. If the entry has no name yet, it is filled in as an
/// undefined, default-scope, non-function symbol with no associated
/// GlobalValue.
/// NOTE(review): the statement following the already-present check
/// (presumably an early return) is not visible in this listing.
504 void LTOModule::addAsmGlobalSymbolUndef(const char *name) {
505 StringMap<NameAndAttributes>::value_type &entry =
506 _undefines.GetOrCreateValue(name);
// Recorded before the already-present check, so every call adds an element.
508 _asm_undefines.push_back(entry.getKey().data());
510 // we already have the symbol
511 if (entry.getValue().name)
514 uint32_t attr = LTO_SYMBOL_DEFINITION_UNDEFINED;
515 attr |= LTO_SYMBOL_SCOPE_DEFAULT;
516 NameAndAttributes info;
517 info.name = entry.getKey().data();
518 info.attributes = attr;
519 info.isFunction = false;
520 info.symbol = nullptr;
522 entry.setValue(info);
525 /// Add a symbol which isn't defined just yet to a list to be resolved later.
///
/// The symbol's name is looked up/created in _undefines. If the entry has no
/// name yet, it is filled in: the attributes are
/// LTO_SYMBOL_DEFINITION_WEAKUNDEF for a declaration with external-weak
/// linkage and LTO_SYMBOL_DEFINITION_UNDEFINED otherwise.
/// NOTE(review): the rest of the parameter list (`isFunc` is used below), the
/// statements that write the name through OS, the early exit after the
/// already-present check, and the `else` line are not visible in this
/// listing.
526 void LTOModule::addPotentialUndefinedSymbol(const object::BasicSymbolRef &Sym,
528 SmallString<64> name;
530 raw_svector_ostream OS(name);
534 StringMap<NameAndAttributes>::value_type &entry =
535 _undefines.GetOrCreateValue(name);
537 // we already have the symbol
538 if (entry.getValue().name)
541 NameAndAttributes info;
// Point at the key held by the map entry; the local `name` buffer does not
// outlive this function.
543 info.name = entry.getKey().data();
545 const GlobalValue *decl = IRFile->getSymbolGV(Sym.getRawDataRefImpl());
547 if (decl->hasExternalWeakLinkage())
548 info.attributes = LTO_SYMBOL_DEFINITION_WEAKUNDEF;
550 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
552 info.isFunction = isFunc;
555 entry.setValue(info);
558 /// parseSymbols - Parse the symbols from the module and module-level ASM and add
559 /// them to either the defined or undefined lists.
///
/// First pass: every symbol of IRFile is classified by its flags and by the
/// GlobalValue behind it (if any) and handed to one of the add* helpers.
/// Second pass: every _undefines entry whose name is not also in _defines is
/// appended to _symbols.
/// NOTE(review): most of the `if`/`continue` lines that select between the
/// calls below, and the return statement, are not visible in this listing;
/// \p errMsg is not referenced by any visible line.
560 bool LTOModule::parseSymbols(std::string &errMsg) {
561 for (auto &Sym : IRFile->symbols()) {
562 const GlobalValue *GV = IRFile->getSymbolGV(Sym.getRawDataRefImpl());
563 uint32_t Flags = Sym.getFlags();
564 if (Flags & object::BasicSymbolRef::SF_FormatSpecific)
567 bool IsUndefined = Flags & object::BasicSymbolRef::SF_Undefined;
// Symbols coming from module-level ASM: handled by name only, with the scope
// chosen from the SF_Global flag.
570 SmallString<64> Buffer;
572 raw_svector_ostream OS(Buffer);
575 const char *Name = Buffer.c_str();
578 addAsmGlobalSymbolUndef(Name);
579 else if (Flags & object::BasicSymbolRef::SF_Global)
580 addAsmGlobalSymbol(Name, LTO_SYMBOL_SCOPE_DEFAULT);
582 addAsmGlobalSymbol(Name, LTO_SYMBOL_SCOPE_INTERNAL);
// Symbols backed by a GlobalValue: dispatch on whether it is a function, a
// global variable, or (asserted) an alias.
586 auto *F = dyn_cast<Function>(GV);
588 addPotentialUndefinedSymbol(Sym, F != nullptr);
593 addDefinedFunctionSymbol(Sym);
597 if (isa<GlobalVariable>(GV)) {
598 addDefinedDataSymbol(Sym);
602 assert(isa<GlobalAlias>(GV));
603 addDefinedDataSymbol(Sym);
606 // make symbols for all undefines
607 for (StringMap<NameAndAttributes>::iterator u =_undefines.begin(),
608 e = _undefines.end(); u != e; ++u) {
609 // If this symbol also has a definition, then don't make an undefine because
610 // it is a tentative definition.
611 if (_defines.count(u->getKey())) continue;
612 NameAndAttributes info = u->getValue();
613 _symbols.push_back(info);
619 /// parseMetadata - Parse metadata from the module
///
/// Currently handles the "Linker Options" module flag: it is treated as a
/// list of MDNodes, each of which is a list of MDStrings. Every option string
/// is interned in _linkeropt_strings; the target's object-file lowering is
/// then asked whether the option names a dependent library. A non-empty
/// library name is appended to _deplibs; otherwise a non-empty option is
/// appended to _linkeropts.
620 void LTOModule::parseMetadata() {
622 if (Value *Val = getModule().getModuleFlag("Linker Options")) {
// cast<> asserts on unexpected metadata kinds rather than returning null.
623 MDNode *LinkerOptions = cast<MDNode>(Val);
624 for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) {
625 MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i));
626 for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) {
627 MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii));
// Op refers to the key stored in _linkeropt_strings, so the pointer pushed
// into _linkeropts below refers to that entry, not to the metadata string.
628 StringRef Op = _linkeropt_strings.
629 GetOrCreateValue(MDOption->getString()).getKey();
630 StringRef DepLibName = _target->getSubtargetImpl()
631 ->getTargetLowering()
632 ->getObjFileLowering()
633 .getDepLibFromLinkerOpt(Op);
634 if (!DepLibName.empty())
635 _deplibs.push_back(DepLibName.data());
636 else if (!Op.empty())
637 _linkeropts.push_back(Op.data());
642 // Add other interesting metadata here.