1 //===-- LTOModule.cpp - LLVM Link Time Optimizer --------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the Link Time Optimization library. This library is
11 // intended to be used by linker to optimize code at link time.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/LTO/LTOModule.h"
16 #include "llvm/ADT/Triple.h"
17 #include "llvm/Bitcode/ReaderWriter.h"
18 #include "llvm/IR/Constants.h"
19 #include "llvm/IR/LLVMContext.h"
20 #include "llvm/IR/Metadata.h"
21 #include "llvm/IR/Module.h"
22 #include "llvm/MC/MCExpr.h"
23 #include "llvm/MC/MCInst.h"
24 #include "llvm/MC/MCInstrInfo.h"
25 #include "llvm/MC/MCParser/MCAsmParser.h"
26 #include "llvm/MC/MCSection.h"
27 #include "llvm/MC/MCSubtargetInfo.h"
28 #include "llvm/MC/MCSymbol.h"
29 #include "llvm/MC/MCTargetAsmParser.h"
30 #include "llvm/MC/SubtargetFeature.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/FileSystem.h"
33 #include "llvm/Support/Host.h"
34 #include "llvm/Support/MemoryBuffer.h"
35 #include "llvm/Support/Path.h"
36 #include "llvm/Support/SourceMgr.h"
37 #include "llvm/Support/TargetRegistry.h"
38 #include "llvm/Support/TargetSelect.h"
39 #include "llvm/Target/TargetLowering.h"
40 #include "llvm/Target/TargetLoweringObjectFile.h"
41 #include "llvm/Target/TargetRegisterInfo.h"
42 #include "llvm/Transforms/Utils/GlobalStatus.h"
43 #include <system_error>
46 LTOModule::LTOModule(std::unique_ptr<object::IRObjectFile> Obj,
47 llvm::TargetMachine *TM)
48 : IRFile(std::move(Obj)), _target(TM) {}
50 /// isBitcodeFile - Returns 'true' if the file (or memory contents) is LLVM
52 bool LTOModule::isBitcodeFile(const void *mem, size_t length) {
53 return sys::fs::identify_magic(StringRef((const char *)mem, length)) ==
54 sys::fs::file_magic::bitcode;
57 bool LTOModule::isBitcodeFile(const char *path) {
58 sys::fs::file_magic type;
59 if (sys::fs::identify_magic(path, type))
61 return type == sys::fs::file_magic::bitcode;
64 bool LTOModule::isBitcodeForTarget(MemoryBuffer *buffer,
65 StringRef triplePrefix) {
66 std::string Triple = getBitcodeTargetTriple(buffer, getGlobalContext());
67 return StringRef(Triple).startswith(triplePrefix);
70 LTOModule *LTOModule::createFromFile(const char *path, TargetOptions options,
71 std::string &errMsg) {
72 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
73 MemoryBuffer::getFile(path);
74 if (std::error_code EC = BufferOrErr.getError()) {
75 errMsg = EC.message();
78 return makeLTOModule(std::move(BufferOrErr.get()), options, errMsg);
81 LTOModule *LTOModule::createFromOpenFile(int fd, const char *path, size_t size,
82 TargetOptions options,
83 std::string &errMsg) {
84 return createFromOpenFileSlice(fd, path, size, 0, options, errMsg);
87 LTOModule *LTOModule::createFromOpenFileSlice(int fd, const char *path,
88 size_t map_size, off_t offset,
89 TargetOptions options,
90 std::string &errMsg) {
91 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
92 MemoryBuffer::getOpenFileSlice(fd, path, map_size, offset);
93 if (std::error_code EC = BufferOrErr.getError()) {
94 errMsg = EC.message();
97 return makeLTOModule(std::move(BufferOrErr.get()), options, errMsg);
100 LTOModule *LTOModule::createFromBuffer(const void *mem, size_t length,
101 TargetOptions options,
102 std::string &errMsg, StringRef path) {
103 std::unique_ptr<MemoryBuffer> buffer(makeBuffer(mem, length, path));
106 return makeLTOModule(std::move(buffer), options, errMsg);
109 LTOModule *LTOModule::makeLTOModule(std::unique_ptr<MemoryBuffer> Buffer,
110 TargetOptions options,
111 std::string &errMsg) {
112 ErrorOr<Module *> MOrErr =
113 getLazyBitcodeModule(Buffer.get(), getGlobalContext());
114 if (std::error_code EC = MOrErr.getError()) {
115 errMsg = EC.message();
118 std::unique_ptr<Module> M(MOrErr.get());
120 std::string TripleStr = M->getTargetTriple();
121 if (TripleStr.empty())
122 TripleStr = sys::getDefaultTargetTriple();
123 llvm::Triple Triple(TripleStr);
125 // find machine architecture for this module
126 const Target *march = TargetRegistry::lookupTarget(TripleStr, errMsg);
130 // construct LTOModule, hand over ownership of module and target
131 SubtargetFeatures Features;
132 Features.getDefaultSubtargetFeatures(Triple);
133 std::string FeatureStr = Features.getString();
134 // Set a default CPU for Darwin triples.
136 if (Triple.isOSDarwin()) {
137 if (Triple.getArch() == llvm::Triple::x86_64)
139 else if (Triple.getArch() == llvm::Triple::x86)
141 else if (Triple.getArch() == llvm::Triple::aarch64)
145 TargetMachine *target = march->createTargetMachine(TripleStr, CPU, FeatureStr,
147 M->materializeAllPermanently(true);
148 M->setDataLayout(target->getDataLayout());
150 std::unique_ptr<object::IRObjectFile> IRObj(
151 new object::IRObjectFile(std::move(Buffer), std::move(M)));
153 LTOModule *Ret = new LTOModule(std::move(IRObj), target);
155 if (Ret->parseSymbols(errMsg)) {
160 Ret->parseMetadata();
165 /// Create a MemoryBuffer from a memory range with an optional name.
166 MemoryBuffer *LTOModule::makeBuffer(const void *mem, size_t length,
168 const char *startPtr = (const char*)mem;
169 return MemoryBuffer::getMemBuffer(StringRef(startPtr, length), name, false);
172 /// objcClassNameFromExpression - Get string that the data pointer points to.
174 LTOModule::objcClassNameFromExpression(const Constant *c, std::string &name) {
175 if (const ConstantExpr *ce = dyn_cast<ConstantExpr>(c)) {
176 Constant *op = ce->getOperand(0);
177 if (GlobalVariable *gvn = dyn_cast<GlobalVariable>(op)) {
178 Constant *cn = gvn->getInitializer();
179 if (ConstantDataArray *ca = dyn_cast<ConstantDataArray>(cn)) {
180 if (ca->isCString()) {
181 name = ".objc_class_name_" + ca->getAsCString().str();
190 /// addObjCClass - Parse i386/ppc ObjC class data structure.
191 void LTOModule::addObjCClass(const GlobalVariable *clgv) {
192 const ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer());
195 // second slot in __OBJC,__class is pointer to superclass name
196 std::string superclassName;
197 if (objcClassNameFromExpression(c->getOperand(1), superclassName)) {
198 NameAndAttributes info;
199 StringMap<NameAndAttributes>::value_type &entry =
200 _undefines.GetOrCreateValue(superclassName);
201 if (!entry.getValue().name) {
202 const char *symbolName = entry.getKey().data();
203 info.name = symbolName;
204 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
205 info.isFunction = false;
207 entry.setValue(info);
211 // third slot in __OBJC,__class is pointer to class name
212 std::string className;
213 if (objcClassNameFromExpression(c->getOperand(2), className)) {
214 StringSet::value_type &entry = _defines.GetOrCreateValue(className);
217 NameAndAttributes info;
218 info.name = entry.getKey().data();
219 info.attributes = LTO_SYMBOL_PERMISSIONS_DATA |
220 LTO_SYMBOL_DEFINITION_REGULAR | LTO_SYMBOL_SCOPE_DEFAULT;
221 info.isFunction = false;
223 _symbols.push_back(info);
227 /// addObjCCategory - Parse i386/ppc ObjC category data structure.
228 void LTOModule::addObjCCategory(const GlobalVariable *clgv) {
229 const ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer());
232 // second slot in __OBJC,__category is pointer to target class name
233 std::string targetclassName;
234 if (!objcClassNameFromExpression(c->getOperand(1), targetclassName))
237 NameAndAttributes info;
238 StringMap<NameAndAttributes>::value_type &entry =
239 _undefines.GetOrCreateValue(targetclassName);
241 if (entry.getValue().name)
244 const char *symbolName = entry.getKey().data();
245 info.name = symbolName;
246 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
247 info.isFunction = false;
249 entry.setValue(info);
252 /// addObjCClassRef - Parse i386/ppc ObjC class list data structure.
253 void LTOModule::addObjCClassRef(const GlobalVariable *clgv) {
254 std::string targetclassName;
255 if (!objcClassNameFromExpression(clgv->getInitializer(), targetclassName))
258 NameAndAttributes info;
259 StringMap<NameAndAttributes>::value_type &entry =
260 _undefines.GetOrCreateValue(targetclassName);
261 if (entry.getValue().name)
264 const char *symbolName = entry.getKey().data();
265 info.name = symbolName;
266 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
267 info.isFunction = false;
269 entry.setValue(info);
272 void LTOModule::addDefinedDataSymbol(const object::BasicSymbolRef &Sym) {
273 SmallString<64> Buffer;
275 raw_svector_ostream OS(Buffer);
279 const GlobalValue *V = IRFile->getSymbolGV(Sym.getRawDataRefImpl());
280 addDefinedDataSymbol(Buffer.c_str(), V);
283 void LTOModule::addDefinedDataSymbol(const char *Name, const GlobalValue *v) {
284 // Add to list of defined symbols.
285 addDefinedSymbol(Name, v, false);
287 if (!v->hasSection() /* || !isTargetDarwin */)
290 // Special case i386/ppc ObjC data structures in magic sections:
291 // The issue is that the old ObjC object format did some strange
292 // contortions to avoid real linker symbols. For instance, the
293 // ObjC class data structure is allocated statically in the executable
294 // that defines that class. That data structures contains a pointer to
295 // its superclass. But instead of just initializing that part of the
296 // struct to the address of its superclass, and letting the static and
297 // dynamic linkers do the rest, the runtime works by having that field
298 // instead point to a C-string that is the name of the superclass.
299 // At runtime the objc initialization updates that pointer and sets
300 // it to point to the actual super class. As far as the linker
301 // knows it is just a pointer to a string. But then someone wanted the
302 // linker to issue errors at build time if the superclass was not found.
303 // So they figured out a way in mach-o object format to use an absolute
304 // symbols (.objc_class_name_Foo = 0) and a floating reference
305 // (.reference .objc_class_name_Bar) to cause the linker into erroring when
306 // a class was missing.
307 // The following synthesizes the implicit .objc_* symbols for the linker
308 // from the ObjC data structures generated by the front end.
310 // special case if this data blob is an ObjC class definition
311 std::string Section = v->getSection();
312 if (Section.compare(0, 15, "__OBJC,__class,") == 0) {
313 if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
318 // special case if this data blob is an ObjC category definition
319 else if (Section.compare(0, 18, "__OBJC,__category,") == 0) {
320 if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
325 // special case if this data blob is the list of referenced classes
326 else if (Section.compare(0, 18, "__OBJC,__cls_refs,") == 0) {
327 if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
333 void LTOModule::addDefinedFunctionSymbol(const object::BasicSymbolRef &Sym) {
334 SmallString<64> Buffer;
336 raw_svector_ostream OS(Buffer);
341 cast<Function>(IRFile->getSymbolGV(Sym.getRawDataRefImpl()));
342 addDefinedFunctionSymbol(Buffer.c_str(), F);
345 void LTOModule::addDefinedFunctionSymbol(const char *Name, const Function *F) {
346 // add to list of defined symbols
347 addDefinedSymbol(Name, F, true);
350 static bool canBeHidden(const GlobalValue *GV) {
351 // FIXME: this is duplicated with another static function in AsmPrinter.cpp
352 if (!GV->hasLinkOnceODRLinkage())
355 if (GV->hasUnnamedAddr())
358 // If it is a non constant variable, it needs to be uniqued across shared
360 if (const GlobalVariable *Var = dyn_cast<GlobalVariable>(GV)) {
361 if (!Var->isConstant())
366 if (GlobalStatus::analyzeGlobal(GV, GS))
369 return !GS.IsCompared;
372 void LTOModule::addDefinedSymbol(const char *Name, const GlobalValue *def,
374 // set alignment part log2() can have rounding errors
375 uint32_t align = def->getAlignment();
376 uint32_t attr = align ? countTrailingZeros(align) : 0;
378 // set permissions part
380 attr |= LTO_SYMBOL_PERMISSIONS_CODE;
382 const GlobalVariable *gv = dyn_cast<GlobalVariable>(def);
383 if (gv && gv->isConstant())
384 attr |= LTO_SYMBOL_PERMISSIONS_RODATA;
386 attr |= LTO_SYMBOL_PERMISSIONS_DATA;
389 // set definition part
390 if (def->hasWeakLinkage() || def->hasLinkOnceLinkage())
391 attr |= LTO_SYMBOL_DEFINITION_WEAK;
392 else if (def->hasCommonLinkage())
393 attr |= LTO_SYMBOL_DEFINITION_TENTATIVE;
395 attr |= LTO_SYMBOL_DEFINITION_REGULAR;
398 if (def->hasLocalLinkage())
399 // Ignore visibility if linkage is local.
400 attr |= LTO_SYMBOL_SCOPE_INTERNAL;
401 else if (def->hasHiddenVisibility())
402 attr |= LTO_SYMBOL_SCOPE_HIDDEN;
403 else if (def->hasProtectedVisibility())
404 attr |= LTO_SYMBOL_SCOPE_PROTECTED;
405 else if (canBeHidden(def))
406 attr |= LTO_SYMBOL_SCOPE_DEFAULT_CAN_BE_HIDDEN;
408 attr |= LTO_SYMBOL_SCOPE_DEFAULT;
410 StringSet::value_type &entry = _defines.GetOrCreateValue(Name);
413 // fill information structure
414 NameAndAttributes info;
415 StringRef NameRef = entry.getKey();
416 info.name = NameRef.data();
417 assert(info.name[NameRef.size()] == '\0');
418 info.attributes = attr;
419 info.isFunction = isFunction;
422 // add to table of symbols
423 _symbols.push_back(info);
426 /// addAsmGlobalSymbol - Add a global symbol from module-level ASM to the
428 void LTOModule::addAsmGlobalSymbol(const char *name,
429 lto_symbol_attributes scope) {
430 StringSet::value_type &entry = _defines.GetOrCreateValue(name);
432 // only add new define if not already defined
433 if (entry.getValue())
438 NameAndAttributes &info = _undefines[entry.getKey().data()];
440 if (info.symbol == nullptr) {
441 // FIXME: This is trying to take care of module ASM like this:
443 // module asm ".zerofill __FOO, __foo, _bar_baz_qux, 0"
445 // but is gross and its mother dresses it funny. Have the ASM parser give us
446 // more details for this type of situation so that we're not guessing so
449 // fill information structure
450 info.name = entry.getKey().data();
452 LTO_SYMBOL_PERMISSIONS_DATA | LTO_SYMBOL_DEFINITION_REGULAR | scope;
453 info.isFunction = false;
454 info.symbol = nullptr;
456 // add to table of symbols
457 _symbols.push_back(info);
462 addDefinedFunctionSymbol(info.name, cast<Function>(info.symbol));
464 addDefinedDataSymbol(info.name, info.symbol);
466 _symbols.back().attributes &= ~LTO_SYMBOL_SCOPE_MASK;
467 _symbols.back().attributes |= scope;
470 /// addAsmGlobalSymbolUndef - Add a global symbol from module-level ASM to the
472 void LTOModule::addAsmGlobalSymbolUndef(const char *name) {
473 StringMap<NameAndAttributes>::value_type &entry =
474 _undefines.GetOrCreateValue(name);
476 _asm_undefines.push_back(entry.getKey().data());
478 // we already have the symbol
479 if (entry.getValue().name)
482 uint32_t attr = LTO_SYMBOL_DEFINITION_UNDEFINED;
483 attr |= LTO_SYMBOL_SCOPE_DEFAULT;
484 NameAndAttributes info;
485 info.name = entry.getKey().data();
486 info.attributes = attr;
487 info.isFunction = false;
488 info.symbol = nullptr;
490 entry.setValue(info);
493 /// Add a symbol which isn't defined just yet to a list to be resolved later.
494 void LTOModule::addPotentialUndefinedSymbol(const object::BasicSymbolRef &Sym,
496 SmallString<64> name;
498 raw_svector_ostream OS(name);
502 StringMap<NameAndAttributes>::value_type &entry =
503 _undefines.GetOrCreateValue(name);
505 // we already have the symbol
506 if (entry.getValue().name)
509 NameAndAttributes info;
511 info.name = entry.getKey().data();
513 const GlobalValue *decl = IRFile->getSymbolGV(Sym.getRawDataRefImpl());
515 if (decl->hasExternalWeakLinkage())
516 info.attributes = LTO_SYMBOL_DEFINITION_WEAKUNDEF;
518 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
520 info.isFunction = isFunc;
523 entry.setValue(info);
526 /// parseSymbols - Parse the symbols from the module and model-level ASM and add
527 /// them to either the defined or undefined lists.
528 bool LTOModule::parseSymbols(std::string &errMsg) {
529 for (auto &Sym : IRFile->symbols()) {
530 const GlobalValue *GV = IRFile->getSymbolGV(Sym.getRawDataRefImpl());
531 uint32_t Flags = Sym.getFlags();
532 if (Flags & object::BasicSymbolRef::SF_FormatSpecific)
535 bool IsUndefined = Flags & object::BasicSymbolRef::SF_Undefined;
538 SmallString<64> Buffer;
540 raw_svector_ostream OS(Buffer);
543 const char *Name = Buffer.c_str();
546 addAsmGlobalSymbolUndef(Name);
547 else if (Flags & object::BasicSymbolRef::SF_Global)
548 addAsmGlobalSymbol(Name, LTO_SYMBOL_SCOPE_DEFAULT);
550 addAsmGlobalSymbol(Name, LTO_SYMBOL_SCOPE_INTERNAL);
554 auto *F = dyn_cast<Function>(GV);
556 addPotentialUndefinedSymbol(Sym, F != nullptr);
561 addDefinedFunctionSymbol(Sym);
565 if (isa<GlobalVariable>(GV)) {
566 addDefinedDataSymbol(Sym);
570 assert(isa<GlobalAlias>(GV));
571 addDefinedDataSymbol(Sym);
574 // make symbols for all undefines
575 for (StringMap<NameAndAttributes>::iterator u =_undefines.begin(),
576 e = _undefines.end(); u != e; ++u) {
577 // If this symbol also has a definition, then don't make an undefine because
578 // it is a tentative definition.
579 if (_defines.count(u->getKey())) continue;
580 NameAndAttributes info = u->getValue();
581 _symbols.push_back(info);
587 /// parseMetadata - Parse metadata from the module
588 void LTOModule::parseMetadata() {
590 if (Value *Val = getModule().getModuleFlag("Linker Options")) {
591 MDNode *LinkerOptions = cast<MDNode>(Val);
592 for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) {
593 MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i));
594 for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) {
595 MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii));
596 StringRef Op = _linkeropt_strings.
597 GetOrCreateValue(MDOption->getString()).getKey();
598 StringRef DepLibName = _target->getTargetLowering()->
599 getObjFileLowering().getDepLibFromLinkerOpt(Op);
600 if (!DepLibName.empty())
601 _deplibs.push_back(DepLibName.data());
602 else if (!Op.empty())
603 _linkeropts.push_back(Op.data());
608 // Add other interesting metadata here.