From: Teresa Johnson Date: Sun, 4 Oct 2015 14:33:43 +0000 (+0000) Subject: Support for function summary index bitcode sections and files. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=b97baa5fe54954247e143e702201364284b54345;p=oota-llvm.git Support for function summary index bitcode sections and files. Summary: The bitcode format is described in this document: https://drive.google.com/file/d/0B036uwnWM6RWdnBLakxmeDdOeXc/view For more info on ThinLTO see: https://sites.google.com/site/llvmthinlto The first customer is ThinLTO, however the data structures are designed and named more generally based on prior feedback. There are a few comments regarding how certain interfaces are used by ThinLTO, and the options added here to gold currently have ThinLTO-specific names as the behavior they provoke is currently ThinLTO-specific. This patch includes support for generating per-module function indexes, the combined index file via the gold plugin, and several tests (more are included with the associated clang patch D11908). Reviewers: dexonsmith, davidxl, joker.eph Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D13107 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@249270 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/Bitcode/BitcodeWriterPass.h b/include/llvm/Bitcode/BitcodeWriterPass.h index ae915c688ba..a1272cf156e 100644 --- a/include/llvm/Bitcode/BitcodeWriterPass.h +++ b/include/llvm/Bitcode/BitcodeWriterPass.h @@ -29,8 +29,12 @@ class PreservedAnalyses; /// /// If \c ShouldPreserveUseListOrder, encode use-list order so it can be /// reproduced when deserialized. +/// +/// If \c EmitFunctionSummary, emit the function summary index (currently +/// for use in ThinLTO optimization). 
ModulePass *createBitcodeWriterPass(raw_ostream &Str, - bool ShouldPreserveUseListOrder = false); + bool ShouldPreserveUseListOrder = false, + bool EmitFunctionSummary = false); /// \brief Pass for writing a module of IR out to a bitcode file. /// @@ -39,15 +43,21 @@ ModulePass *createBitcodeWriterPass(raw_ostream &Str, class BitcodeWriterPass { raw_ostream &OS; bool ShouldPreserveUseListOrder; + bool EmitFunctionSummary; public: /// \brief Construct a bitcode writer pass around a particular output stream. /// /// If \c ShouldPreserveUseListOrder, encode use-list order so it can be /// reproduced when deserialized. + /// + /// If \c EmitFunctionSummary, emit the function summary index (currently + /// for use in ThinLTO optimization). explicit BitcodeWriterPass(raw_ostream &OS, - bool ShouldPreserveUseListOrder = false) - : OS(OS), ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {} + bool ShouldPreserveUseListOrder = false, + bool EmitFunctionSummary = false) + : OS(OS), ShouldPreserveUseListOrder(ShouldPreserveUseListOrder), + EmitFunctionSummary(EmitFunctionSummary) {} /// \brief Run the bitcode writer pass, and output the module to the selected /// output stream. diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h index f4c83669dc1..bac6c574708 100644 --- a/include/llvm/Bitcode/BitstreamWriter.h +++ b/include/llvm/Bitcode/BitstreamWriter.h @@ -95,6 +95,9 @@ public: /// \brief Retrieve the current position in the stream, in bits. uint64_t GetCurrentBitNo() const { return GetBufferOffset() * 8 + CurBit; } + /// \brief Retrieve the number of bits currently used to encode an abbrev ID. + unsigned GetAbbrevIDWidth() const { return CurCodeSize; } + //===--------------------------------------------------------------------===// // Basic Primitives for emitting bits to the stream. 
//===--------------------------------------------------------------------===// diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h index 9da5be6e969..d5f3c7f6096 100644 --- a/include/llvm/Bitcode/LLVMBitCodes.h +++ b/include/llvm/Bitcode/LLVMBitCodes.h @@ -150,6 +150,20 @@ namespace bitc { VST_CODE_ENTRY = 1, // VST_ENTRY: [valueid, namechar x N] VST_CODE_BBENTRY = 2, // VST_BBENTRY: [bbid, namechar x N] VST_CODE_FNENTRY = 3, // VST_FNENTRY: [valueid, offset, namechar x N] + // VST_COMBINED_FNENTRY: [offset, namechar x N] + VST_CODE_COMBINED_FNENTRY = 4 + }; + + // The module path symbol table only has one code (MST_CODE_ENTRY). + enum ModulePathSymtabCodes { + MST_CODE_ENTRY = 1, // MST_ENTRY: [modid, namechar x N] + }; + + // The function summary section uses different codes in the per-module + // and combined index cases. + enum FunctionSummarySymtabCodes { + FS_CODE_PERMODULE_ENTRY = 1, // FS_ENTRY: [valueid, islocal, instcount] + FS_CODE_COMBINED_ENTRY = 2, // FS_ENTRY: [modid, instcount] }; enum MetadataCodes { diff --git a/include/llvm/Bitcode/ReaderWriter.h b/include/llvm/Bitcode/ReaderWriter.h index 6f92e2e1b3c..18715844c52 100644 --- a/include/llvm/Bitcode/ReaderWriter.h +++ b/include/llvm/Bitcode/ReaderWriter.h @@ -15,6 +15,7 @@ #define LLVM_BITCODE_READERWRITER_H #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/FunctionInfo.h" #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/MemoryBuffer.h" @@ -58,6 +59,30 @@ namespace llvm { parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, DiagnosticHandlerFunction DiagnosticHandler = nullptr); + /// Check if the given bitcode buffer contains a function summary block. + bool hasFunctionSummary(MemoryBufferRef Buffer, LLVMContext &Context, + DiagnosticHandlerFunction DiagnosticHandler); + + /// Parse the specified bitcode buffer, returning the function info index. 
+ /// If IsLazy is false, parse the entire function summary into + /// the index. Otherwise skip the function summary section, and only create + /// an index object with a map from function name to function summary offset. + /// The index is used to perform lazy function summary reading later. + ErrorOr> getFunctionInfoIndex( + MemoryBufferRef Buffer, LLVMContext &Context, + DiagnosticHandlerFunction DiagnosticHandler, bool IsLazy = false); + + /// This method supports lazy reading of function summary data from the + /// combined index during function importing. When reading the combined index + /// file, getFunctionInfoIndex is first invoked with IsLazy=true. + /// Then this method is called for each function considered for importing, + /// to parse the summary information for the given function name into + /// the index. + std::error_code readFunctionSummary( + MemoryBufferRef Buffer, LLVMContext &Context, + DiagnosticHandlerFunction DiagnosticHandler, StringRef FunctionName, + std::unique_ptr Index); + /// \brief Write the specified module to the specified raw output stream. /// /// For streams where it matters, the given stream should be in "binary" @@ -66,8 +91,18 @@ namespace llvm { /// If \c ShouldPreserveUseListOrder, encode the use-list order for each \a /// Value in \c M. These will be reconstructed exactly when \a M is /// deserialized. + /// + /// If \c EmitFunctionSummary, emit the function summary index (currently + /// for use in ThinLTO optimization). void WriteBitcodeToFile(const Module *M, raw_ostream &Out, - bool ShouldPreserveUseListOrder = false); + bool ShouldPreserveUseListOrder = false, + bool EmitFunctionSummary = false); + + /// Write the specified function summary index to the given raw output stream, + /// where it will be written in a new bitcode block. This is used when + /// writing the combined index file for ThinLTO. 
+ void WriteFunctionSummaryToFile(const FunctionInfoIndex *Index, + raw_ostream &Out); /// isBitcodeWrapper - Return true if the given bytes are the magic bytes /// for an LLVM IR bitcode wrapper. diff --git a/include/llvm/IR/FunctionInfo.h b/include/llvm/IR/FunctionInfo.h new file mode 100644 index 00000000000..a8dd2d9249e --- /dev/null +++ b/include/llvm/IR/FunctionInfo.h @@ -0,0 +1,230 @@ +//===-- llvm/FunctionInfo.h - Function Info Index ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// @file +/// FunctionInfo.h This file contains the declarations of the classes that hold +/// the function info index and summary. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_FUNCTIONINFO_H +#define LLVM_IR_FUNCTIONINFO_H + +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +/// \brief Function summary information to aid decisions and implementation of +/// importing. +/// +/// This is a separate class from FunctionInfo to enable lazy reading of this +/// function summary information from the combined index file during importing. +class FunctionSummary { + private: + /// \brief Path of module containing function IR, used to locate module when + /// importing this function. + /// + /// This is only used during parsing of the combined function index, or when + /// parsing the per-module index for creation of the combined function index, + /// not during writing of the per-module index which doesn't contain a + /// module path string table. 
+ StringRef ModulePath; + + /// \brief Used to flag functions that have local linkage types and need to + /// have module identifier appended before placing into the combined + /// index, to disambiguate from other functions with the same name. + /// + /// This is only used in the per-module function index, as it is consumed + /// while creating the combined index. + bool IsLocalFunction; + + // The rest of the information is used to help decide whether importing + // is likely to be profitable. + // Other information will be added as the importing is tuned, such + // as hotness (when profile available), and other function characteristics. + + /// Number of instructions (ignoring debug instructions, e.g.) computed + /// during the initial compile step when the function index is first built. + unsigned InstCount; + + public: + /// Construct a summary object from summary data expected for all + /// summary records. + /// + /// \param NumInsts instruction count to record for the function. + /// + /// The local-function flag starts out false, so a summary on which + /// setLocalFunction() is never called (e.g. one created for a combined + /// index record) does not expose an indeterminate value through + /// isLocalFunction(). + FunctionSummary(unsigned NumInsts) + : IsLocalFunction(false), InstCount(NumInsts) {} + + /// Set the path to the module containing this function, for use in + /// the combined index. + void setModulePath(StringRef ModPath) { ModulePath = ModPath; } + + /// Get the path to the module containing this function. + StringRef modulePath() const { return ModulePath; } + + /// Record whether this is a local function in the per-module index. + void setLocalFunction(bool IsLocal) { IsLocalFunction = IsLocal; } + + /// Check whether this was a local function, for use in creating + /// the combined index. + bool isLocalFunction() const { return IsLocalFunction; } + + /// Get the instruction count recorded for this function. + unsigned instCount() const { return InstCount; } +}; + +/// \brief Class to hold pointer to function summary and information required +/// for parsing it. +/// +/// For the per-module index, this holds the bitcode offset +/// of the corresponding function block. 
For the combined index, +/// after parsing of the \a ValueSymbolTable, this initially +/// holds the offset of the corresponding function summary bitcode +/// record. After parsing the associated summary information from the summary +/// block the \a FunctionSummary is populated and stored here. +class FunctionInfo { + private: + /// Function summary information used to help make ThinLTO importing + /// decisions. + std::unique_ptr Summary; + + /// \brief The bitcode offset corresponding to either the associated + /// function's function body record, or its function summary record, + /// depending on whether this is a per-module or combined index. + /// + /// This bitcode offset is written to or read from the associated + /// \a ValueSymbolTable entry for the function. + /// For the per-module index this holds the bitcode offset of the + /// function's body record within bitcode module block in its module, + /// which is used during lazy function parsing or ThinLTO importing. + /// For the combined index this holds the offset of the corresponding + /// function summary record, to enable associating the combined index + /// VST records with the summary records. + uint64_t BitcodeIndex; + + public: + /// Constructor used during parsing of VST entries. + FunctionInfo(uint64_t FuncOffset) + : Summary(nullptr), BitcodeIndex(FuncOffset) {} + + /// Constructor used for per-module index bitcode writing. + FunctionInfo(uint64_t FuncOffset, + std::unique_ptr FuncSummary) + : Summary(std::move(FuncSummary)), BitcodeIndex(FuncOffset) {} + + /// Record the function summary information parsed out of the function + /// summary block during parsing or combined index creation. + void setFunctionSummary(std::unique_ptr FuncSummary) { + Summary = std::move(FuncSummary); + } + + /// Get the function summary recorded for this function. 
+ FunctionSummary *functionSummary() const { return Summary.get(); } + + /// Get the bitcode index recorded for this function, depending on + /// the index type. + uint64_t bitcodeIndex() const { return BitcodeIndex; } + + /// Record the bitcode index for this function, depending on + /// the index type. + void setBitcodeIndex(uint64_t FuncOffset) { BitcodeIndex = FuncOffset; } +}; + +/// List of function info structures for a particular function name held +/// in the FunctionMap. Requires a vector in the case of multiple +/// COMDAT functions of the same name. +typedef std::vector> FunctionInfoList; + +/// Map from function name to corresponding function info structures. +typedef StringMap FunctionInfoMapTy; + +/// Type used for iterating through the function info map. +typedef FunctionInfoMapTy::const_iterator const_funcinfo_iterator; +typedef FunctionInfoMapTy::iterator funcinfo_iterator; + +/// String table to hold/own module path strings, which additionally holds the +/// module ID assigned to each module during the plugin step. The StringMap +/// makes a copy of and owns inserted strings. +typedef StringMap ModulePathStringTableTy; + +/// Class to hold module path string table and function map, +/// and encapsulate methods for operating on them. +class FunctionInfoIndex { + private: + /// Map from function name to list of function information instances + /// for functions of that name (may be duplicates in the COMDAT case, e.g.). + FunctionInfoMapTy FunctionMap; + + /// Holds strings for combined index, mapping to the corresponding module ID. + ModulePathStringTableTy ModulePathStringTable; + + public: + FunctionInfoIndex() = default; + ~FunctionInfoIndex() = default; + + // Disable the copy constructor and assignment operators, so + // no unexpected copying/moving occurs. 
+ FunctionInfoIndex(const FunctionInfoIndex &) = delete; + void operator=(const FunctionInfoIndex &) = delete; + + funcinfo_iterator begin() { return FunctionMap.begin(); } + const_funcinfo_iterator begin() const { return FunctionMap.begin(); } + funcinfo_iterator end() { return FunctionMap.end(); } + const_funcinfo_iterator end() const { return FunctionMap.end(); } + + /// Get the list of function info objects for a given function. + const FunctionInfoList &getFunctionInfoList(StringRef FuncName) { + return FunctionMap[FuncName]; + } + + /// Add a function info for a function of the given name. + void addFunctionInfo(StringRef FuncName, std::unique_ptr Info) { + FunctionMap[FuncName].push_back(std::move(Info)); + } + + /// Iterator to allow writer to walk through table during emission. + iterator_range::const_iterator> modPathStringEntries() + const { + return llvm::make_range(ModulePathStringTable.begin(), + ModulePathStringTable.end()); + } + + /// Get the module ID recorded for the given module path. + uint64_t getModuleId(const StringRef ModPath) const { + return ModulePathStringTable.lookup(ModPath); + } + + /// Add the given per-module index into this function index/summary, + /// assigning it the given module ID. Each module merged in should have + /// a unique ID, necessary for consistent renaming of promoted + /// static (local) variables. + void mergeFrom(std::unique_ptr Other, + uint64_t NextModuleId); + + /// Convenience method for creating a promoted global name + /// for the given value name of a local, and its original module's ID. + static std::string getGlobalNameForLocal(StringRef Name, uint64_t ModId) { + SmallString<256> NewName(Name); + NewName += ".llvm."; + raw_svector_ostream(NewName) << ModId; + return NewName.str(); + } + + /// Add a new module path, mapped to the given module Id, and return StringRef + /// owned by string table map. 
+ StringRef addModulePath(StringRef ModPath, uint64_t ModId) { + return ModulePathStringTable.insert(std::make_pair(ModPath, ModId)) + .first->first(); + } +}; + +} // End llvm namespace + +#endif diff --git a/include/llvm/Object/Binary.h b/include/llvm/Object/Binary.h index 1ec005a970e..36911d8f327 100644 --- a/include/llvm/Object/Binary.h +++ b/include/llvm/Object/Binary.h @@ -43,6 +43,7 @@ protected: ID_MachOUniversalBinary, ID_COFFImportFile, ID_IR, // LLVM IR + ID_FunctionIndex, // Function summary index // Object and children. ID_StartObjects, @@ -122,6 +123,10 @@ public: return TypeID == ID_IR; } + bool isFunctionIndex() const { + return TypeID == ID_FunctionIndex; + } + bool isLittleEndian() const { return !(TypeID == ID_ELF32B || TypeID == ID_ELF64B || TypeID == ID_MachO32B || TypeID == ID_MachO64B); diff --git a/include/llvm/Object/FunctionIndexObjectFile.h b/include/llvm/Object/FunctionIndexObjectFile.h new file mode 100644 index 00000000000..b33fa6434fd --- /dev/null +++ b/include/llvm/Object/FunctionIndexObjectFile.h @@ -0,0 +1,99 @@ +//===- FunctionIndexObjectFile.h - Function index file implementation -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the FunctionIndexObjectFile template class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECT_FUNCTIONINDEXOBJECTFILE_H +#define LLVM_OBJECT_FUNCTIONINDEXOBJECTFILE_H + +#include "llvm/Object/SymbolicFile.h" + +namespace llvm { +class FunctionInfoIndex; + +namespace object { +class ObjectFile; + +/// This class is used to read just the function summary index related +/// sections out of the given object (which may contain a single module's +/// bitcode or be a combined index bitcode file). 
It builds a FunctionInfoIndex +/// object. +class FunctionIndexObjectFile : public SymbolicFile { + std::unique_ptr Index; + + public: + FunctionIndexObjectFile(MemoryBufferRef Object, + std::unique_ptr I); + ~FunctionIndexObjectFile() override; + + // TODO: Walk through FunctionMap entries for function symbols. + // However, currently these interfaces are not used by any consumers. + void moveSymbolNext(DataRefImpl &Symb) const override { + llvm_unreachable("not implemented"); + } + std::error_code printSymbolName(raw_ostream &OS, + DataRefImpl Symb) const override { + llvm_unreachable("not implemented"); + return std::error_code(); + } + uint32_t getSymbolFlags(DataRefImpl Symb) const override { + llvm_unreachable("not implemented"); + return 0; + } + basic_symbol_iterator symbol_begin_impl() const override { + llvm_unreachable("not implemented"); + return basic_symbol_iterator(BasicSymbolRef()); + } + basic_symbol_iterator symbol_end_impl() const override { + llvm_unreachable("not implemented"); + return basic_symbol_iterator(BasicSymbolRef()); + } + + const FunctionInfoIndex &getIndex() const { + return const_cast(this)->getIndex(); + } + FunctionInfoIndex &getIndex() { return *Index; } + std::unique_ptr takeIndex(); + + static inline bool classof(const Binary *v) { return v->isFunctionIndex(); } + + /// \brief Finds and returns bitcode embedded in the given object file, or an + /// error code if not found. + static ErrorOr findBitcodeInObject(const ObjectFile &Obj); + + /// \brief Finds and returns bitcode in the given memory buffer (which may + /// be either a bitcode file or a native object file with embedded bitcode), + /// or an error code if not found. + static ErrorOr findBitcodeInMemBuffer( + MemoryBufferRef Object); + + /// \brief Looks for function summary in the given memory buffer, + /// returns true if found, else false. 
+ static bool hasFunctionSummaryInMemBuffer(MemoryBufferRef Object, + LLVMContext &Context); + + /// \brief Parse function index in the given memory buffer. + /// Return new FunctionIndexObjectFile instance containing parsed function + /// summary/index. + static ErrorOr> create( + MemoryBufferRef Object, LLVMContext &Context, bool IsLazy = false); + + /// \brief Parse the function summary information for function with the + /// given name out of the given buffer. Parsed information is + /// stored on the index object saved in this object. + std::error_code findFunctionSummaryInMemBuffer(MemoryBufferRef Object, + LLVMContext &Context, + StringRef FunctionName); +}; +} +} + +#endif diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 165ab6e20c5..c93b4af11ca 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -27,6 +27,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/OperandTraits.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/FunctionInfo.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/DataStream.h" #include "llvm/Support/ManagedStatic.h" @@ -395,6 +396,96 @@ private: Function *F, DenseMap::iterator DeferredFunctionInfoIterator); }; + +/// Class to manage reading and parsing function summary index bitcode +/// files/sections. +class FunctionIndexBitcodeReader { + LLVMContext &Context; + DiagnosticHandlerFunction DiagnosticHandler; + + /// Eventually points to the function index built during parsing. + FunctionInfoIndex *TheIndex = nullptr; + + std::unique_ptr Buffer; + std::unique_ptr StreamFile; + BitstreamCursor Stream; + + /// \brief Used to indicate whether we are doing lazy parsing of summary data. + /// + /// If false, the summary section is fully parsed into the index during + /// the initial parse. Otherwise, if true, the caller is expected to + /// invoke \a readFunctionSummary for each summary needed, and the summary + /// section is thus parsed lazily. 
+ bool IsLazy = false; + + /// Used to indicate whether caller only wants to check for the presence + /// of the function summary bitcode section. All blocks are skipped, + /// but the SeenFuncSummary boolean is set. + bool CheckFuncSummaryPresenceOnly = false; + + /// Indicates whether we have encountered a function summary section + /// yet during parsing, used when checking if file contains function + /// summary section. + bool SeenFuncSummary = false; + + /// \brief Map populated during function summary section parsing, and + /// consumed during ValueSymbolTable parsing. + /// + /// Used to correlate summary records with VST entries. For the per-module + /// index this maps the ValueID to the parsed function summary, and + /// for the combined index this maps the summary record's bitcode + /// offset to the function summary (since in the combined index the + /// VST records do not hold value IDs but rather hold the function + /// summary record offset). + DenseMap> SummaryMap; + + /// Map populated during module path string table parsing, from the + /// module ID to a string reference owned by the index's module + /// path string table, used to correlate with combined index function + /// summary records. + DenseMap ModuleIdMap; + + public: + std::error_code error(BitcodeError E, const Twine &Message); + std::error_code error(BitcodeError E); + std::error_code error(const Twine &Message); + + FunctionIndexBitcodeReader(MemoryBuffer *Buffer, LLVMContext &Context, + DiagnosticHandlerFunction DiagnosticHandler, + bool IsLazy = false, + bool CheckFuncSummaryPresenceOnly = false); + FunctionIndexBitcodeReader(LLVMContext &Context, + DiagnosticHandlerFunction DiagnosticHandler, + bool IsLazy = false, + bool CheckFuncSummaryPresenceOnly = false); + ~FunctionIndexBitcodeReader() { freeState(); } + + void freeState(); + + void releaseBuffer(); + + /// Check if the parser has encountered a function summary section. 
+ bool foundFuncSummary() { return SeenFuncSummary; } + + /// \brief Main interface to parsing a bitcode buffer. + /// \returns true if an error occurred. + std::error_code parseSummaryIndexInto(std::unique_ptr Streamer, + FunctionInfoIndex *I); + + /// \brief Interface for parsing a function summary lazily. + std::error_code parseFunctionSummary(std::unique_ptr Streamer, + FunctionInfoIndex *I, + size_t FunctionSummaryOffset); + + private: + std::error_code parseModule(); + std::error_code parseValueSymbolTable(); + std::error_code parseEntireSummary(); + std::error_code parseModuleStringTable(); + std::error_code initStream(std::unique_ptr Streamer); + std::error_code initStreamFromBuffer(); + std::error_code initLazyStream(std::unique_ptr Streamer); +}; } // namespace BitcodeDiagnosticInfo::BitcodeDiagnosticInfo(std::error_code EC, @@ -3377,6 +3468,19 @@ std::error_code BitcodeReader::parseModule(bool Resume, } } +/// Helper to read the header common to all bitcode files. +static bool hasValidBitcodeHeader(BitstreamCursor &Stream) { + // Sniff for the signature. + if (Stream.Read(8) != 'B' || + Stream.Read(8) != 'C' || + Stream.Read(4) != 0x0 || + Stream.Read(4) != 0xC || + Stream.Read(4) != 0xE || + Stream.Read(4) != 0xD) + return false; + return true; +} + std::error_code BitcodeReader::parseBitcodeInto(std::unique_ptr Streamer, Module *M, bool ShouldLazyLoadMetadata) { @@ -3386,13 +3490,7 @@ BitcodeReader::parseBitcodeInto(std::unique_ptr Streamer, return EC; // Sniff for the signature. - if (Stream.Read(8) != 'B' || - Stream.Read(8) != 'C' || - Stream.Read(4) != 0x0 || - Stream.Read(4) != 0xC || - Stream.Read(4) != 0xE || - Stream.Read(4) != 0xD) - return error("Invalid bitcode signature"); + if (!hasValidBitcodeHeader(Stream)) return error("Invalid bitcode signature"); // We expect a number of well-defined blocks, though we don't necessarily // need to understand them all. 
@@ -3459,13 +3557,7 @@ ErrorOr BitcodeReader::parseTriple() { return EC; // Sniff for the signature. - if (Stream.Read(8) != 'B' || - Stream.Read(8) != 'C' || - Stream.Read(4) != 0x0 || - Stream.Read(4) != 0xC || - Stream.Read(4) != 0xE || - Stream.Read(4) != 0xD) - return error("Invalid bitcode signature"); + if (!hasValidBitcodeHeader(Stream)) return error("Invalid bitcode signature"); // We expect a number of well-defined blocks, though we don't necessarily // need to understand them all. @@ -5060,6 +5152,405 @@ BitcodeReader::initLazyStream(std::unique_ptr Streamer) { return std::error_code(); } +std::error_code FunctionIndexBitcodeReader::error(BitcodeError E, + const Twine &Message) { + return ::error(DiagnosticHandler, make_error_code(E), Message); +} + +std::error_code FunctionIndexBitcodeReader::error(const Twine &Message) { + return ::error(DiagnosticHandler, + make_error_code(BitcodeError::CorruptedBitcode), Message); +} + +std::error_code FunctionIndexBitcodeReader::error(BitcodeError E) { + return ::error(DiagnosticHandler, make_error_code(E)); +} + +FunctionIndexBitcodeReader::FunctionIndexBitcodeReader( + MemoryBuffer *Buffer, LLVMContext &Context, + DiagnosticHandlerFunction DiagnosticHandler, bool IsLazy, + bool CheckFuncSummaryPresenceOnly) + : Context(Context), + DiagnosticHandler(getDiagHandler(DiagnosticHandler, Context)), + Buffer(Buffer), + IsLazy(IsLazy), + CheckFuncSummaryPresenceOnly(CheckFuncSummaryPresenceOnly) {} + +FunctionIndexBitcodeReader::FunctionIndexBitcodeReader( + LLVMContext &Context, DiagnosticHandlerFunction DiagnosticHandler, + bool IsLazy, bool CheckFuncSummaryPresenceOnly) + : Context(Context), + DiagnosticHandler(getDiagHandler(DiagnosticHandler, Context)), + Buffer(nullptr), + IsLazy(IsLazy), + CheckFuncSummaryPresenceOnly(CheckFuncSummaryPresenceOnly) {} + +void FunctionIndexBitcodeReader::freeState() { Buffer = nullptr; } + +void FunctionIndexBitcodeReader::releaseBuffer() { Buffer.release(); } + +// Specialized value 
symbol table parser used when reading function index +// blocks where we don't actually create global values. +// At the end of this routine the function index is populated with a map +// from function name to FunctionInfo. The function info contains +// the function block's bitcode offset as well as the offset into the +// function summary section. +std::error_code FunctionIndexBitcodeReader::parseValueSymbolTable() { + if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID)) + return error("Invalid record"); + + SmallVector Record; + + // Read all the records for this value table. + SmallString<128> ValueName; + while (1) { + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + return error("Malformed block"); + case BitstreamEntry::EndBlock: + return std::error_code(); + case BitstreamEntry::Record: + // The interesting case. + break; + } + + // Read a record. + Record.clear(); + switch (Stream.readRecord(Entry.ID, Record)) { + default: // Default behavior: ignore (e.g. VST_CODE_BBENTRY records). 
+ break; + case bitc::VST_CODE_FNENTRY: { + // VST_FNENTRY: [valueid, offset, namechar x N] + if (convertToString(Record, 2, ValueName)) + return error("Invalid record"); + unsigned ValueID = Record[0]; + uint64_t FuncOffset = Record[1]; + std::unique_ptr FuncInfo = + llvm::make_unique(FuncOffset); + if (foundFuncSummary() && !IsLazy) { + DenseMap>::iterator SMI = + SummaryMap.find(ValueID); + assert(SMI != SummaryMap.end() && "Summary info not found"); + FuncInfo->setFunctionSummary(std::move(SMI->second)); + } + TheIndex->addFunctionInfo(ValueName, std::move(FuncInfo)); + + ValueName.clear(); + break; + } + case bitc::VST_CODE_COMBINED_FNENTRY: { + // VST_FNENTRY: [offset, namechar x N] + if (convertToString(Record, 1, ValueName)) + return error("Invalid record"); + uint64_t FuncSummaryOffset = Record[0]; + std::unique_ptr FuncInfo = + llvm::make_unique(FuncSummaryOffset); + if (foundFuncSummary() && !IsLazy) { + DenseMap>::iterator SMI = + SummaryMap.find(FuncSummaryOffset); + assert(SMI != SummaryMap.end() && "Summary info not found"); + FuncInfo->setFunctionSummary(std::move(SMI->second)); + } + TheIndex->addFunctionInfo(ValueName, std::move(FuncInfo)); + + ValueName.clear(); + break; + } + } + } +} + +// Parse just the blocks needed for function index building out of the module. +// At the end of this routine the function Index is populated with a map +// from function name to FunctionInfo. The function info contains +// either the parsed function summary information (when parsing summaries +// eagerly), or just to the function summary record's offset +// if parsing lazily (IsLazy). +std::error_code FunctionIndexBitcodeReader::parseModule() { + if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) + return error("Invalid record"); + + // Read the function index for this module. 
+ while (1) { + BitstreamEntry Entry = Stream.advance(); + + switch (Entry.Kind) { + case BitstreamEntry::Error: + return error("Malformed block"); + case BitstreamEntry::EndBlock: + return std::error_code(); + + case BitstreamEntry::SubBlock: + if (CheckFuncSummaryPresenceOnly) { + // Presence check only: sub-blocks are skipped, never parsed. + const bool IsSummaryBlock = + Entry.ID == bitc::FUNCTION_SUMMARY_BLOCK_ID; + if (IsSummaryBlock) + SeenFuncSummary = true; + if (Stream.SkipBlock()) return error("Invalid record"); + // No need to look at the rest once the summary has been found. + // Otherwise keep scanning: the summary block need not be the first + // sub-block of the module. + if (IsSummaryBlock) return std::error_code(); + continue; + } + switch (Entry.ID) { + default: // Skip unknown content. + if (Stream.SkipBlock()) return error("Invalid record"); + break; + case bitc::BLOCKINFO_BLOCK_ID: + // Need to parse these to get abbrev ids (e.g. for VST) + if (Stream.ReadBlockInfoBlock()) return error("Malformed block"); + break; + case bitc::VALUE_SYMTAB_BLOCK_ID: + if (std::error_code EC = parseValueSymbolTable()) return EC; + break; + case bitc::FUNCTION_SUMMARY_BLOCK_ID: + SeenFuncSummary = true; + if (IsLazy) { + // Lazy parsing of summary info, skip it. + if (Stream.SkipBlock()) return error("Invalid record"); + } else if (std::error_code EC = parseEntireSummary()) + return EC; + break; + case bitc::MODULE_STRTAB_BLOCK_ID: + if (std::error_code EC = parseModuleStringTable()) return EC; + break; + } + continue; + + case BitstreamEntry::Record: + Stream.skipRecord(Entry.ID); + continue; + } + } +} + +// Eagerly parse the entire function summary block (i.e. for all functions +// in the index). This populates the FunctionSummary objects in +// the index. +std::error_code FunctionIndexBitcodeReader::parseEntireSummary() { + if (Stream.EnterSubBlock(bitc::FUNCTION_SUMMARY_BLOCK_ID)) + return error("Invalid record"); + + SmallVector Record; + + while (1) { + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. 
+ case BitstreamEntry::Error: + return error("Malformed block"); + case BitstreamEntry::EndBlock: + return std::error_code(); + case BitstreamEntry::Record: + // The interesting case. + break; + } + + // Read a record. The record format depends on whether this + // is a per-module index or a combined index file. In the per-module + // case the records contain the associated value's ID for correlation + // with VST entries. In the combined index the correlation is done + // via the bitcode offset of the summary records (which were saved + // in the combined index VST entries). The records also contain + // information used for ThinLTO renaming and importing. + Record.clear(); + uint64_t CurRecordBit = Stream.GetCurrentBitNo(); + switch (Stream.readRecord(Entry.ID, Record)) { + default: // Default behavior: ignore. + break; + // FS_PERMODULE_ENTRY: [valueid, islocal, instcount] + case bitc::FS_CODE_PERMODULE_ENTRY: { + unsigned ValueID = Record[0]; + bool IsLocal = Record[1]; + unsigned InstCount = Record[2]; + std::unique_ptr FS = + llvm::make_unique(InstCount); + FS->setLocalFunction(IsLocal); + // The module path string ref set in the summary must be owned by the + // index's module string table. Since we don't have a module path + // string table section in the per-module index, we create a single + // module path string table entry with an empty (0) ID to take + // ownership. + FS->setModulePath( + TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0)); + SummaryMap[ValueID] = std::move(FS); + } + // FS_COMBINED_ENTRY: [modid, instcount] + case bitc::FS_CODE_COMBINED_ENTRY: { + uint64_t ModuleId = Record[0]; + unsigned InstCount = Record[1]; + std::unique_ptr FS = + llvm::make_unique(InstCount); + FS->setModulePath(ModuleIdMap[ModuleId]); + SummaryMap[CurRecordBit] = std::move(FS); + } + } + } + llvm_unreachable("Exit infinite loop"); +} + +// Parse the module string table block into the Index. +// This populates the ModulePathStringTable map in the index. 
+std::error_code FunctionIndexBitcodeReader::parseModuleStringTable() { + if (Stream.EnterSubBlock(bitc::MODULE_STRTAB_BLOCK_ID)) + return error("Invalid record"); + + SmallVector Record; + + SmallString<128> ModulePath; + while (1) { + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + return error("Malformed block"); + case BitstreamEntry::EndBlock: + return std::error_code(); + case BitstreamEntry::Record: + // The interesting case. + break; + } + + Record.clear(); + switch (Stream.readRecord(Entry.ID, Record)) { + default: // Default behavior: ignore. + break; + case bitc::MST_CODE_ENTRY: { + // MST_ENTRY: [modid, namechar x N] + if (convertToString(Record, 1, ModulePath)) + return error("Invalid record"); + uint64_t ModuleId = Record[0]; + StringRef ModulePathInMap = + TheIndex->addModulePath(ModulePath, ModuleId); + ModuleIdMap[ModuleId] = ModulePathInMap; + ModulePath.clear(); + break; + } + } + } + llvm_unreachable("Exit infinite loop"); +} + +// Parse the function info index from the bitcode streamer into the given index. +std::error_code FunctionIndexBitcodeReader::parseSummaryIndexInto( + std::unique_ptr Streamer, FunctionInfoIndex *I) { + TheIndex = I; + + if (std::error_code EC = initStream(std::move(Streamer))) return EC; + + // Sniff for the signature. + if (!hasValidBitcodeHeader(Stream)) return error("Invalid bitcode signature"); + + // We expect a number of well-defined blocks, though we don't necessarily + // need to understand them all. + while (1) { + if (Stream.AtEndOfStream()) { + // We didn't really read a proper Module block. 
+ return error("Malformed block"); + } + + BitstreamEntry Entry = + Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs); + + if (Entry.Kind != BitstreamEntry::SubBlock) return error("Malformed block"); + + // If we see a MODULE_BLOCK, parse it to find the blocks needed for + // building the function summary index. + if (Entry.ID == bitc::MODULE_BLOCK_ID) return parseModule(); + + if (Stream.SkipBlock()) return error("Invalid record"); + } +} + +// Parse the function information at the given offset in the buffer into +// the index. Used to support lazy parsing of function summaries from the +// combined index during importing. +// TODO: This function is not yet complete as it won't have a consumer +// until ThinLTO function importing is added. +std::error_code FunctionIndexBitcodeReader::parseFunctionSummary( + std::unique_ptr Streamer, FunctionInfoIndex *I, + size_t FunctionSummaryOffset) { + TheIndex = I; + + if (std::error_code EC = initStream(std::move(Streamer))) return EC; + + // Sniff for the signature. + if (!hasValidBitcodeHeader(Stream)) return error("Invalid bitcode signature"); + + Stream.JumpToBit(FunctionSummaryOffset); + + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + default: + return error("Malformed block"); + case BitstreamEntry::Record: + // The expected case. + break; + } + + // TODO: Read a record. This interface will be completed when ThinLTO + // importing is added so that it can be tested. 
+ SmallVector Record; + switch (Stream.readRecord(Entry.ID, Record)) { + default: + return error("Invalid record"); + } + + return std::error_code(); +} + +std::error_code FunctionIndexBitcodeReader::initStream( + std::unique_ptr Streamer) { + if (Streamer) return initLazyStream(std::move(Streamer)); + return initStreamFromBuffer(); +} + +std::error_code FunctionIndexBitcodeReader::initStreamFromBuffer() { + const unsigned char *BufPtr = (const unsigned char *)Buffer->getBufferStart(); + const unsigned char *BufEnd = BufPtr + Buffer->getBufferSize(); + + if (Buffer->getBufferSize() & 3) return error("Invalid bitcode signature"); + + // If we have a wrapper header, parse it and ignore the non-bc file contents. + // The magic number is 0x0B17C0DE stored in little endian. + if (isBitcodeWrapper(BufPtr, BufEnd)) + if (SkipBitcodeWrapperHeader(BufPtr, BufEnd, true)) + return error("Invalid bitcode wrapper header"); + + StreamFile.reset(new BitstreamReader(BufPtr, BufEnd)); + Stream.init(&*StreamFile); + + return std::error_code(); +} + +std::error_code FunctionIndexBitcodeReader::initLazyStream( + std::unique_ptr Streamer) { + // Check and strip off the bitcode wrapper; BitstreamReader expects never to + // see it. 
+ auto OwnedBytes = + llvm::make_unique(std::move(Streamer)); + StreamingMemoryObject &Bytes = *OwnedBytes; + StreamFile = llvm::make_unique(std::move(OwnedBytes)); + Stream.init(&*StreamFile); + + unsigned char buf[16]; + if (Bytes.readBytes(buf, 16, 0) != 16) + return error("Invalid bitcode signature"); + + if (!isBitcode(buf, buf + 16)) return error("Invalid bitcode signature"); + + if (isBitcodeWrapper(buf, buf + 4)) { + const unsigned char *bitcodeStart = buf; + const unsigned char *bitcodeEnd = buf + 16; + SkipBitcodeWrapperHeader(bitcodeStart, bitcodeEnd, false); + Bytes.dropLeadingBytes(bitcodeStart - buf); + Bytes.setKnownObjectSize(bitcodeEnd - bitcodeStart); + } + return std::error_code(); +} + namespace { class BitcodeErrorCategoryType : public std::error_category { const char *name() const LLVM_NOEXCEPT override { @@ -5181,3 +5672,81 @@ llvm::getBitcodeTargetTriple(MemoryBufferRef Buffer, LLVMContext &Context, return ""; return Triple.get(); } + +// Parse the specified bitcode buffer, returning the function info index. +// If IsLazy is false, parse the entire function summary into +// the index. Otherwise skip the function summary section, and only create +// an index object with a map from function name to function summary offset. +// The index is used to perform lazy function summary reading later. +ErrorOr> llvm::getFunctionInfoIndex( + MemoryBufferRef Buffer, LLVMContext &Context, + DiagnosticHandlerFunction DiagnosticHandler, bool IsLazy) { + std::unique_ptr Buf = MemoryBuffer::getMemBuffer(Buffer, false); + FunctionIndexBitcodeReader R(Buf.get(), Context, DiagnosticHandler, IsLazy); + + std::unique_ptr Index = + llvm::make_unique(); + + auto cleanupOnError = [&](std::error_code EC) { + R.releaseBuffer(); // Never take ownership on error. + return EC; + }; + + if (std::error_code EC = R.parseSummaryIndexInto(nullptr, Index.get())) + return cleanupOnError(EC); + + Buf.release(); // The FunctionIndexBitcodeReader owns it now. 
+ return std::move(Index); +} + +// Check if the given bitcode buffer contains a function summary block. +bool llvm::hasFunctionSummary(MemoryBufferRef Buffer, LLVMContext &Context, + DiagnosticHandlerFunction DiagnosticHandler) { + std::unique_ptr Buf = MemoryBuffer::getMemBuffer(Buffer, false); + FunctionIndexBitcodeReader R(Buf.get(), Context, DiagnosticHandler, false, + true); + + auto cleanupOnError = [&](std::error_code EC) { + R.releaseBuffer(); // Never take ownership on error. + return false; + }; + + if (std::error_code EC = R.parseSummaryIndexInto(nullptr, nullptr)) + return cleanupOnError(EC); + + Buf.release(); // The FunctionIndexBitcodeReader owns it now. + return R.foundFuncSummary(); +} + +// This method supports lazy reading of function summary data from the combined +// index during ThinLTO function importing. When reading the combined index +// file, getFunctionInfoIndex is first invoked with IsLazy=true. +// Then this method is called for each function considered for importing, +// to parse the summary information for the given function name into +// the index. +std::error_code llvm::readFunctionSummary( + MemoryBufferRef Buffer, LLVMContext &Context, + DiagnosticHandlerFunction DiagnosticHandler, StringRef FunctionName, + std::unique_ptr Index) { + std::unique_ptr Buf = MemoryBuffer::getMemBuffer(Buffer, false); + FunctionIndexBitcodeReader R(Buf.get(), Context, DiagnosticHandler); + + auto cleanupOnError = [&](std::error_code EC) { + R.releaseBuffer(); // Never take ownership on error. + return EC; + }; + + // Lookup the given function name in the FunctionMap, which may + // contain a list of function infos in the case of a COMDAT. Walk through + // and parse each function summary info at the function summary offset + // recorded when parsing the value symbol table. 
+ for (const auto &FI : Index->getFunctionInfoList(FunctionName)) { + size_t FunctionSummaryOffset = FI->bitcodeIndex(); + if (std::error_code EC = + R.parseFunctionSummary(nullptr, Index.get(), FunctionSummaryOffset)) + return cleanupOnError(EC); + } + + Buf.release(); // The FunctionIndexBitcodeReader owns it now. + return std::error_code(); +} diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index f6455728975..b8a000d3b7b 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -13,6 +13,7 @@ #include "llvm/Bitcode/ReaderWriter.h" #include "ValueEnumerator.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/Bitcode/BitstreamWriter.h" #include "llvm/Bitcode/LLVMBitCodes.h" @@ -23,6 +24,7 @@ #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/UseListOrder.h" @@ -2187,7 +2189,8 @@ static void WriteValueSymbolTable( const ValueSymbolTable &VST, const ValueEnumerator &VE, BitstreamWriter &Stream, uint64_t VSTOffsetPlaceholder = 0, uint64_t BitcodeStartBit = 0, - DenseMap *FunctionIndex = nullptr) { + DenseMap> *FunctionIndex = + nullptr) { if (VST.empty()) { // WriteValueSymbolTableForwardDecl should have returned early as // well. Ensure this handling remains in sync by asserting that @@ -2282,7 +2285,8 @@ static void WriteValueSymbolTable( // Save the word offset of the function (from the start of the // actual bitcode written to the stream). 
assert(FunctionIndex->count(F) == 1); - uint64_t BitcodeIndex = (*FunctionIndex)[F] - BitcodeStartBit; + uint64_t BitcodeIndex = + (*FunctionIndex)[F]->bitcodeIndex() - BitcodeStartBit; assert((BitcodeIndex & 31) == 0 && "function block not 32-bit aligned"); NameVals.push_back(BitcodeIndex / 32); @@ -2300,9 +2304,7 @@ static void WriteValueSymbolTable( AbbrevToUse = VST_ENTRY_7_ABBREV; } - for (const char *P = Name.getKeyData(), - *E = Name.getKeyData()+Name.getKeyLength(); P != E; ++P) - NameVals.push_back((unsigned char)*P); + for (const auto P : Name.getKey()) NameVals.push_back((unsigned char)P); // Emit the finished record. Stream.EmitRecord(Code, NameVals, AbbrevToUse); @@ -2311,6 +2313,68 @@ static void WriteValueSymbolTable( Stream.ExitBlock(); } +/// Emit function names and summary offsets for the combined index +/// used by ThinLTO. +static void WriteCombinedValueSymbolTable(const FunctionInfoIndex *Index, + BitstreamWriter &Stream) { + Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4); + + // 8-bit fixed-width VST_COMBINED_FNENTRY function strings. + BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_FNENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); + unsigned FnEntry8BitAbbrev = Stream.EmitAbbrev(Abbv); + + // 7-bit fixed width VST_COMBINED_FNENTRY function strings. 
+ Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_FNENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); + unsigned FnEntry7BitAbbrev = Stream.EmitAbbrev(Abbv); + + // 6-bit char6 VST_COMBINED_FNENTRY function strings. + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_FNENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); + unsigned FnEntry6BitAbbrev = Stream.EmitAbbrev(Abbv); + + // FIXME: We know if the type names can use 7-bit ascii. + SmallVector NameVals; + + for (const auto &FII : *Index) { + for (const auto &FI : FII.getValue()) { + NameVals.push_back(FI->bitcodeIndex()); + + StringRef FuncName = FII.first(); + + // Figure out the encoding to use for the name. + StringEncoding Bits = getStringEncoding(FuncName.data(), FuncName.size()); + + // VST_COMBINED_FNENTRY: [funcsumoffset, namechar x N] + unsigned AbbrevToUse = FnEntry8BitAbbrev; + if (Bits == SE_Char6) + AbbrevToUse = FnEntry6BitAbbrev; + else if (Bits == SE_Fixed7) + AbbrevToUse = FnEntry7BitAbbrev; + + for (const auto P : FuncName) NameVals.push_back((unsigned char)P); + + // Emit the finished record. 
+ Stream.EmitRecord(bitc::VST_CODE_COMBINED_FNENTRY, NameVals, AbbrevToUse); + NameVals.clear(); + } + } + Stream.ExitBlock(); +} + static void WriteUseList(ValueEnumerator &VE, UseListOrder &&Order, BitstreamWriter &Stream) { assert(Order.Shuffle.size() >= 2 && "Shuffle too small"); @@ -2345,14 +2409,33 @@ static void WriteUseListBlock(const Function *F, ValueEnumerator &VE, Stream.ExitBlock(); } -/// WriteFunction - Emit a function body to the module stream. -static void WriteFunction(const Function &F, ValueEnumerator &VE, - BitstreamWriter &Stream, - DenseMap &FunctionIndex) { +/// \brief Save information for the given function into the function index. +/// +/// At a minimum this saves the bitcode index of the function record that +/// was just written. However, if we are emitting function summary information, +/// for example for ThinLTO, then a \a FunctionSummary object is created +/// to hold the provided summary information. +static void SaveFunctionInfo( + const Function &F, + DenseMap> &FunctionIndex, + unsigned NumInsts, uint64_t BitcodeIndex, bool EmitFunctionSummary) { + std::unique_ptr FuncSummary; + if (EmitFunctionSummary) { + FuncSummary = llvm::make_unique(NumInsts); + FuncSummary->setLocalFunction(F.hasLocalLinkage()); + } + FunctionIndex[&F] = + llvm::make_unique(BitcodeIndex, std::move(FuncSummary)); +} + +/// Emit a function body to the module stream. +static void WriteFunction( + const Function &F, ValueEnumerator &VE, BitstreamWriter &Stream, + DenseMap> &FunctionIndex, + bool EmitFunctionSummary) { // Save the bitcode index of the start of this function block for recording // in the VST. 
uint64_t BitcodeIndex = Stream.GetCurrentBitNo(); - FunctionIndex[&F] = BitcodeIndex; Stream.EnterSubblock(bitc::FUNCTION_BLOCK_ID, 4); VE.incorporateFunction(F); @@ -2379,6 +2462,7 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE, bool NeedsMetadataAttachment = F.hasMetadata(); DILocation *LastDL = nullptr; + unsigned NumInsts = 0; // Finally, emit all the instructions, in order. for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) @@ -2386,6 +2470,8 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE, I != E; ++I) { WriteInstruction(*I, InstID, VE, Stream, Vals); + if (!isa(I)) ++NumInsts; + if (!I->getType()->isVoidTy()) ++InstID; @@ -2422,6 +2508,9 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE, WriteUseListBlock(&F, VE, Stream); VE.purgeFunction(); Stream.ExitBlock(); + + SaveFunctionInfo(F, FunctionIndex, NumInsts, BitcodeIndex, + EmitFunctionSummary); } // Emit blockinfo, which defines the standard abbreviations etc. @@ -2599,10 +2688,155 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) { Stream.ExitBlock(); } +/// Write the module path strings, currently only used when generating +/// a combined index file. +static void WriteModStrings(const FunctionInfoIndex *I, + BitstreamWriter &Stream) { + Stream.EnterSubblock(bitc::MODULE_STRTAB_BLOCK_ID, 3); + + // TODO: See which abbrev sizes we actually need to emit + + // 8-bit fixed-width MST_ENTRY strings. + BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); + unsigned Abbrev8Bit = Stream.EmitAbbrev(Abbv); + + // 7-bit fixed width MST_ENTRY strings. 
+ Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); + unsigned Abbrev7Bit = Stream.EmitAbbrev(Abbv); + + // 6-bit char6 MST_ENTRY strings. + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); + unsigned Abbrev6Bit = Stream.EmitAbbrev(Abbv); + + SmallVector NameVals; + for (const StringMapEntry &MPSE : I->modPathStringEntries()) { + StringEncoding Bits = + getStringEncoding(MPSE.getKey().data(), MPSE.getKey().size()); + unsigned AbbrevToUse = Abbrev8Bit; + if (Bits == SE_Char6) + AbbrevToUse = Abbrev6Bit; + else if (Bits == SE_Fixed7) + AbbrevToUse = Abbrev7Bit; + + NameVals.push_back(MPSE.getValue()); + + for (const auto P : MPSE.getKey()) NameVals.push_back((unsigned char)P); + + // Emit the finished record. + Stream.EmitRecord(bitc::MST_CODE_ENTRY, NameVals, AbbrevToUse); + NameVals.clear(); + } + Stream.ExitBlock(); +} + +// Helper to emit a single function summary record. +static void WritePerModuleFunctionSummaryRecord( + SmallVector &NameVals, FunctionSummary *FS, unsigned ValueID, + unsigned FSAbbrev, BitstreamWriter &Stream) { + assert(FS); + NameVals.push_back(ValueID); + NameVals.push_back(FS->isLocalFunction()); + NameVals.push_back(FS->instCount()); + + // Emit the finished record. + Stream.EmitRecord(bitc::FS_CODE_PERMODULE_ENTRY, NameVals, FSAbbrev); + NameVals.clear(); +} + +/// Emit the per-module function summary section alongside the rest of +/// the module's bitcode. 
+static void WritePerModuleFunctionSummary( + DenseMap> &FunctionIndex, + const Module *M, const ValueEnumerator &VE, BitstreamWriter &Stream) { + Stream.EnterSubblock(bitc::FUNCTION_SUMMARY_BLOCK_ID, 3); + + // Abbrev for FS_CODE_PERMODULE_ENTRY. + BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::FS_CODE_PERMODULE_ENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // islocal + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount + unsigned FSAbbrev = Stream.EmitAbbrev(Abbv); + + SmallVector NameVals; + for (auto &I : FunctionIndex) { + // Skip anonymous functions. We will emit a function summary for + // any aliases below. + if (!I.first->hasName()) continue; + + WritePerModuleFunctionSummaryRecord( + NameVals, I.second->functionSummary(), + VE.getValueID(M->getValueSymbolTable().lookup(I.first->getName())), + FSAbbrev, Stream); + } + + for (const GlobalAlias &A : M->aliases()) { + if (!A.getBaseObject()) continue; + const Function *F = dyn_cast(A.getBaseObject()); + if (!F || F->isDeclaration()) continue; + + assert(FunctionIndex.count(F) == 1); + WritePerModuleFunctionSummaryRecord( + NameVals, FunctionIndex[F]->functionSummary(), + VE.getValueID(M->getValueSymbolTable().lookup(A.getName())), FSAbbrev, + Stream); + } + + Stream.ExitBlock(); +} + +/// Emit the combined function summary section into the combined index +/// file. +static void WriteCombinedFunctionSummary(const FunctionInfoIndex *I, + BitstreamWriter &Stream) { + Stream.EnterSubblock(bitc::FUNCTION_SUMMARY_BLOCK_ID, 3); + + // Abbrev for FS_CODE_COMBINED_ENTRY. 
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::FS_CODE_COMBINED_ENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount + unsigned FSAbbrev = Stream.EmitAbbrev(Abbv); + + SmallVector NameVals; + for (const auto &FII : *I) { + for (auto &FI : FII.getValue()) { + FunctionSummary *FS = FI->functionSummary(); + assert(FS); + + NameVals.push_back(I->getModuleId(FS->modulePath())); + NameVals.push_back(FS->instCount()); + + // Record the starting offset of this summary entry for use + // in the VST entry. Add the current code size since the + // reader will invoke readRecord after the abbrev id read. + FI->setBitcodeIndex(Stream.GetCurrentBitNo() + Stream.GetAbbrevIDWidth()); + + // Emit the finished record. + Stream.EmitRecord(bitc::FS_CODE_COMBINED_ENTRY, NameVals, FSAbbrev); + NameVals.clear(); + } + } + + Stream.ExitBlock(); +} + /// WriteModule - Emit the specified module to the bitstream. static void WriteModule(const Module *M, BitstreamWriter &Stream, bool ShouldPreserveUseListOrder, - uint64_t BitcodeStartBit) { + uint64_t BitcodeStartBit, bool EmitFunctionSummary) { Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); SmallVector Vals; @@ -2647,10 +2881,15 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream, WriteOperandBundleTags(M, Stream); // Emit function bodies. - DenseMap FunctionIndex; + DenseMap> FunctionIndex; for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) if (!F->isDeclaration()) - WriteFunction(*F, VE, Stream, FunctionIndex); + WriteFunction(*F, VE, Stream, FunctionIndex, EmitFunctionSummary); + + // Need to write after the above call to WriteFunction which populates + // the summary information in the index. 
+ if (EmitFunctionSummary) + WritePerModuleFunctionSummary(FunctionIndex, M, VE, Stream); WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream, VSTOffsetPlaceholder, BitcodeStartBit, &FunctionIndex); @@ -2728,10 +2967,22 @@ static void EmitDarwinBCHeaderAndTrailer(SmallVectorImpl &Buffer, Buffer.push_back(0); } +/// Helper to write the header common to all bitcode files. +static void WriteBitcodeHeader(BitstreamWriter &Stream) { + // Emit the file header. + Stream.Emit((unsigned)'B', 8); + Stream.Emit((unsigned)'C', 8); + Stream.Emit(0x0, 4); + Stream.Emit(0xC, 4); + Stream.Emit(0xE, 4); + Stream.Emit(0xD, 4); +} + /// WriteBitcodeToFile - Write the specified module to the specified output /// stream. void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, - bool ShouldPreserveUseListOrder) { + bool ShouldPreserveUseListOrder, + bool EmitFunctionSummary) { SmallVector Buffer; Buffer.reserve(256*1024); @@ -2751,15 +3002,11 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, uint64_t BitcodeStartBit = Stream.GetCurrentBitNo(); // Emit the file header. - Stream.Emit((unsigned)'B', 8); - Stream.Emit((unsigned)'C', 8); - Stream.Emit(0x0, 4); - Stream.Emit(0xC, 4); - Stream.Emit(0xE, 4); - Stream.Emit(0xD, 4); + WriteBitcodeHeader(Stream); // Emit the module. - WriteModule(M, Stream, ShouldPreserveUseListOrder, BitcodeStartBit); + WriteModule(M, Stream, ShouldPreserveUseListOrder, BitcodeStartBit, + EmitFunctionSummary); } if (TT.isOSDarwin()) @@ -2768,3 +3015,38 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, // Write the generated bitstream to "Out". Out.write((char*)&Buffer.front(), Buffer.size()); } + +// Write the specified function summary index to the given raw output stream, +// where it will be written in a new bitcode block. This is used when +// writing the combined index file for ThinLTO. 
+void llvm::WriteFunctionSummaryToFile(const FunctionInfoIndex *Index, + raw_ostream &Out) { + SmallVector Buffer; + Buffer.reserve(256 * 1024); + + BitstreamWriter Stream(Buffer); + + // Emit the bitcode header. + WriteBitcodeHeader(Stream); + + Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); + + SmallVector Vals; + unsigned CurVersion = 1; + Vals.push_back(CurVersion); + Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals); + + // Write the module paths in the combined index. + WriteModStrings(Index, Stream); + + // Write the function summary combined index records. + WriteCombinedFunctionSummary(Index, Stream); + + // Need a special VST writer for the combined index (we don't have a + // real VST and real values when this is invoked). + WriteCombinedValueSymbolTable(Index, Stream); + + Stream.ExitBlock(); + + Out.write((char *)&Buffer.front(), Buffer.size()); +} diff --git a/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/lib/Bitcode/Writer/BitcodeWriterPass.cpp index 3165743576e..24de99a34d3 100644 --- a/lib/Bitcode/Writer/BitcodeWriterPass.cpp +++ b/lib/Bitcode/Writer/BitcodeWriterPass.cpp @@ -19,7 +19,7 @@ using namespace llvm; PreservedAnalyses BitcodeWriterPass::run(Module &M) { - WriteBitcodeToFile(&M, OS, ShouldPreserveUseListOrder); + WriteBitcodeToFile(&M, OS, ShouldPreserveUseListOrder, EmitFunctionSummary); return PreservedAnalyses::all(); } @@ -27,17 +27,21 @@ namespace { class WriteBitcodePass : public ModulePass { raw_ostream &OS; // raw_ostream to print on bool ShouldPreserveUseListOrder; + bool EmitFunctionSummary; public: static char ID; // Pass identification, replacement for typeid - explicit WriteBitcodePass(raw_ostream &o, bool ShouldPreserveUseListOrder) + explicit WriteBitcodePass(raw_ostream &o, bool ShouldPreserveUseListOrder, + bool EmitFunctionSummary) : ModulePass(ID), OS(o), - ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {} + ShouldPreserveUseListOrder(ShouldPreserveUseListOrder), + EmitFunctionSummary(EmitFunctionSummary) {} 
const char *getPassName() const override { return "Bitcode Writer"; } bool runOnModule(Module &M) override { - WriteBitcodeToFile(&M, OS, ShouldPreserveUseListOrder); + WriteBitcodeToFile(&M, OS, ShouldPreserveUseListOrder, + EmitFunctionSummary); return false; } }; @@ -46,6 +50,8 @@ namespace { char WriteBitcodePass::ID = 0; ModulePass *llvm::createBitcodeWriterPass(raw_ostream &Str, - bool ShouldPreserveUseListOrder) { - return new WriteBitcodePass(Str, ShouldPreserveUseListOrder); + bool ShouldPreserveUseListOrder, + bool EmitFunctionSummary) { + return new WriteBitcodePass(Str, ShouldPreserveUseListOrder, + EmitFunctionSummary); } diff --git a/lib/IR/CMakeLists.txt b/lib/IR/CMakeLists.txt index 9bc44570734..472178f5122 100644 --- a/lib/IR/CMakeLists.txt +++ b/lib/IR/CMakeLists.txt @@ -39,6 +39,7 @@ add_llvm_library(LLVMCore PassManager.cpp PassRegistry.cpp Statepoint.cpp + FunctionInfo.cpp Type.cpp TypeFinder.cpp Use.cpp diff --git a/lib/IR/FunctionInfo.cpp b/lib/IR/FunctionInfo.cpp new file mode 100644 index 00000000000..015f1235f6d --- /dev/null +++ b/lib/IR/FunctionInfo.cpp @@ -0,0 +1,63 @@ +//===-- FunctionInfo.cpp - Function Info Index ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the function info index and summary classes for the +// IR library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/FunctionInfo.h" +#include "llvm/ADT/StringMap.h" +using namespace llvm; + +// Create the combined function index/summary from multiple +// per-module instances. 
+void FunctionInfoIndex::mergeFrom(std::unique_ptr Other, + uint64_t NextModuleId) { + + StringRef ModPath; + for (auto &OtherFuncInfoLists : *Other) { + StringRef FuncName = OtherFuncInfoLists.getKey(); + FunctionInfoList &List = OtherFuncInfoLists.second; + + // Assert that the func info list only has one entry, since we shouldn't + // have duplicate names within a single per-module index. + assert(List.size() == 1); + std::unique_ptr Info = std::move(List.front()); + + // Add the module path string ref for this module if we haven't already + // saved a reference to it. + if (ModPath.empty()) + ModPath = + addModulePath(Info->functionSummary()->modulePath(), NextModuleId); + else + assert(ModPath == Info->functionSummary()->modulePath() && + "Each module in the combined map should have a unique ID"); + + // Note the module path string ref was copied above and is still owned by + // the original per-module index. Reset it to the new module path + // string reference owned by the combined index. + Info->functionSummary()->setModulePath(ModPath); + + // If it is a local function, rename it. + if (Info->functionSummary()->isLocalFunction()) { + // Any local functions are virtually renamed when being added to the + // combined index map, to disambiguate from other functions with + // the same name. The symbol table created for the combined index + // file should contain the renamed symbols. + FuncName = + FunctionInfoIndex::getGlobalNameForLocal(FuncName, NextModuleId); + } + + // Add new function info to existing list. There may be duplicates when + // combining FunctionMap entries, due to COMDAT functions. Any local + // functions were virtually renamed above. 
+ addFunctionInfo(FuncName, std::move(Info)); + } +} diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt index 8f10143fccc..2ac2ee51dc2 100644 --- a/lib/Object/CMakeLists.txt +++ b/lib/Object/CMakeLists.txt @@ -16,6 +16,7 @@ add_llvm_library(LLVMObject RecordStreamer.cpp SymbolicFile.cpp SymbolSize.cpp + FunctionIndexObjectFile.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/Object diff --git a/lib/Object/FunctionIndexObjectFile.cpp b/lib/Object/FunctionIndexObjectFile.cpp new file mode 100644 index 00000000000..c5f88fc2a2b --- /dev/null +++ b/lib/Object/FunctionIndexObjectFile.cpp @@ -0,0 +1,114 @@ +//===- FunctionIndexObjectFile.cpp - Function index file implementation ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Part of the FunctionIndexObjectFile class implementation. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/FunctionIndexObjectFile.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/IR/FunctionInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; +using namespace object; + +FunctionIndexObjectFile::FunctionIndexObjectFile( + MemoryBufferRef Object, std::unique_ptr I) + : SymbolicFile(Binary::ID_FunctionIndex, Object), Index(std::move(I)) {} + +FunctionIndexObjectFile::~FunctionIndexObjectFile() {} + +std::unique_ptr FunctionIndexObjectFile::takeIndex() { + return std::move(Index); +} + +ErrorOr FunctionIndexObjectFile::findBitcodeInObject( + const ObjectFile &Obj) { + for (const SectionRef &Sec : Obj.sections()) { + StringRef SecName; + if (std::error_code EC = Sec.getName(SecName)) return EC; + if (SecName == ".llvmbc") { + StringRef SecContents; + if (std::error_code EC = Sec.getContents(SecContents)) return EC; + return MemoryBufferRef(SecContents, Obj.getFileName()); + } + } + + return object_error::bitcode_section_not_found; +} + +ErrorOr FunctionIndexObjectFile::findBitcodeInMemBuffer( + MemoryBufferRef Object) { + sys::fs::file_magic Type = sys::fs::identify_magic(Object.getBuffer()); + switch (Type) { + case sys::fs::file_magic::bitcode: + return Object; + case sys::fs::file_magic::elf_relocatable: + case sys::fs::file_magic::macho_object: + case sys::fs::file_magic::coff_object: { + ErrorOr> ObjFile = + ObjectFile::createObjectFile(Object, Type); + if (!ObjFile) return ObjFile.getError(); + return findBitcodeInObject(*ObjFile->get()); + } + default: + return object_error::invalid_file_type; + } +} + +// Looks for function index in the given memory buffer. +// returns true if found, else false. 
+bool FunctionIndexObjectFile::hasFunctionSummaryInMemBuffer( + MemoryBufferRef Object, LLVMContext &Context) { + ErrorOr BCOrErr = findBitcodeInMemBuffer(Object); + if (!BCOrErr) return false; + + return hasFunctionSummary(BCOrErr.get(), Context, nullptr); +} + +// Parse function index in the given memory buffer. +// Return new FunctionIndexObjectFile instance containing parsed +// function summary/index. +ErrorOr> +FunctionIndexObjectFile::create(MemoryBufferRef Object, LLVMContext &Context, + bool IsLazy) { + std::unique_ptr Index; + + ErrorOr BCOrErr = findBitcodeInMemBuffer(Object); + if (!BCOrErr) return BCOrErr.getError(); + + ErrorOr> IOrErr = + getFunctionInfoIndex(BCOrErr.get(), Context, nullptr, IsLazy); + + if (std::error_code EC = IOrErr.getError()) return EC; + + Index = std::move(IOrErr.get()); + + return llvm::make_unique(Object, std::move(Index)); +} + +// Parse the function summary information for function with the +// given name out of the given buffer. Parsed information is +// stored on the index object saved in this object. +std::error_code FunctionIndexObjectFile::findFunctionSummaryInMemBuffer( + MemoryBufferRef Object, LLVMContext &Context, StringRef FunctionName) { + sys::fs::file_magic Type = sys::fs::identify_magic(Object.getBuffer()); + switch (Type) { + case sys::fs::file_magic::bitcode: { + return readFunctionSummary(Object, Context, nullptr, FunctionName, + std::move(Index)); + } + default: + return object_error::invalid_file_type; + } +} diff --git a/test/Bitcode/thinlto-function-summary.ll b/test/Bitcode/thinlto-function-summary.ll new file mode 100644 index 00000000000..9c2f2acd6c7 --- /dev/null +++ b/test/Bitcode/thinlto-function-summary.ll @@ -0,0 +1,45 @@ +; RUN: llvm-as -function-summary < %s | llvm-bcanalyzer -dump | FileCheck %s -check-prefix=BC +; Check for function summary block/records. 
+ +; BC: [#uses=0] +@h = external global void ()* ; [#uses=0] + +define internal void @0() nounwind { +entry: + store void()* @0, void()** @h + br label %return + +return: ; preds = %entry + ret void +} diff --git a/test/tools/gold/X86/Inputs/thinlto.ll b/test/tools/gold/X86/Inputs/thinlto.ll new file mode 100644 index 00000000000..4e0840f3691 --- /dev/null +++ b/test/tools/gold/X86/Inputs/thinlto.ll @@ -0,0 +1,4 @@ +define void @g() { +entry: + ret void +} diff --git a/test/tools/gold/X86/thinlto.ll b/test/tools/gold/X86/thinlto.ll new file mode 100644 index 00000000000..296b9460005 --- /dev/null +++ b/test/tools/gold/X86/thinlto.ll @@ -0,0 +1,22 @@ +; RUN: llvm-as -function-summary %s -o %t.o +; RUN: llvm-as -function-summary %p/Inputs/thinlto.ll -o %t2.o + +; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=thinlto \ +; RUN: -shared %t.o %t2.o -o %t3 +; RUN: llvm-bcanalyzer -dump %t3.thinlto.bc | FileCheck %s --check-prefix=COMBINED +; RUN: not test -e %t3 + +; COMBINED: '3') message(LDPL_FATAL, "Optimization level must be between 0 and 3"); @@ -376,6 +383,10 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, cf.handle = file->handle; + // If we are doing ThinLTO compilation, don't need to process the symbols. + // Later we simply build a combined index file after all files are claimed. 
+ if (options::thinlto) return LDPS_OK; + for (auto &Sym : Obj->symbols()) { uint32_t Symflags = Sym.getFlags(); if (shouldSkip(Symflags)) @@ -591,6 +602,30 @@ static void freeSymName(ld_plugin_symbol &Sym) { Sym.comdat_key = nullptr; } +static std::unique_ptr getFunctionIndexForFile( + LLVMContext &Context, claimed_file &F, ld_plugin_input_file &Info) { + + if (get_symbols(F.handle, F.syms.size(), &F.syms[0]) != LDPS_OK) + message(LDPL_FATAL, "Failed to get symbol information"); + + const void *View; + if (get_view(F.handle, &View) != LDPS_OK) + message(LDPL_FATAL, "Failed to get a view of file"); + + MemoryBufferRef BufferRef(StringRef((const char *)View, Info.filesize), + Info.name); + ErrorOr> ObjOrErr = + object::FunctionIndexObjectFile::create(BufferRef, Context); + + if (std::error_code EC = ObjOrErr.getError()) + message(LDPL_FATAL, "Could not read function index bitcode from file : %s", + EC.message().c_str()); + + object::FunctionIndexObjectFile &Obj = **ObjOrErr; + + return Obj.takeIndex(); +} + static std::unique_ptr getModuleForFile(LLVMContext &Context, claimed_file &F, ld_plugin_input_file &Info, raw_fd_ostream *ApiFile, @@ -857,6 +892,35 @@ static ld_plugin_status allSymbolsReadHook(raw_fd_ostream *ApiFile) { LLVMContext Context; Context.setDiagnosticHandler(diagnosticHandler, nullptr, true); + // If we are doing ThinLTO compilation, simply build the combined + // function index/summary and emit it. We don't need to parse the modules + // and link them in this case. 
+ if (options::thinlto) { + std::unique_ptr CombinedIndex(new FunctionInfoIndex()); + uint64_t NextModuleId = 0; + for (claimed_file &F : Modules) { + ld_plugin_input_file File; + if (get_input_file(F.handle, &File) != LDPS_OK) + message(LDPL_FATAL, "Failed to get file information"); + + std::unique_ptr Index = + getFunctionIndexForFile(Context, F, File); + CombinedIndex->mergeFrom(std::move(Index), ++NextModuleId); + } + + std::error_code EC; + raw_fd_ostream OS(output_name + ".thinlto.bc", EC, + sys::fs::OpenFlags::F_None); + if (EC) + message(LDPL_FATAL, "Unable to open %s.thinlto.bc for writing: %s", + output_name.data(), EC.message().c_str()); + WriteFunctionSummaryToFile(CombinedIndex.get(), OS); + OS.close(); + + cleanup_hook(); + exit(0); + } + std::unique_ptr Combined(new Module("ld-temp.o", Context)); Linker L(Combined.get()); diff --git a/tools/llvm-as/llvm-as.cpp b/tools/llvm-as/llvm-as.cpp index 4455d24fb60..d4e4d8d7107 100644 --- a/tools/llvm-as/llvm-as.cpp +++ b/tools/llvm-as/llvm-as.cpp @@ -44,6 +44,10 @@ Force("f", cl::desc("Enable binary output on terminals")); static cl::opt DisableOutput("disable-output", cl::desc("Disable output"), cl::init(false)); +static cl::opt +EmitFunctionSummary("function-summary", cl::desc("Emit function summary index"), + cl::init(false)); + static cl::opt DumpAsm("d", cl::desc("Print assembly as parsed"), cl::Hidden); @@ -77,7 +81,8 @@ static void WriteOutputFile(const Module *M) { } if (Force || !CheckBitcodeOutputToConsole(Out->os(), true)) - WriteBitcodeToFile(M, Out->os(), PreserveBitcodeUseListOrder); + WriteBitcodeToFile(M, Out->os(), PreserveBitcodeUseListOrder, + EmitFunctionSummary); // Declare success. 
Out->keep(); diff --git a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp index 25ec7e342e9..7243a6474e3 100644 --- a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp +++ b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp @@ -114,6 +114,9 @@ static const char *GetBlockName(unsigned BlockID, case bitc::METADATA_BLOCK_ID: return "METADATA_BLOCK"; case bitc::METADATA_ATTACHMENT_ID: return "METADATA_ATTACHMENT_BLOCK"; case bitc::USELIST_BLOCK_ID: return "USELIST_BLOCK_ID"; + case bitc::FUNCTION_SUMMARY_BLOCK_ID: + return "FUNCTION_SUMMARY_BLOCK"; + case bitc::MODULE_STRTAB_BLOCK_ID: return "MODULE_STRTAB_BLOCK"; } } @@ -268,6 +271,18 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID, STRINGIFY_CODE(VST_CODE, ENTRY) STRINGIFY_CODE(VST_CODE, BBENTRY) STRINGIFY_CODE(VST_CODE, FNENTRY) + STRINGIFY_CODE(VST_CODE, COMBINED_FNENTRY) + } + case bitc::MODULE_STRTAB_BLOCK_ID: + switch (CodeID) { + default: return nullptr; + STRINGIFY_CODE(MST_CODE, ENTRY) + } + case bitc::FUNCTION_SUMMARY_BLOCK_ID: + switch (CodeID) { + default: return nullptr; + STRINGIFY_CODE(FS_CODE, PERMODULE_ENTRY) + STRINGIFY_CODE(FS_CODE, COMBINED_ENTRY) } case bitc::METADATA_ATTACHMENT_ID: switch(CodeID) {