From bd24fe8c7e8038fe53accad67c480875f87df2a8 Mon Sep 17 00:00:00 2001 From: "Duncan P. N. Exon Smith" Date: Mon, 28 Jul 2014 21:19:41 +0000 Subject: [PATCH] Bitcode: Serialize (and recover) use-list order Predict and serialize use-list order in bitcode. This makes the option `-preserve-bc-use-list-order` work *most* of the time, but this is still experimental. - Builds a full value-table up front in the writer, sets up a list of use-list orders to write out, and discards the table. This is a simpler first step than determining the order from the various overlapping IDs of values on-the-fly. - The shuffles stored in the use-list order list have an unnecessarily large memory footprint. - `blockaddress` expressions cause functions to be materialized out-of-order. For now I've ignored this problem, so use-list orders will be wrong for constants used by functions that have block addresses taken. There are a couple of ways to fix this, but I don't have a concrete plan yet. - When materializing functions lazily, the use-lists for constants will not be correct. This use case is out of scope: what should the use-list order be, if it's incomplete? This is part of PR5680. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@214125 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Bitcode/LLVMBitCodes.h | 3 +- include/llvm/IR/UseListOrder.h | 13 ++ lib/Bitcode/Reader/BitcodeReader.cpp | 43 ++++- lib/Bitcode/Reader/BitcodeReader.h | 1 - lib/Bitcode/Writer/BitcodeWriter.cpp | 131 +++++---------- lib/Bitcode/Writer/ValueEnumerator.cpp | 193 ++++++++++++++++++++++ lib/Bitcode/Writer/ValueEnumerator.h | 4 + test/Bitcode/use-list-order.ll | 1 - tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp | 3 +- 9 files changed, 288 insertions(+), 104 deletions(-) diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h index ee2efa2257b..c42ecfe66de 100644 --- a/include/llvm/Bitcode/LLVMBitCodes.h +++ b/include/llvm/Bitcode/LLVMBitCodes.h @@ -330,7 +330,8 @@ namespace bitc { }; enum UseListCodes { - USELIST_CODE_ENTRY = 1 // USELIST_CODE_ENTRY: TBD. + USELIST_CODE_DEFAULT = 1, // DEFAULT: [index..., value-id] + USELIST_CODE_BB = 2 // BB: [index..., bb-id] }; enum AttributeKindCodes { diff --git a/include/llvm/IR/UseListOrder.h b/include/llvm/IR/UseListOrder.h index ebc08cbd94e..4326182c366 100644 --- a/include/llvm/IR/UseListOrder.h +++ b/include/llvm/IR/UseListOrder.h @@ -16,10 +16,23 @@ #define LLVM_IR_USELISTORDER_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include namespace llvm { class Module; +class Function; +class Value; + +/// \brief Structure to hold a use-list order. +struct UseListOrder { + const Function *F; + const Value *V; + SmallVector Shuffle; +}; + +typedef std::vector UseListOrderStack; /// \brief Whether to preserve use-list ordering. bool shouldPreserveBitcodeUseListOrder(); diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 47a39539e20..806a8a9fd07 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1620,9 +1620,8 @@ std::error_code BitcodeReader::ParseUseLists() { if (Stream.EnterSubBlock(bitc::USELIST_BLOCK_ID)) return Error(InvalidRecord); - SmallVector Record; - // Read all the records. + SmallVector Record; while (1) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); @@ -1639,14 +1638,42 @@ std::error_code BitcodeReader::ParseUseLists() { // Read a use list record. Record.clear(); + bool IsBB = false; switch (Stream.readRecord(Entry.ID, Record)) { default: // Default behavior: unknown type. break; - case bitc::USELIST_CODE_ENTRY: { // USELIST_CODE_ENTRY: TBD. + case bitc::USELIST_CODE_BB: + IsBB = true; + // fallthrough + case bitc::USELIST_CODE_DEFAULT: { unsigned RecordLength = Record.size(); - if (RecordLength < 1) - return Error(InvalidRecord); - UseListRecords.push_back(Record); + if (RecordLength < 3) + // Records should have at least an ID and two indexes. + return Error(InvalidRecord); + unsigned ID = Record.back(); + Record.pop_back(); + + Value *V; + if (IsBB) { + assert(ID < FunctionBBs.size() && "Basic block not found"); + V = FunctionBBs[ID]; + } else + V = ValueList[ID]; + unsigned NumUses = 0; + SmallDenseMap Order; + for (const Use &U : V->uses()) { + if (NumUses > Record.size()) + break; + Order[&U] = Record[NumUses++]; + } + if (Order.size() != Record.size() || NumUses > Record.size()) + // Mismatches can happen if the functions are being materialized lazily + // (out-of-order), or a value has been upgraded. + break; + + V->sortUseList([&](const Use &L, const Use &R) { + return Order.lookup(&L) < Order.lookup(&R); + }); break; } } @@ -2298,6 +2325,10 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { if (std::error_code EC = ParseMetadata()) return EC; break; + case bitc::USELIST_BLOCK_ID: + if (std::error_code EC = ParseUseLists()) + return EC; + break; } continue; diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h index 1d4869a2d5a..d27a3eff867 100644 --- a/lib/Bitcode/Reader/BitcodeReader.h +++ b/lib/Bitcode/Reader/BitcodeReader.h @@ -138,7 +138,6 @@ class BitcodeReader : public GVMaterializer { BitcodeReaderMDValueList MDValueList; std::vector ComdatList; SmallVector InstructionList; - SmallVector, 64> UseListRecords; std::vector > GlobalInits; std::vector > AliasInits; diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index cb3f42b071c..feb9a4299a4 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -1602,6 +1602,39 @@ static void WriteValueSymbolTable(const ValueSymbolTable &VST, Stream.ExitBlock(); } +static void WriteUseList(ValueEnumerator &VE, UseListOrder &&Order, + BitstreamWriter &Stream) { + assert(Order.Shuffle.size() >= 2 && "Shuffle too small"); + unsigned Code; + if (isa(Order.V)) + Code = bitc::USELIST_CODE_BB; + else + Code = bitc::USELIST_CODE_DEFAULT; + + SmallVector Record; + for (unsigned I : Order.Shuffle) + Record.push_back(I); + Record.push_back(VE.getValueID(Order.V)); + Stream.EmitRecord(Code, Record); +} + +static void WriteUseListBlock(const Function *F, ValueEnumerator &VE, + BitstreamWriter &Stream) { + auto hasMore = [&]() { + return !VE.UseListOrders.empty() && VE.UseListOrders.back().F == F; + }; + if (!hasMore()) + // Nothing to do. + return; + + Stream.EnterSubblock(bitc::USELIST_BLOCK_ID, 3); + while (hasMore()) { + WriteUseList(VE, std::move(VE.UseListOrders.back()), Stream); + VE.UseListOrders.pop_back(); + } + Stream.ExitBlock(); +} + /// WriteFunction - Emit a function body to the module stream. static void WriteFunction(const Function &F, ValueEnumerator &VE, BitstreamWriter &Stream) { @@ -1670,6 +1703,8 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE, if (NeedsMetadataAttachment) WriteMetadataAttachment(F, VE, Stream); + if (shouldPreserveBitcodeUseListOrder()) + WriteUseListBlock(&F, VE, Stream); VE.purgeFunction(); Stream.ExitBlock(); } @@ -1835,98 +1870,6 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) { Stream.ExitBlock(); } -// Sort the Users based on the order in which the reader parses the bitcode -// file. -static bool bitcodereader_order(const User *lhs, const User *rhs) { - // TODO: Implement. - return true; -} - -static void WriteUseList(const Value *V, const ValueEnumerator &VE, - BitstreamWriter &Stream) { - - // One or zero uses can't get out of order. - if (V->use_empty() || V->hasNUses(1)) - return; - - // Make a copy of the in-memory use-list for sorting. - SmallVector UserList(V->user_begin(), V->user_end()); - - // Sort the copy based on the order read by the BitcodeReader. - std::sort(UserList.begin(), UserList.end(), bitcodereader_order); - - // TODO: Generate a diff between the BitcodeWriter in-memory use-list and the - // sorted list (i.e., the expected BitcodeReader in-memory use-list). - - // TODO: Emit the USELIST_CODE_ENTRYs. -} - -static void WriteFunctionUseList(const Function *F, ValueEnumerator &VE, - BitstreamWriter &Stream) { - VE.incorporateFunction(*F); - - for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - AI != AE; ++AI) - WriteUseList(AI, VE, Stream); - for (Function::const_iterator BB = F->begin(), FE = F->end(); BB != FE; - ++BB) { - WriteUseList(BB, VE, Stream); - for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE; - ++II) { - WriteUseList(II, VE, Stream); - for (User::const_op_iterator OI = II->op_begin(), E = II->op_end(); - OI != E; ++OI) { - if ((isa(*OI) && !isa(*OI)) || - isa(*OI)) - WriteUseList(*OI, VE, Stream); - } - } - } - VE.purgeFunction(); -} - -// Emit use-lists. -static void WriteModuleUseLists(const Module *M, ValueEnumerator &VE, - BitstreamWriter &Stream) { - Stream.EnterSubblock(bitc::USELIST_BLOCK_ID, 3); - - // XXX: this modifies the module, but in a way that should never change the - // behavior of any pass or codegen in LLVM. The problem is that GVs may - // contain entries in the use_list that do not exist in the Module and are - // not stored in the .bc file. - for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); - I != E; ++I) - I->removeDeadConstantUsers(); - - // Write the global variables. - for (Module::const_global_iterator GI = M->global_begin(), - GE = M->global_end(); GI != GE; ++GI) { - WriteUseList(GI, VE, Stream); - - // Write the global variable initializers. - if (GI->hasInitializer()) - WriteUseList(GI->getInitializer(), VE, Stream); - } - - // Write the functions. - for (Module::const_iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) { - WriteUseList(FI, VE, Stream); - if (!FI->isDeclaration()) - WriteFunctionUseList(FI, VE, Stream); - if (FI->hasPrefixData()) - WriteUseList(FI->getPrefixData(), VE, Stream); - } - - // Write the aliases. - for (Module::const_alias_iterator AI = M->alias_begin(), AE = M->alias_end(); - AI != AE; ++AI) { - WriteUseList(AI, VE, Stream); - WriteUseList(AI->getAliasee(), VE, Stream); - } - - Stream.ExitBlock(); -} - /// WriteModule - Emit the specified module to the bitstream. static void WriteModule(const Module *M, BitstreamWriter &Stream) { Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); @@ -1969,9 +1912,9 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) { // Emit names for globals/functions etc. WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream); - // Emit use-lists. + // Emit module-level use-lists. if (shouldPreserveBitcodeUseListOrder()) - WriteModuleUseLists(M, VE, Stream); + WriteUseListBlock(nullptr, VE, Stream); // Emit function bodies. for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index 1430fad88ac..0421332e530 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -25,14 +25,207 @@ #include using namespace llvm; +namespace { +typedef DenseMap> OrderMap; +} + +static void orderValue(const Value *V, OrderMap &OM) { + if (OM.lookup(V).first) + return; + + if (const Constant *C = dyn_cast(V)) + if (C->getNumOperands() && !isa(C)) + for (const Value *Op : C->operands()) + if (!isa(Op)) + orderValue(Op, OM); + + // Note: we cannot cache this lookup above, since inserting into the map + // changes the map's size, and thus affects the ID. + OM[V].first = OM.size() + 1; +} + +static OrderMap orderModule(const Module *M) { + // This needs to match the order used by ValueEnumerator::ValueEnumerator() + // and ValueEnumerator::incorporateFunction(). + OrderMap OM; + + for (const GlobalVariable &G : M->globals()) + orderValue(&G, OM); + for (const Function &F : *M) + orderValue(&F, OM); + for (const GlobalAlias &A : M->aliases()) + orderValue(&A, OM); + for (const GlobalVariable &G : M->globals()) + if (G.hasInitializer()) + orderValue(G.getInitializer(), OM); + for (const GlobalAlias &A : M->aliases()) + orderValue(A.getAliasee(), OM); + for (const Function &F : *M) + if (F.hasPrefixData()) + orderValue(F.getPrefixData(), OM); + + for (const Function &F : *M) { + if (F.isDeclaration()) + continue; + // Here we need to match the union of ValueEnumerator::incorporateFunction() + // and WriteFunction(). Basic blocks are implicitly declared before + // anything else (by declaring their size). + for (const BasicBlock &BB : F) + orderValue(&BB, OM); + for (const Argument &A : F.args()) + orderValue(&A, OM); + for (const BasicBlock &BB : F) + for (const Instruction &I : BB) + for (const Value *Op : I.operands()) + if ((isa(*Op) && !isa(*Op)) || + isa(*Op)) + orderValue(Op, OM); + for (const BasicBlock &BB : F) + for (const Instruction &I : BB) + orderValue(&I, OM); + } + return OM; +} + +static void predictValueUseListOrderImpl(const Value *V, const Function *F, + unsigned ID, const OrderMap &OM, + UseListOrderStack &Stack) { + // Predict use-list order for this one. + typedef std::pair Entry; + SmallVector List; + for (const Use &U : V->uses()) + // Check if this user will be serialized. + if (OM.lookup(U.getUser()).first) + List.push_back(std::make_pair(&U, List.size())); + + if (List.size() < 2) + // We may have lost some users. + return; + + std::sort(List.begin(), List.end(), + [&OM, ID](const Entry &L, const Entry &R) { + const Use *LU = L.first; + const Use *RU = R.first; + auto LID = OM.lookup(LU->getUser()).first; + auto RID = OM.lookup(RU->getUser()).first; + // If ID is 4, then expect: 7 6 5 1 2 3. + if (LID < RID) { + if (RID < ID) + return true; + return false; + } + if (RID < LID) { + if (LID < ID) + return false; + return true; + } + // LID and RID are equal, so we have different operands of the same user. + // Assume operands are added in order for all instructions. + if (LU->getOperandNo() < RU->getOperandNo()) + return LID < ID; + return ID < LID; + }); + + if (std::is_sorted( + List.begin(), List.end(), + [](const Entry &L, const Entry &R) { return L.second < R.second; })) + // Order is already correct. + return; + + // Store the shuffle. + UseListOrder O; + O.V = V; + O.F = F; + for (auto &I : List) + O.Shuffle.push_back(I.second); + Stack.push_back(O); +} + +static void predictValueUseListOrder(const Value *V, const Function *F, + OrderMap &OM, UseListOrderStack &Stack) { + auto &IDPair = OM[V]; + assert(IDPair.first && "Unmapped value"); + if (IDPair.second) + // Already predicted. + return; + + // Do the actual prediction. + IDPair.second = true; + if (!V->use_empty() && std::next(V->use_begin()) != V->use_end()) + predictValueUseListOrderImpl(V, F, IDPair.first, OM, Stack); + + // Recursive descent into constants. + if (const Constant *C = dyn_cast(V)) + if (C->getNumOperands() && !isa(C)) + for (const Value *Op : C->operands()) + if (isa(Op) && !isa(Op)) + predictValueUseListOrder(Op, F, OM, Stack); +} + +static UseListOrderStack predictUseListOrder(const Module *M) { + OrderMap OM = orderModule(M); + + // Use-list orders need to be serialized after all the users have been added + // to a value, or else the shuffles will be incomplete. Store them per + // function in a stack. + // + // Aside from function order, the order of values doesn't matter much here. + UseListOrderStack Stack; + + // We want to visit the functions backward now so we can list function-local + // constants in the last Function they're used in. Module-level constants + // have already been visited above. + for (auto I = M->rbegin(), E = M->rend(); I != E; ++I) { + const Function &F = *I; + if (F.isDeclaration()) + continue; + for (const BasicBlock &BB : F) + predictValueUseListOrder(&BB, &F, OM, Stack); + for (const Argument &A : F.args()) + predictValueUseListOrder(&A, &F, OM, Stack); + for (const BasicBlock &BB : F) + for (const Instruction &I : BB) + for (const Value *Op : I.operands()) + if ((isa(*Op) && !isa(*Op)) || + isa(*Op)) + predictValueUseListOrder(Op, &F, OM, Stack); + for (const BasicBlock &BB : F) + for (const Instruction &I : BB) + predictValueUseListOrder(&I, &F, OM, Stack); + } + + // Visit globals last, since the module-level use-list block will be seen + // before the function bodies are processed. + for (const GlobalVariable &G : M->globals()) + predictValueUseListOrder(&G, nullptr, OM, Stack); + for (const Function &F : *M) + predictValueUseListOrder(&F, nullptr, OM, Stack); + for (const GlobalAlias &A : M->aliases()) + predictValueUseListOrder(&A, nullptr, OM, Stack); + for (const GlobalVariable &G : M->globals()) + if (G.hasInitializer()) + predictValueUseListOrder(G.getInitializer(), nullptr, OM, Stack); + for (const GlobalAlias &A : M->aliases()) + predictValueUseListOrder(A.getAliasee(), nullptr, OM, Stack); + for (const Function &F : *M) + if (F.hasPrefixData()) + predictValueUseListOrder(F.getPrefixData(), nullptr, OM, Stack); + + return Stack; +} + static bool isIntOrIntVectorValue(const std::pair &V) { return V.first->getType()->isIntOrIntVectorTy(); } /// ValueEnumerator - Enumerate module-level information. ValueEnumerator::ValueEnumerator(const Module *M) { + if (shouldPreserveBitcodeUseListOrder()) + UseListOrders = predictUseListOrder(M); + // Enumerate the global variables. for (Module::const_global_iterator I = M->global_begin(), + E = M->global_end(); I != E; ++I) EnumerateValue(I); diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h index 1c9f38e07b4..62488936b2f 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.h +++ b/lib/Bitcode/Writer/ValueEnumerator.h @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/UniqueVector.h" #include "llvm/IR/Attributes.h" +#include "llvm/IR/UseListOrder.h" #include namespace llvm { @@ -42,6 +43,9 @@ public: // For each value, we remember its Value* and occurrence frequency. typedef std::vector > ValueList; + + UseListOrderStack UseListOrders; + private: typedef DenseMap TypeMapType; TypeMapType TypeMap; diff --git a/test/Bitcode/use-list-order.ll b/test/Bitcode/use-list-order.ll index aef526403c6..6a1fedc1ff4 100644 --- a/test/Bitcode/use-list-order.ll +++ b/test/Bitcode/use-list-order.ll @@ -1,5 +1,4 @@ ; RUN: llvm-uselistorder < %s -preserve-bc-use-list-order -; XFAIL: * @a = global [4 x i1] [i1 0, i1 1, i1 0, i1 1] @b = alias i1* getelementptr ([4 x i1]* @a, i64 0, i64 2) diff --git a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp index 15567cf3123..63145c087a6 100644 --- a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp +++ b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp @@ -271,7 +271,8 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID, case bitc::USELIST_BLOCK_ID: switch(CodeID) { default:return nullptr; - case bitc::USELIST_CODE_ENTRY: return "USELIST_CODE_ENTRY"; + case bitc::USELIST_CODE_DEFAULT: return "USELIST_CODE_DEFAULT"; + case bitc::USELIST_CODE_BB: return "USELIST_CODE_BB"; } } } -- 2.34.1