From fff0879df07ce0f449b00a9ed4b3d7732735bc4e Mon Sep 17 00:00:00 2001 From: Gerolf Hoflehner Date: Wed, 30 Apr 2014 22:05:02 +0000 Subject: [PATCH] Patch for function cloning to inline all blocks whose address is taken Not all address taken blocks get inlined. The reason is that a blocks new address is known only when it is cloned. But e.g. a branch instruction in a different block could need that address earlier while it gets cloned. The solution is to collect the set of all blocks that can potentially get inlined and compute a new block address up front. Then clone and cleanup. rdar://16427209 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207713 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/CloneFunction.cpp | 140 +++++++++++++++++++------ 1 file changed, 106 insertions(+), 34 deletions(-) diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 5c8f20d5f88..5c2bbd19564 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -32,6 +32,7 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include +#include using namespace llvm; // CloneBasicBlock - See comments in Cloning.h @@ -272,42 +273,36 @@ namespace { NameSuffix(nameSuffix), CodeInfo(codeInfo), DL(DL) { } - /// CloneBlock - The specified block is found to be reachable, clone it and - /// anything that it can reach. + /// CloneBlock - The specified block is found to be reachable, so clone it + /// into NewBB. + /// ToClone is the vector of actually cloned blocks. + /// OrigBBs is the set of all blocks reacheable from the entry block. + /// It contains the block candidates and makes sure each block + /// is cloned at most once. void CloneBlock(const BasicBlock *BB, - std::vector &ToClone); + BasicBlock *NewBB, + std::vector &ToClone, + std::set &OrigBBs); }; } -/// CloneBlock - The specified block is found to be reachable, clone it and -/// anything that it can reach. +/// CloneBlock - The specified block is found to be reachable, so clone it +/// into NewBB. +/// ToClone is the vector of actually cloned blocks. +/// OrigBBs is the set of all blocks reacheable from the entry block. +/// It contains the block candidates and makes sure each block +/// is cloned at most once. void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, - std::vector &ToClone){ - WeakVH &BBEntry = VMap[BB]; - - // Have we already cloned this block? - if (BBEntry) return; + BasicBlock *NewBB, + std::vector &ToClone, + std::set &OrigBBs) { - // Nope, clone it now. - BasicBlock *NewBB; - BBEntry = NewBB = BasicBlock::Create(BB->getContext()); - if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); + // Remove BB from list of blocks to clone. + // When it was not in the list, it has been cloned already, so + // don't clone again. + if (!OrigBBs.erase(BB)) return; - // It is only legal to clone a function if a block address within that - // function is never referenced outside of the function. Given that, we - // want to map block addresses from the old function to block addresses in - // the clone. (This is different from the generic ValueMapper - // implementation, which generates an invalid blockaddress when - // cloning a function.) - // - // Note that we don't need to fix the mapping for unreachable blocks; - // the default mapping there is safe. - if (BB->hasAddressTaken()) { - Constant *OldBBAddr = BlockAddress::get(const_cast(OldFunc), - const_cast(BB)); - VMap[OldBBAddr] = BlockAddress::get(NewFunc, NewBB); - } - + // Nope, clone it now. bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; @@ -425,7 +420,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, const DataLayout *DL, Instruction *TheCall) { assert(NameSuffix && "NameSuffix cannot be null!"); - + #ifndef NDEBUG for (Function::const_arg_iterator II = OldFunc->arg_begin(), E = OldFunc->arg_end(); II != E; ++II) @@ -435,15 +430,91 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges, NameSuffix, CodeInfo, DL); - // Clone the entry block, and anything recursively reachable from it. + // Since all BB address references need to be known before block-by-block + // processing, we need to create all reachable blocks before processing + // them for instruction cloning and pruning. Some of these blocks may + // be removed due to later pruning. std::vector CloneWorklist; + // + // OrigBBs consists of all blocks reachable from the entry + // block. + // This list will be pruned down by the CloneFunction() due to two + // two optimizations: + // First, when a conditional branch target is known at compile-time, + // only the actual branch destination block needs to be cloned. + // Second, when a switch statement target is known at compile-time, + // only the actual case statement needs to be cloned. + std::set OrigBBs; + + CloneWorklist.push_back(&OldFunc->getEntryBlock()); + while (!CloneWorklist.empty()) { + const BasicBlock *BB = CloneWorklist.back(); + CloneWorklist.pop_back(); + + // Don't revisit blocks. + if (VMap.count(BB)) + continue; + + BasicBlock *NewBB = BasicBlock::Create(BB->getContext()); + if (BB->hasName()) + NewBB->setName(BB->getName() + NameSuffix); + + // It is legal to clone a function when a block address within that + // function is never escapes outside of the function. Given that, we + // want to map block addresses from the old function to block addresses in + // the clone. (This is different from the generic ValueMapper + // implementation, which generates an invalid block address when + // cloning a function.) + // Note the current escape address does not catch all legal cases: even + // when all block addresses taken are local and the function has the + // always_inline attribute due to the indirect branch inlining is + // suppressed. + // Note that we don't need to fix the mapping for unreachable blocks; + // the default mapping there is safe. + if (BB->hasAddressTaken()) { + Constant *OldBBAddr = BlockAddress::get(const_cast(OldFunc), + const_cast(BB)); + VMap[OldBBAddr] = BlockAddress::get(NewFunc, NewBB); + } + + OrigBBs.insert(BB); + VMap[BB] = NewBB; + // Iterate over all possible successors and add them to the CloneWorklist. + const TerminatorInst *Term = BB->getTerminator(); + for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { + BasicBlock *Succ = Term->getSuccessor(i); + CloneWorklist.push_back(Succ); + } + } + + // Now, fill only the reachable blocks with the cloned contents + // of the originals. + assert(CloneWorklist.empty() && "Dirty worklist before re-use\n"); CloneWorklist.push_back(&OldFunc->getEntryBlock()); while (!CloneWorklist.empty()) { const BasicBlock *BB = CloneWorklist.back(); CloneWorklist.pop_back(); - PFC.CloneBlock(BB, CloneWorklist); + PFC.CloneBlock(BB, cast(VMap[BB]), CloneWorklist, + OrigBBs); } - + + // FIXME: Delete BB's that were created but have been pruned. + // Actual cloning may have found pruning opportunities since + // branch or switch statement target may have been known at compile-time. + // Alternatively we could write a routine CloneFunction and add a) a + // parameter to actually do the cloning and b) a return parameter that + // gives a list of blocks that need to be cloned also. Then we could + // call CloneFunction when we collect the blocks to clone, but suppress + // cloning. And then actually *do* the cloning in the while loop above. Then + // the cleanup here would become redundant, and so would be the OrigBBs. + for (std::set::iterator Oi = OrigBBs.begin(), + Oe = OrigBBs.end(); Oi != Oe; ++Oi) { + const BasicBlock *Orig = *Oi; + BasicBlock *NewBB = cast(VMap[Orig]); + delete NewBB; + VMap[Orig] = 0; + } + // Loop over all of the basic blocks in the old function. If the block was // reachable, we have cloned it and the old block is now in the value map: // insert it into the new function in the right order. If not, ignore it. @@ -454,7 +525,8 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, BI != BE; ++BI) { Value *V = VMap[BI]; BasicBlock *NewBB = cast_or_null(V); - if (!NewBB) continue; // Dead block. + if (!NewBB) + continue; // Dead block. // Add the new block to the new function. NewFunc->getBasicBlockList().push_back(NewBB); -- 2.34.1