X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FScalar%2FLoopUnroll.cpp;h=75ad96e49a2266546d0414ffd096ae9c3da27e2f;hb=1a2d667d6f0f57d252ddde093e3fc06ec2704829;hp=8e625202455e38a0f51767a448c200f07d687d19;hpb=d4bc564531ec9d7db7ec6630bc89f411f730cda1;p=oota-llvm.git diff --git a/lib/Transforms/Scalar/LoopUnroll.cpp b/lib/Transforms/Scalar/LoopUnroll.cpp index 8e625202455..75ad96e49a2 100644 --- a/lib/Transforms/Scalar/LoopUnroll.cpp +++ b/lib/Transforms/Scalar/LoopUnroll.cpp @@ -1,82 +1,70 @@ //===-- LoopUnroll.cpp - Loop unroller pass -------------------------------===// -// +// // The LLVM Compiler Infrastructure // -// This file was developed by the LLVM research group and is distributed under -// the University of Illinois Open Source License. See LICENSE.TXT for details. -// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// //===----------------------------------------------------------------------===// // // This pass implements a simple loop unroller. It works best when loops have // been canonicalized by the -indvars pass, allowing it to determine the trip // counts of loops easily. -// -// This pass is currently extremely limited. It only currently only unrolls -// single basic block loops that execute a constant number of times. -// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "loop-unroll" +#include "llvm/IntrinsicInst.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/IntrinsicInst.h" -#include -#include -#include +#include "llvm/Transforms/Utils/UnrollLoop.h" +#include using namespace llvm; -namespace { - Statistic<> NumUnrolled("loop-unroll", "Number of loops completely unrolled"); +static cl::opt +UnrollThreshold("unroll-threshold", cl::init(100), cl::Hidden, + cl::desc("The cut-off point for automatic loop unrolling")); - cl::opt - UnrollThreshold("unroll-threshold", cl::init(100), cl::Hidden, - cl::desc("The cut-off point for loop unrolling")); +static cl::opt +UnrollCount("unroll-count", cl::init(0), cl::Hidden, + cl::desc("Use this unroll count for all loops, for testing purposes")); - class LoopUnroll : public FunctionPass { - LoopInfo *LI; // The current loop information +namespace { + class VISIBILITY_HIDDEN LoopUnroll : public LoopPass { public: - virtual bool runOnFunction(Function &F); - bool visitLoop(Loop *L); + static char ID; // Pass ID, replacement for typeid + LoopUnroll() : LoopPass((intptr_t)&ID) {} + + /// A magic value for use with the Threshold parameter to indicate + /// that the loop unroll should be performed regardless of how much + /// code expansion would result. + static const unsigned NoThreshold = UINT_MAX; + + bool runOnLoop(Loop *L, LPPassManager &LPM); /// This transformation requires natural loop information & requires that /// loop preheaders be inserted into the CFG... /// virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); AU.addRequired(); + AU.addPreservedID(LCSSAID); AU.addPreserved(); } }; - RegisterOpt X("loop-unroll", "Unroll loops"); } -FunctionPass *llvm::createLoopUnrollPass() { return new LoopUnroll(); } +char LoopUnroll::ID = 0; +static RegisterPass X("loop-unroll", "Unroll loops"); -bool LoopUnroll::runOnFunction(Function &F) { - bool Changed = false; - LI = &getAnalysis(); - - // Transform all the top-level loops. Copy the loop list so that the child - // can update the loop tree if it needs to delete the loop. - std::vector SubLoops(LI->begin(), LI->end()); - for (unsigned i = 0, e = SubLoops.size(); i != e; ++i) - Changed |= visitLoop(SubLoops[i]); - - return Changed; -} +LoopPass *llvm::createLoopUnrollPass() { return new LoopUnroll(); } -/// ApproximateLoopSize - Approximate the size of the loop after it has been -/// unrolled. +/// ApproximateLoopSize - Approximate the size of the loop. static unsigned ApproximateLoopSize(const Loop *L) { unsigned Size = 0; for (unsigned i = 0, e = L->getBlocks().size(); i != e; ++i) { @@ -87,8 +75,17 @@ static unsigned ApproximateLoopSize(const Loop *L) { // Ignore PHI nodes in the header. } else if (I->hasOneUse() && I->use_back() == Term) { // Ignore instructions only used by the loop terminator. - } else if (DbgInfoIntrinsic *DbgI = dyn_cast(I)) { - // Ignore debug instructions + } else if (isa(I)) { + // Ignore debug instructions + } else if (isa(I)) { + // Estimate size overhead introduced by call instructions which + // is higher than other instructions. Here 3 and 10 are magic + // numbers that help one isolated test case from PR2067 without + // negatively impacting measured benchmarks. + if (isa(I)) + Size = Size + 3; + else + Size = Size + 10; } else { ++Size; } @@ -102,201 +99,45 @@ static unsigned ApproximateLoopSize(const Loop *L) { return Size; } -// RemapInstruction - Convert the instruction operands from referencing the -// current values into those specified by ValueMap. -// -static inline void RemapInstruction(Instruction *I, - std::map &ValueMap) { - for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) { - Value *Op = I->getOperand(op); - std::map::iterator It = ValueMap.find(Op); - if (It != ValueMap.end()) Op = It->second; - I->setOperand(op, Op); - } -} - -bool LoopUnroll::visitLoop(Loop *L) { - bool Changed = false; - - // Recurse through all subloops before we process this loop. Copy the loop - // list so that the child can update the loop tree if it needs to delete the - // loop. - std::vector SubLoops(L->begin(), L->end()); - for (unsigned i = 0, e = SubLoops.size(); i != e; ++i) - Changed |= visitLoop(SubLoops[i]); - - // We only handle single basic block loops right now. - if (L->getBlocks().size() != 1) - return Changed; +bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { + assert(L->isLCSSAForm()); + LoopInfo *LI = &getAnalysis(); - BasicBlock *BB = L->getHeader(); - BranchInst *BI = dyn_cast(BB->getTerminator()); - if (BI == 0) return Changed; // Must end in a conditional branch + BasicBlock *Header = L->getHeader(); + DOUT << "Loop Unroll: F[" << Header->getParent()->getName() + << "] Loop %" << Header->getName() << "\n"; - ConstantInt *TripCountC = dyn_cast_or_null(L->getTripCount()); - if (!TripCountC) return Changed; // Must have constant trip count! - - uint64_t TripCountFull = TripCountC->getRawValue(); - if (TripCountFull != TripCountC->getRawValue() || TripCountFull == 0) - return Changed; // More than 2^32 iterations??? - - unsigned LoopSize = ApproximateLoopSize(L); - DEBUG(std::cerr << "Loop Unroll: F[" << BB->getParent()->getName() - << "] Loop %" << BB->getName() << " Loop Size = " << LoopSize - << " Trip Count = " << TripCountFull << " - "); - uint64_t Size = (uint64_t)LoopSize*TripCountFull; - if (Size > UnrollThreshold) { - DEBUG(std::cerr << "TOO LARGE: " << Size << ">" << UnrollThreshold << "\n"); - return Changed; - } - DEBUG(std::cerr << "UNROLLING!\n"); + // Find trip count + unsigned TripCount = L->getSmallConstantTripCount(); + unsigned Count = UnrollCount; - unsigned TripCount = (unsigned)TripCountFull; - - BasicBlock *LoopExit = BI->getSuccessor(L->contains(BI->getSuccessor(0))); - - // Create a new basic block to temporarily hold all of the cloned code. - BasicBlock *NewBlock = new BasicBlock(); - - // For the first iteration of the loop, we should use the precloned values for - // PHI nodes. Insert associations now. - std::map LastValueMap; - std::vector OrigPHINode; - for (BasicBlock::iterator I = BB->begin(); isa(I); ++I) { - PHINode *PN = cast(I); - OrigPHINode.push_back(PN); - if (Instruction *I =dyn_cast(PN->getIncomingValueForBlock(BB))) - if (I->getParent() == BB) - LastValueMap[I] = I; - } - - // Remove the exit branch from the loop - BB->getInstList().erase(BI); - - assert(TripCount != 0 && "Trip count of 0 is impossible!"); - for (unsigned It = 1; It != TripCount; ++It) { - char SuffixBuffer[100]; - sprintf(SuffixBuffer, ".%d", It); - std::map ValueMap; - BasicBlock *New = CloneBasicBlock(BB, ValueMap, SuffixBuffer); - - // Loop over all of the PHI nodes in the block, changing them to use the - // incoming values from the previous block. - for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { - PHINode *NewPHI = cast(ValueMap[OrigPHINode[i]]); - Value *InVal = NewPHI->getIncomingValueForBlock(BB); - if (Instruction *InValI = dyn_cast(InVal)) - if (InValI->getParent() == BB) - InVal = LastValueMap[InValI]; - ValueMap[OrigPHINode[i]] = InVal; - New->getInstList().erase(NewPHI); + // Automatically select an unroll count. + if (Count == 0) { + // Conservative heuristic: if we know the trip count, see if we can + // completely unroll (subject to the threshold, checked below); otherwise + // don't unroll. + if (TripCount != 0) { + Count = TripCount; + } else { + return false; } - - for (BasicBlock::iterator I = New->begin(), E = New->end(); I != E; ++I) - RemapInstruction(I, ValueMap); - - // Now that all of the instructions are remapped, splice them into the end - // of the NewBlock. - NewBlock->getInstList().splice(NewBlock->end(), New->getInstList()); - delete New; - - // LastValue map now contains values from this iteration. - std::swap(LastValueMap, ValueMap); } - // If there was more than one iteration, replace any uses of values computed - // in the loop with values computed during the last iteration of the loop. - if (TripCount != 1) { - std::set Users; - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) - Users.insert(I->use_begin(), I->use_end()); - - // We don't want to reprocess entries with PHI nodes in them. For this - // reason, we look at each operand of each user exactly once, performing the - // stubstitution exactly once. - for (std::set::iterator UI = Users.begin(), E = Users.end(); UI != E; - ++UI) { - Instruction *I = cast(*UI); - if (I->getParent() != BB && I->getParent() != NewBlock) - RemapInstruction(I, LastValueMap); + // Enforce the threshold. + if (UnrollThreshold != NoThreshold) { + unsigned LoopSize = ApproximateLoopSize(L); + DOUT << " Loop Size = " << LoopSize << "\n"; + uint64_t Size = (uint64_t)LoopSize*Count; + if (TripCount != 1 && Size > UnrollThreshold) { + DOUT << " TOO LARGE TO UNROLL: " + << Size << ">" << UnrollThreshold << "\n"; + return false; } } - // Now that we cloned the block as many times as we needed, stitch the new - // code into the original block and delete the temporary block. - BB->getInstList().splice(BB->end(), NewBlock->getInstList()); - delete NewBlock; - - // Now loop over the PHI nodes in the original block, setting them to their - // incoming values. - BasicBlock *Preheader = L->getLoopPreheader(); - for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { - PHINode *PN = OrigPHINode[i]; - PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader)); - BB->getInstList().erase(PN); - } - - // Finally, add an unconditional branch to the block to continue into the exit - // block. - new BranchInst(LoopExit, BB); - - // At this point, the code is well formed. We now do a quick sweep over the - // inserted code, doing constant propagation and dead code elimination as we - // go. - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { - Instruction *Inst = I++; - - if (isInstructionTriviallyDead(Inst)) - BB->getInstList().erase(Inst); - else if (Constant *C = ConstantFoldInstruction(Inst)) { - Inst->replaceAllUsesWith(C); - BB->getInstList().erase(Inst); - } - } - - // Update the loop information for this loop. - Loop *Parent = L->getParentLoop(); - - // Move all of the basic blocks in the loop into the parent loop. - LI->changeLoopFor(BB, Parent); - - // Remove the loop from the parent. - if (Parent) - delete Parent->removeChildLoop(std::find(Parent->begin(), Parent->end(),L)); - else - delete LI->removeLoop(std::find(LI->begin(), LI->end(), L)); - - - // FIXME: Should update dominator analyses - - - // Now that everything is up-to-date that will be, we fold the loop block into - // the preheader and exit block, updating our analyses as we go. - LoopExit->getInstList().splice(LoopExit->begin(), BB->getInstList(), - BB->getInstList().begin(), - prior(BB->getInstList().end())); - LoopExit->getInstList().splice(LoopExit->begin(), Preheader->getInstList(), - Preheader->getInstList().begin(), - prior(Preheader->getInstList().end())); - - // Make all other blocks in the program branch to LoopExit now instead of - // Preheader. - Preheader->replaceAllUsesWith(LoopExit); - - // Remove BB and LoopExit from our analyses. - LI->removeBlock(Preheader); - LI->removeBlock(BB); - - // If the preheader was the entry block of this function, move the exit block - // to be the new entry of the loop. - Function *F = LoopExit->getParent(); - if (Preheader == &F->front()) - F->getBasicBlockList().splice(F->begin(), F->getBasicBlockList(), LoopExit); - - // Actually delete the blocks now. - F->getBasicBlockList().erase(Preheader); - F->getBasicBlockList().erase(BB); + // Unroll the loop. + if (!UnrollLoop(L, Count, LI, &LPM)) + return false; - ++NumUnrolled; return true; }