// trip counts. See LoopUnroll.cpp for unrolling loops with compile-time
// trip counts.
//
-// The functions in this file are used to generate extra code when the
+// The functions in this file are used to generate extra code when the
// run-time trip count modulo the unroll factor is not 0. When this is the
// case, we need to generate code to execute these 'left over' iterations.
//
-// The current strategy generates an if-then-else sequence prior to the
+// The current strategy generates an if-then-else sequence prior to the
// unrolled loop to execute the 'left over' iterations. Other strategies
// include generate a loop before or after the unrolled loop.
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-unroll"
#include "llvm/Transforms/Utils/UnrollLoop.h"
-#include "llvm/BasicBlock.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
-STATISTIC(NumRuntimeUnrolled,
+#define DEBUG_TYPE "loop-unroll"
+
+STATISTIC(NumRuntimeUnrolled,
"Number of loops unrolled with run-time trip counts");
/// Connect the unrolling prolog code to the original loop.
BasicBlock *OrigPH, BasicBlock *NewPH,
ValueToValueMapTy &LVMap, Pass *P) {
BasicBlock *Latch = L->getLoopLatch();
- assert(Latch != 0 && "Loop must have a latch");
+ assert(Latch && "Loop must have a latch");
// Create a PHI node for each outgoing value from the original loop
// (which means it is an outgoing value from the prolog code too).
} else {
NewPN->addIncoming(Constant::getNullValue(PN->getType()), OrigPH);
}
- Value *OrigVal = PN->getIncomingValueForBlock(Latch);
- Value *V = OrigVal;
+
+ Value *V = PN->getIncomingValueForBlock(Latch);
if (Instruction *I = dyn_cast<Instruction>(V)) {
if (L->contains(I)) {
V = LVMap[I];
new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, TripCount,
ConstantInt::get(TripCount->getType(), Count));
BasicBlock *Exit = L->getUniqueExitBlock();
- assert(Exit != 0 && "Loop must have a single exit block only");
+ assert(Exit && "Loop must have a single exit block only");
// Split the exit to maintain loop canonicalization guarantees
SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit));
if (!Exit->isLandingPad()) {
- SplitBlockPredecessors(Exit, Preds.data(), Preds.size(),
- ".unr-lcssa", P);
+ SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", P);
} else {
SmallVector<BasicBlock*, 2> NewBBs;
SplitLandingPadPredecessors(Exit, Preds, ".unr1-lcssa", ".unr2-lcssa",
/// There are two value maps that are defined and used. VMap is
/// for the values in the current loop instance. LVMap contains
/// the values from the last loop instance. We need the LVMap values
-/// to update the inital values for the current loop instance.
+/// to update the initial values for the current loop instance.
///
static void CloneLoopBlocks(Loop *L,
bool FirstCopy,
/// to the unroll factor as the number of *extra* copies added).
/// We assume also that the loop unroll factor is a power-of-two. So, after
/// unrolling the loop, the number of loop bodies executed is 2,
-/// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch
+/// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch
/// instruction in SimplifyCFG.cpp. Then, the backend decides how code for
/// the switch instruction is generated.
///
bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
LPPassManager *LPM) {
// for now, only unroll loops that contain a single exit
- SmallVector<BasicBlock*, 4> ExitingBlocks;
- L->getExitingBlocks(ExitingBlocks);
- if (ExitingBlocks.size() > 1)
+ if (!L->getExitingBlock())
return false;
// Make sure the loop is in canonical form, and there is a single
// exit block only.
- if (!L->isLoopSimplifyForm() || L->getUniqueExitBlock() == 0)
+ if (!L->isLoopSimplifyForm() || !L->getUniqueExitBlock())
return false;
// Use Scalar Evolution to compute the trip count. This allows more
// loops to be unrolled than relying on induction var simplification
+ if (!LPM)
+ return false;
ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
- if (SE == 0)
+ if (!SE)
return false;
// Only unroll loops with a computable trip count and the trip count needs
return false;
// Add 1 since the backedge count doesn't include the first loop iteration
- const SCEV *TripCountSC =
+ const SCEV *TripCountSC =
SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1));
if (isa<SCEVCouldNotCompute>(TripCountSC))
return false;
SCEVExpander Expander(*SE, "loop-unroll");
Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
PreHeaderBR);
- Type *CountTy = TripCount->getType();
- BinaryOperator *ModVal =
- BinaryOperator::CreateURem(TripCount,
- ConstantInt::get(CountTy, Count),
- "xtraiter");
- ModVal->insertBefore(PreHeaderBR);
-
- // Check if for no extra iterations, then jump to unrolled loop
- Value *BranchVal = new ICmpInst(PreHeaderBR,
- ICmpInst::ICMP_NE, ModVal,
- ConstantInt::get(CountTy, 0), "lcmp");
+
+ IRBuilder<> B(PreHeaderBR);
+ Value *ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
+
+ // Check if for no extra iterations, then jump to unrolled loop. We have to
+ // check that the trip count computation didn't overflow when adding one to
+ // the backedge taken count.
+ Value *LCmp = B.CreateIsNotNull(ModVal, "lcmp.mod");
+ Value *OverflowCheck = B.CreateIsNull(TripCount, "lcmp.overflow");
+ Value *BranchVal = B.CreateOr(OverflowCheck, LCmp, "lcmp.or");
+
// Branch to either the extra iterations or the unrolled loop
// We will fix up the true branch label when adding loop body copies
BranchInst::Create(PEnd, PEnd, BranchVal, PreHeaderBR);
- assert(PreHeaderBR->isUnconditional() &&
- PreHeaderBR->getSuccessor(0) == PEnd &&
+ assert(PreHeaderBR->isUnconditional() &&
+ PreHeaderBR->getSuccessor(0) == PEnd &&
"CFG edges in Preheader are not correct");
PreHeaderBR->eraseFromParent();
ValueToValueMapTy LVMap;
Function *F = Header->getParent();
// These variables are used to update the CFG links in each iteration
- BasicBlock *CompareBB = 0;
+ BasicBlock *CompareBB = nullptr;
BasicBlock *LastLoopBB = PH;
// Get an ordered list of blocks in the loop to help with the ordering of the
// cloned blocks in the prolog code
}
// The comparison w/ the extra iteration value and branch
+ Type *CountTy = TripCount->getType();
Value *BranchVal = new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, ModVal,
ConstantInt::get(CountTy, leftOverIters),
"un.tmp");
for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) {
for (BasicBlock::iterator I = NewBlocks[i]->begin(),
E = NewBlocks[i]->end(); I != E; ++I) {
- RemapInstruction(I, VMap,
+ RemapInstruction(I, VMap,
RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
}
}