#define DEBUG_TYPE "loop-rotate"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Function.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CodeMetrics.h"
-#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
#define MAX_HEADER_SIZE 16
}
// LCSSA form makes instruction renaming easier.
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addPreserved<DominatorTree>();
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<TargetTransformInfo>();
}
- bool runOnLoop(Loop *L, LPPassManager &LPM);
- bool rotateLoop(Loop *L);
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+ bool simplifyLoopLatch(Loop *L);
+ bool rotateLoop(Loop *L, bool SimplifiedLatch);
private:
LoopInfo *LI;
+ const TargetTransformInfo *TTI;
};
}
char LoopRotate::ID = 0;
INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
/// Rotate Loop L as many times as possible. Return true if
/// the loop is rotated at least once.
bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
+ if (skipOptnoneFunction(L))
+ return false;
+
LI = &getAnalysis<LoopInfo>();
+ TTI = &getAnalysis<TargetTransformInfo>();
+
+ // Simplify the loop latch before attempting to rotate the header
+ // upward. Rotation may not be needed if the loop tail can be folded into the
+ // loop exit.
+ bool SimplifiedLatch = simplifyLoopLatch(L);
// One loop can be rotated multiple times.
bool MadeChange = false;
- while (rotateLoop(L))
+ while (rotateLoop(L, SimplifiedLatch)) {
MadeChange = true;
-
+ SimplifiedLatch = false;
+ }
return MadeChange;
}
}
}
+/// Determine whether the instructions in this range my be safely and cheaply
+/// speculated. This is not an important enough situation to develop complex
+/// heuristics. We handle a single arithmetic instruction along with any type
+/// conversions.
+static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
+ BasicBlock::iterator End) {
+ bool seenIncrement = false;
+ for (BasicBlock::iterator I = Begin; I != End; ++I) {
+
+ if (!isSafeToSpeculativelyExecute(I))
+ return false;
+
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+
+ switch (I->getOpcode()) {
+ default:
+ return false;
+ case Instruction::GetElementPtr:
+ // GEPs are cheap if all indices are constant.
+ if (!cast<GEPOperator>(I)->hasAllConstantIndices())
+ return false;
+ // fall-thru to increment case
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ if (seenIncrement)
+ return false;
+ seenIncrement = true;
+ break;
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // ignore type conversions
+ break;
+ }
+ }
+ return true;
+}
+
+/// Fold the loop tail into the loop exit by speculating the loop tail
+/// instructions. Typically, this is a single post-increment. In the case of a
+/// simple 2-block loop, hoisting the increment can be much better than
+/// duplicating the entire loop header. In the cast of loops with early exits,
+/// rotation will not work anyway, but simplifyLoopLatch will put the loop in
+/// canonical form so downstream passes can handle it.
+///
+/// I don't believe this invalidates SCEV.
+bool LoopRotate::simplifyLoopLatch(Loop *L) {
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch || Latch->hasAddressTaken())
+ return false;
+
+ BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
+ if (!Jmp || !Jmp->isUnconditional())
+ return false;
+
+ BasicBlock *LastExit = Latch->getSinglePredecessor();
+ if (!LastExit || !L->isLoopExiting(LastExit))
+ return false;
+
+ BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
+ if (!BI)
+ return false;
+
+ if (!shouldSpeculateInstrs(Latch->begin(), Jmp))
+ return false;
+
+ DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
+ << LastExit->getName() << "\n");
+
+ // Hoist the instructions from Latch into LastExit.
+ LastExit->getInstList().splice(BI, Latch->getInstList(), Latch->begin(), Jmp);
+
+ unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1;
+ BasicBlock *Header = Jmp->getSuccessor(0);
+ assert(Header == L->getHeader() && "expected a backward branch");
+
+ // Remove Latch from the CFG so that LastExit becomes the new Latch.
+ BI->setSuccessor(FallThruPath, Header);
+ Latch->replaceSuccessorsPhiUsesWith(LastExit);
+ Jmp->eraseFromParent();
+
+ // Nuke the Latch block.
+ assert(Latch->empty() && "unable to evacuate Latch");
+ LI->removeBlock(Latch);
+ if (DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+ DTWP->getDomTree().eraseNode(Latch);
+ Latch->eraseFromParent();
+ return true;
+}
+
/// Rotate loop LP. Return true if the loop is rotated.
-bool LoopRotate::rotateLoop(Loop *L) {
+///
+/// \param SimplifiedLatch is true if the latch was just folded into the final
+/// loop exit. In this case we may want to rotate even though the new latch is
+/// now an exiting branch. This rotation would have happened had the latch not
+/// been simplified. However, if SimplifiedLatch is false, then we avoid
+/// rotating loops in which the latch exits to avoid excessive or endless
+/// rotation. LoopRotate should be repeatable and converge to a canonical
+/// form. This property is satisfied because simplifying the loop latch can only
+/// happen once across multiple invocations of the LoopRotate pass.
+bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// If the loop has only one block then there is not much to rotate.
if (L->getBlocks().size() == 1)
return false;
BasicBlock *OrigHeader = L->getHeader();
+ BasicBlock *OrigLatch = L->getLoopLatch();
BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
if (BI == 0 || BI->isUnconditional())
if (!L->isLoopExiting(OrigHeader))
return false;
- // Updating PHInodes in loops with multiple exits adds complexity.
- // Keep it simple, and restrict loop rotation to loops with one exit only.
- // In future, lift this restriction and support for multiple exits if
- // required.
- SmallVector<BasicBlock*, 8> ExitBlocks;
- L->getExitBlocks(ExitBlocks);
- if (ExitBlocks.size() > 1)
+ // If the loop latch already contains a branch that leaves the loop then the
+ // loop is already rotated.
+ if (OrigLatch == 0)
+ return false;
+
+ // Rotate if either the loop latch does *not* exit the loop, or if the loop
+ // latch was just simplified.
+ if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch)
return false;
- // Check size of original header and reject loop if it is very big.
+ // Check size of original header and reject loop if it is very big or we can't
+ // duplicate blocks inside it.
{
CodeMetrics Metrics;
- Metrics.analyzeBasicBlock(OrigHeader);
+ Metrics.analyzeBasicBlock(OrigHeader, *TTI);
+ if (Metrics.notDuplicatable) {
+ DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable"
+ << " instructions: "; L->dump());
+ return false;
+ }
if (Metrics.NumInsts > MAX_HEADER_SIZE)
return false;
}
// Now, this loop is suitable for rotation.
BasicBlock *OrigPreheader = L->getLoopPreheader();
- BasicBlock *OrigLatch = L->getLoopLatch();
// If the loop could not be converted to canonical form, it must have an
// indirectbr in it, just give up.
- if (OrigPreheader == 0 || OrigLatch == 0)
+ if (OrigPreheader == 0)
return false;
// Anything ScalarEvolution may know about this loop or the PHI nodes
if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
SE->forgetLoop(L);
+ DEBUG(dbgs() << "LoopRotation: rotating "; L->dump());
+
// Find new Loop header. NewHeader is a Header's one and only successor
// that is inside loop. Header's other successor is outside the
// loop. Otherwise loop is not suitable for rotation.
// memory (without proving that the loop doesn't write).
if (L->hasLoopInvariantOperands(Inst) &&
!Inst->mayReadFromMemory() && !Inst->mayWriteToMemory() &&
- !isa<TerminatorInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst)) {
+ !isa<TerminatorInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst) &&
+ !isa<AllocaInst>(Inst)) {
Inst->moveBefore(LoopEntryBranch);
continue;
}
// The conditional branch can't be folded, handle the general case.
// Update DominatorTree to reflect the CFG change we just made. Then split
// edges as necessary to preserve LoopSimplify form.
- if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
- // Since OrigPreheader now has the conditional branch to Exit block, it is
- // the dominator of Exit.
- DT->changeImmediateDominator(Exit, OrigPreheader);
- DT->changeImmediateDominator(NewHeader, OrigPreheader);
+ if (DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
+ DominatorTree &DT = DTWP->getDomTree();
+ // Everything that was dominated by the old loop header is now dominated
+ // by the original loop preheader. Conceptually the header was merged
+ // into the preheader, even though we reuse the actual block as a new
+ // loop latch.
+ DomTreeNode *OrigHeaderNode = DT.getNode(OrigHeader);
+ SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(),
+ OrigHeaderNode->end());
+ DomTreeNode *OrigPreheaderNode = DT.getNode(OrigPreheader);
+ for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I)
+ DT.changeImmediateDominator(HeaderChildren[I], OrigPreheaderNode);
+
+ assert(DT.getNode(Exit)->getIDom() == OrigPreheaderNode);
+ assert(DT.getNode(NewHeader)->getIDom() == OrigPreheaderNode);
// Update OrigHeader to be dominated by the new header block.
- DT->changeImmediateDominator(OrigHeader, OrigLatch);
+ DT.changeImmediateDominator(OrigHeader, OrigLatch);
}
// Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
- // thus is not a preheader anymore. Split the edge to form a real preheader.
+ // thus is not a preheader anymore.
+ // Split the edge to form a real preheader.
BasicBlock *NewPH = SplitCriticalEdge(OrigPreheader, NewHeader, this);
NewPH->setName(NewHeader->getName() + ".lr.ph");
- // Preserve canonical loop form, which means that 'Exit' should have only one
- // predecessor.
- BasicBlock *ExitSplit = SplitCriticalEdge(L->getLoopLatch(), Exit, this);
- ExitSplit->moveBefore(Exit);
+ // Preserve canonical loop form, which means that 'Exit' should have only
+ // one predecessor. Note that Exit could be an exit block for multiple
+ // nested loops, causing both of the edges to now be critical and need to
+ // be split.
+ SmallVector<BasicBlock *, 4> ExitPreds(pred_begin(Exit), pred_end(Exit));
+ bool SplitLatchEdge = false;
+ for (SmallVectorImpl<BasicBlock *>::iterator PI = ExitPreds.begin(),
+ PE = ExitPreds.end();
+ PI != PE; ++PI) {
+ // We only need to split loop exit edges.
+ Loop *PredLoop = LI->getLoopFor(*PI);
+ if (!PredLoop || PredLoop->contains(Exit))
+ continue;
+ SplitLatchEdge |= L->getLoopLatch() == *PI;
+ BasicBlock *ExitSplit = SplitCriticalEdge(*PI, Exit, this);
+ ExitSplit->moveBefore(Exit);
+ }
+ assert(SplitLatchEdge &&
+ "Despite splitting all preds, failed to split latch exit?");
} else {
// We can fold the conditional branch in the preheader, this makes things
// simpler. The first step is to remove the extra edge to the Exit block.
PHBI->eraseFromParent();
// With our CFG finalized, update DomTree if it is available.
- if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
+ if (DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
+ DominatorTree &DT = DTWP->getDomTree();
// Update OrigHeader to be dominated by the new header block.
- DT->changeImmediateDominator(NewHeader, OrigPreheader);
- DT->changeImmediateDominator(OrigHeader, OrigLatch);
+ DT.changeImmediateDominator(NewHeader, OrigPreheader);
+ DT.changeImmediateDominator(OrigHeader, OrigLatch);
+
+ // Brute force incremental dominator tree update. Call
+ // findNearestCommonDominator on all CFG predecessors of each child of the
+ // original header.
+ DomTreeNode *OrigHeaderNode = DT.getNode(OrigHeader);
+ SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(),
+ OrigHeaderNode->end());
+ bool Changed;
+ do {
+ Changed = false;
+ for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I) {
+ DomTreeNode *Node = HeaderChildren[I];
+ BasicBlock *BB = Node->getBlock();
+
+ pred_iterator PI = pred_begin(BB);
+ BasicBlock *NearestDom = *PI;
+ for (pred_iterator PE = pred_end(BB); PI != PE; ++PI)
+ NearestDom = DT.findNearestCommonDominator(NearestDom, *PI);
+
+ // Remember if this changes the DomTree.
+ if (Node->getIDom()->getBlock() != NearestDom) {
+ DT.changeImmediateDominator(BB, NearestDom);
+ Changed = true;
+ }
+ }
+
+ // If the dominator changed, this may have an effect on other
+ // predecessors, continue until we reach a fixpoint.
+ } while (Changed);
}
}
// emitted code isn't too gross in this common case.
MergeBlockIntoPredecessor(OrigHeader, this);
+ DEBUG(dbgs() << "LoopRotation: into "; L->dump());
+
++NumRotated;
return true;
}
-