#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
-#include "llvm/Type.h"
#include "llvm/DerivedTypes.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/IVUsers.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/AddrModeMatcher.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ValueHandle.h"
STATISTIC(NumShadow, "Number of Shadow IVs optimized");
STATISTIC(NumImmSunk, "Number of common expr immediates sunk into uses");
STATISTIC(NumLoopCond, "Number of loop terminating conds optimized");
+STATISTIC(NumCountZero, "Number of count iv optimized to count toward zero");
static cl::opt<bool> EnableFullLSRMode("enable-full-lsr",
cl::init(false),
class LoopStrengthReduce : public LoopPass {
IVUsers *IU;
- LoopInfo *LI;
- DominatorTree *DT;
ScalarEvolution *SE;
bool Changed;
/// particular stride.
std::map<const SCEV *, IVsOfOneStride> IVsByStride;
- /// StrideNoReuse - Keep track of all the strides whose ivs cannot be
- /// reused (nor should they be rewritten to reuse other strides).
- SmallSet<const SCEV *, 4> StrideNoReuse;
-
/// DeadInsts - Keep track of instructions we may have made dead, so that
/// we can remove them after we are done working.
SmallVector<WeakVH, 16> DeadInsts;
public:
static char ID; // Pass ID, replacement for typeid
- explicit LoopStrengthReduce(const TargetLowering *tli = NULL) :
- LoopPass(&ID), TLI(tli) {
- }
+ explicit LoopStrengthReduce(const TargetLowering *tli = NULL) :
+ LoopPass(&ID), TLI(tli) {}
bool runOnLoop(Loop *L, LPPassManager &LPM);
// We split critical edges, so we change the CFG. However, we do update
// many analyses if they are around.
AU.addPreservedID(LoopSimplifyID);
- AU.addPreserved<LoopInfo>();
- AU.addPreserved<DominanceFrontier>();
- AU.addPreserved<DominatorTree>();
+ AU.addPreserved("loops");
+ AU.addPreserved("domfrontier");
+ AU.addPreserved("domtree");
AU.addRequiredID(LoopSimplifyID);
- AU.addRequired<LoopInfo>();
- AU.addRequired<DominatorTree>();
AU.addRequired<ScalarEvolution>();
AU.addPreserved<ScalarEvolution>();
AU.addRequired<IVUsers>();
}
private:
- ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond,
- IVStrideUse* &CondUse,
- const SCEV *const * &CondStride);
-
void OptimizeIndvars(Loop *L);
- void OptimizeLoopCountIV(Loop *L);
+
+ /// OptimizeLoopTermCond - Change loop terminating condition to use the
+ /// postinc iv when possible.
void OptimizeLoopTermCond(Loop *L);
/// OptimizeShadowIV - If IV is used in a int-to-float cast
ICmpInst *OptimizeMax(Loop *L, ICmpInst *Cond,
IVStrideUse* &CondUse);
+ /// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for
+ /// deciding when to exit the loop is used only for that purpose, try to
+ /// rearrange things so it counts down to a test against zero.
+ bool OptimizeLoopCountIV(Loop *L);
+ bool OptimizeLoopCountIVOfStride(const SCEV* &Stride,
+ IVStrideUse* &CondUse, Loop *L);
+
+ /// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a
+ /// single stride of IV. All of the users may have different starting
+ /// values, and this may not be the only stride.
+ void StrengthReduceIVUsersOfStride(const SCEV *Stride,
+ IVUsersOfOneStride &Uses,
+ Loop *L);
+ void StrengthReduceIVUsers(Loop *L);
+
+ ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond,
+ IVStrideUse* &CondUse,
+ const SCEV* &CondStride,
+ bool PostPass = false);
+
bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
- const SCEV *const * &CondStride);
+ const SCEV* &CondStride);
bool RequiresTypeConversion(const Type *Ty, const Type *NewTy);
- const SCEV *CheckForIVReuse(bool, bool, bool, const SCEV *const&,
+ const SCEV *CheckForIVReuse(bool, bool, bool, const SCEV *,
IVExpr&, const Type*,
const std::vector<BasedUser>& UsersToProcess);
bool ValidScale(bool, int64_t,
const std::vector<BasedUser>& UsersToProcess);
bool ValidOffset(bool, int64_t, int64_t,
const std::vector<BasedUser>& UsersToProcess);
- const SCEV *CollectIVUsers(const SCEV *const &Stride,
+ const SCEV *CollectIVUsers(const SCEV *Stride,
IVUsersOfOneStride &Uses,
Loop *L,
bool &AllUsesAreAddresses,
bool &AllUsesAreOutsideLoop,
std::vector<BasedUser> &UsersToProcess);
+ bool StrideMightBeShared(const SCEV *Stride, Loop *L, bool CheckPreInc);
bool ShouldUseFullStrengthReductionMode(
const std::vector<BasedUser> &UsersToProcess,
const Loop *L,
Instruction *IVIncInsertPt,
const Loop *L,
SCEVExpander &PreheaderRewriter);
- void StrengthReduceStridedIVUsers(const SCEV *const &Stride,
- IVUsersOfOneStride &Uses,
- Loop *L);
+
void DeleteTriviallyDeadInstructions();
};
}
/// specified set are trivially dead, delete them and see if this makes any of
/// their operands subsequently dead.
void LoopStrengthReduce::DeleteTriviallyDeadInstructions() {
- if (DeadInsts.empty()) return;
-
while (!DeadInsts.empty()) {
- Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.back());
- DeadInsts.pop_back();
-
+ Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
+
if (I == 0 || !isInstructionTriviallyDead(I))
continue;
- for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) {
+ for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
if (Instruction *U = dyn_cast<Instruction>(*OI)) {
*OI = 0;
if (U->use_empty())
DeadInsts.push_back(U);
}
- }
-
+
I->eraseFromParent();
Changed = true;
}
}
-/// containsAddRecFromDifferentLoop - Determine whether expression S involves a
-/// subexpression that is an AddRec from a loop other than L. An outer loop
-/// of L is OK, but not an inner loop nor a disjoint loop.
-static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) {
- // This is very common, put it first.
- if (isa<SCEVConstant>(S))
- return false;
- if (const SCEVCommutativeExpr *AE = dyn_cast<SCEVCommutativeExpr>(S)) {
- for (unsigned int i=0; i< AE->getNumOperands(); i++)
- if (containsAddRecFromDifferentLoop(AE->getOperand(i), L))
- return true;
- return false;
- }
- if (const SCEVAddRecExpr *AE = dyn_cast<SCEVAddRecExpr>(S)) {
- if (const Loop *newLoop = AE->getLoop()) {
- if (newLoop == L)
- return false;
- // if newLoop is an outer loop of L, this is OK.
- if (!LoopInfo::isNotAlreadyContainedIn(L, newLoop))
- return false;
- }
- return true;
- }
- if (const SCEVUDivExpr *DE = dyn_cast<SCEVUDivExpr>(S))
- return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
- containsAddRecFromDifferentLoop(DE->getRHS(), L);
-#if 0
- // SCEVSDivExpr has been backed out temporarily, but will be back; we'll
- // need this when it is.
- if (const SCEVSDivExpr *DE = dyn_cast<SCEVSDivExpr>(S))
- return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
- containsAddRecFromDifferentLoop(DE->getRHS(), L);
-#endif
- if (const SCEVCastExpr *CE = dyn_cast<SCEVCastExpr>(S))
- return containsAddRecFromDifferentLoop(CE->getOperand(), L);
- return false;
-}
-
/// isAddressUse - Returns true if the specified instruction is using the
/// specified value as an address.
static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
/// BasedUser - For a particular base value, keep information about how we've
/// partitioned the expression so far.
struct BasedUser {
- /// SE - The current ScalarEvolution object.
- ScalarEvolution *SE;
-
/// Base - The Base value for the PHI node that needs to be inserted for
/// this use. As the use is processed, information gets moved from this
/// field to the Imm field (below). BasedUser values are sorted by this
/// field.
const SCEV *Base;
-
+
/// Inst - The instruction using the induction variable.
Instruction *Inst;
// instruction for a loop and uses outside the loop that are dominated by
// the loop.
bool isUseOfPostIncrementedValue;
-
+
BasedUser(IVStrideUse &IVSU, ScalarEvolution *se)
- : SE(se), Base(IVSU.getOffset()), Inst(IVSU.getUser()),
+ : Base(IVSU.getOffset()), Inst(IVSU.getUser()),
OperandValToReplace(IVSU.getOperandValToReplace()),
- Imm(SE->getIntegerSCEV(0, Base->getType())),
+ Imm(se->getIntegerSCEV(0, Base->getType())),
isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {}
// Once we rewrite the code to insert the new IVs we want, update the
// operands of Inst to use the new expression 'NewBase', with 'Imm' added
// to it.
- void RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
+ void RewriteInstructionToUseNewBase(const SCEV *NewBase,
Instruction *InsertPt,
SCEVExpander &Rewriter, Loop *L, Pass *P,
- LoopInfo &LI,
- SmallVectorImpl<WeakVH> &DeadInsts);
-
- Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ ScalarEvolution *SE);
+
+ Value *InsertCodeForBaseAtPosition(const SCEV *NewBase,
const Type *Ty,
SCEVExpander &Rewriter,
- Instruction *IP, Loop *L,
- LoopInfo &LI);
+ Instruction *IP,
+ ScalarEvolution *SE);
void dump() const;
};
}
errs() << " Inst: " << *Inst;
}
-Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
+Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *NewBase,
const Type *Ty,
SCEVExpander &Rewriter,
- Instruction *IP, Loop *L,
- LoopInfo &LI) {
- // Figure out where we *really* want to insert this code. In particular, if
- // the user is inside of a loop that is nested inside of L, we really don't
- // want to insert this expression before the user, we'd rather pull it out as
- // many loops as possible.
- Instruction *BaseInsertPt = IP;
-
- // Figure out the most-nested loop that IP is in.
- Loop *InsertLoop = LI.getLoopFor(IP->getParent());
-
- // If InsertLoop is not L, and InsertLoop is nested inside of L, figure out
- // the preheader of the outer-most loop where NewBase is not loop invariant.
- if (L->contains(IP->getParent()))
- while (InsertLoop && NewBase->isLoopInvariant(InsertLoop)) {
- BaseInsertPt = InsertLoop->getLoopPreheader()->getTerminator();
- InsertLoop = InsertLoop->getParentLoop();
- }
-
- Value *Base = Rewriter.expandCodeFor(NewBase, 0, BaseInsertPt);
+ Instruction *IP,
+ ScalarEvolution *SE) {
+ Value *Base = Rewriter.expandCodeFor(NewBase, 0, IP);
+ // Wrap the base in a SCEVUnknown so that ScalarEvolution doesn't try to
+ // re-analyze it.
const SCEV *NewValSCEV = SE->getUnknown(Base);
// Always emit the immediate into the same block as the user.
// value of NewBase in the case that it's a diffferent instruction from
// the PHI that NewBase is computed from, or null otherwise.
//
-void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
+void BasedUser::RewriteInstructionToUseNewBase(const SCEV *NewBase,
Instruction *NewBasePt,
SCEVExpander &Rewriter, Loop *L, Pass *P,
- LoopInfo &LI,
- SmallVectorImpl<WeakVH> &DeadInsts) {
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ ScalarEvolution *SE) {
if (!isa<PHINode>(Inst)) {
// By default, insert code at the user instruction.
BasicBlock::iterator InsertPt = Inst;
-
+
// However, if the Operand is itself an instruction, the (potentially
// complex) inserted code may be shared by many users. Because of this, we
// want to emit code for the computation of the operand right before its old
//
// If this is a use outside the loop (which means after, since it is based
// on a loop indvar) we use the post-incremented value, so that we don't
- // artificially make the preinc value live out the bottom of the loop.
- if (!isUseOfPostIncrementedValue && L->contains(Inst->getParent())) {
+ // artificially make the preinc value live out the bottom of the loop.
+ if (!isUseOfPostIncrementedValue && L->contains(Inst)) {
if (NewBasePt && isa<PHINode>(OperandValToReplace)) {
InsertPt = NewBasePt;
++InsertPt;
}
Value *NewVal = InsertCodeForBaseAtPosition(NewBase,
OperandValToReplace->getType(),
- Rewriter, InsertPt, L, LI);
+ Rewriter, InsertPt, SE);
// Replace the use of the operand Value with the new Phi we just created.
Inst->replaceUsesOfWith(OperandValToReplace, NewVal);
if (PN->getIncomingValue(i) == OperandValToReplace) {
// If the original expression is outside the loop, put the replacement
// code in the same place as the original expression,
- // which need not be an immediate predecessor of this PHI. This way we
+ // which need not be an immediate predecessor of this PHI. This way we
// need only one copy of it even if it is referenced multiple times in
// the PHI. We don't do this when the original expression is inside the
// loop because multiple copies sometimes do useful sinking of code in
// that case(?).
Instruction *OldLoc = dyn_cast<Instruction>(OperandValToReplace);
BasicBlock *PHIPred = PN->getIncomingBlock(i);
- if (L->contains(OldLoc->getParent())) {
+ if (L->contains(OldLoc)) {
// If this is a critical edge, split the edge so that we do not insert
// the code on all predecessor/successor paths. We do this unless this
// is the canonical backedge for this loop, as this can make some
// inserted code be in an illegal position.
if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 &&
+ !isa<IndirectBrInst>(PHIPred->getTerminator()) &&
(PN->getParent() != L->getHeader() || !L->contains(PHIPred))) {
// First step, split the critical edge.
// is outside of the loop, and PredTI is in the loop, we want to
// move the block to be immediately before the PHI block, not
// immediately after PredTI.
- if (L->contains(PHIPred) && !L->contains(PN->getParent()))
+ if (L->contains(PHIPred) && !L->contains(PN))
NewBB->moveBefore(PN->getParent());
// Splitting the edge can reduce the number of PHI entries we have.
Value *&Code = InsertedCode[PHIPred];
if (!Code) {
// Insert the code into the end of the predecessor block.
- Instruction *InsertPt = (L->contains(OldLoc->getParent())) ?
+ Instruction *InsertPt = (L->contains(OldLoc)) ?
PHIPred->getTerminator() :
OldLoc->getParent()->getTerminator();
Code = InsertCodeForBaseAtPosition(NewBase, PN->getType(),
- Rewriter, InsertPt, L, LI);
+ Rewriter, InsertPt, SE);
DEBUG(errs() << " Changing PHI use to ");
DEBUG(WriteAsOperand(errs(), Code, /*PrintType=*/false));
/// fitsInAddressMode - Return true if V can be subsumed within an addressing
/// mode, and does not need to be put in a register first.
-static bool fitsInAddressMode(const SCEV *const &V, const Type *AccessTy,
+static bool fitsInAddressMode(const SCEV *V, const Type *AccessTy,
const TargetLowering *TLI, bool HasBaseReg) {
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(V)) {
int64_t VC = SC->getValue()->getSExtValue();
static void MoveLoopVariantsToImmediateField(const SCEV *&Val, const SCEV *&Imm,
Loop *L, ScalarEvolution *SE) {
if (Val->isLoopInvariant(L)) return; // Nothing to do.
-
+
if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
SmallVector<const SCEV *, 4> NewOps;
NewOps.reserve(SAE->getNumOperands());
-
+
for (unsigned i = 0; i != SAE->getNumOperands(); ++i)
if (!SAE->getOperand(i)->isLoopInvariant(L)) {
// If this is a loop-variant expression, it must stay in the immediate
// Try to pull immediates out of the start value of nested addrec's.
const SCEV *Start = SARE->getStart();
MoveLoopVariantsToImmediateField(Start, Imm, L, SE);
-
+
SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Start;
Val = SE->getAddRecExpr(Ops, SARE->getLoop());
if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
SmallVector<const SCEV *, 4> NewOps;
NewOps.reserve(SAE->getNumOperands());
-
+
for (unsigned i = 0; i != SAE->getNumOperands(); ++i) {
const SCEV *NewOp = SAE->getOperand(i);
MoveImmediateValues(TLI, AccessTy, NewOp, Imm, isAddress, L, SE);
-
+
if (!NewOp->isLoopInvariant(L)) {
// If this is a loop-variant expression, it must stay in the immediate
// field of the expression.
// Try to pull immediates out of the start value of nested addrec's.
const SCEV *Start = SARE->getStart();
MoveImmediateValues(TLI, AccessTy, Start, Imm, isAddress, L, SE);
-
+
if (Start != SARE->getStart()) {
SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Start;
const SCEV *SubImm = SE->getIntegerSCEV(0, Val->getType());
const SCEV *NewOp = SME->getOperand(1);
MoveImmediateValues(TLI, AccessTy, NewOp, SubImm, isAddress, L, SE);
-
- // If we extracted something out of the subexpressions, see if we can
+
+ // If we extracted something out of the subexpressions, see if we can
// simplify this!
if (NewOp != SME->getOperand(1)) {
// Scale SubImm up by "8". If the result is a target constant, we are
if (fitsInAddressMode(SubImm, AccessTy, TLI, false)) {
// Accumulate the immediate.
Imm = SE->getAddExpr(Imm, SubImm);
-
+
// Update what is left of 'Val'.
Val = SE->getMulExpr(SME->getOperand(0), NewOp);
return;
SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Zero; // Start with zero base.
SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop()));
-
+
SeparateSubExprs(SubExprs, SARE->getOperand(0), SE);
}
}
}
-// This is logically local to the following function, but C++ says we have
+// This is logically local to the following function, but C++ says we have
// to make it file scope.
struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
// it is clearly shared across all the IV's. If the use is outside the loop
// (which means after it) we don't want to factor anything *into* the loop,
// so just use 0 as the base.
- if (L->contains(Uses[0].Inst->getParent()))
+ if (L->contains(Uses[0].Inst))
std::swap(Result, Uses[0].Base);
return Result;
}
// an addressing mode "for free"; such expressions are left within the loop.
// struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
std::map<const SCEV *, SubExprUseData> SubExpressionUseData;
-
+
// UniqueSubExprs - Keep track of all of the subexpressions we see in the
// order we see them.
SmallVector<const SCEV *, 16> UniqueSubExprs;
// after the loop to affect base computation of values *inside* the loop,
// because we can always add their offsets to the result IV after the loop
// is done, ensuring we get good code inside the loop.
- if (!L->contains(Uses[i].Inst->getParent()))
+ if (!L->contains(Uses[i].Inst))
continue;
NumUsesInsideLoop++;
-
+
// If the base is zero (which is common), return zero now, there are no
// CSEs we can find.
if (Uses[i].Base == Zero) return Zero;
// Now that we know how many times each is used, build Result. Iterate over
// UniqueSubexprs so that we have a stable ordering.
for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) {
- std::map<const SCEV *, SubExprUseData>::iterator I =
+ std::map<const SCEV *, SubExprUseData>::iterator I =
SubExpressionUseData.find(UniqueSubExprs[i]);
assert(I != SubExpressionUseData.end() && "Entry not found?");
- if (I->second.Count == NumUsesInsideLoop) { // Found CSE!
+ if (I->second.Count == NumUsesInsideLoop) { // Found CSE!
if (I->second.notAllUsesAreFree)
Result = SE->getAddExpr(Result, I->first);
- else
+ else
FreeResult = SE->getAddExpr(FreeResult, I->first);
} else
// Remove non-cse's from SubExpressionUseData.
// and a Result in the same instruction (for example because it would
// require too many registers). Check this.
for (unsigned i=0; i<NumUses; ++i) {
- if (!L->contains(Uses[i].Inst->getParent()))
+ if (!L->contains(Uses[i].Inst))
continue;
// We know this is an addressing mode use; if there are any uses that
// are not, FreeResult would be Zero.
// If we found no CSE's, return now.
if (Result == Zero) return Result;
-
+
// If we still have a FreeResult, remove its subexpressions from
// SubExpressionUseData. This means they will remain in the use Bases.
if (FreeResult != Zero) {
SeparateSubExprs(SubExprs, FreeResult, SE);
for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) {
- std::map<const SCEV *, SubExprUseData>::iterator I =
+ std::map<const SCEV *, SubExprUseData>::iterator I =
SubExpressionUseData.find(SubExprs[j]);
SubExpressionUseData.erase(I);
}
// the final IV value coming into those uses does. Instead of trying to
// remove the pieces of the common base, which might not be there,
// subtract off the base to compensate for this.
- if (!L->contains(Uses[i].Inst->getParent())) {
+ if (!L->contains(Uses[i].Inst)) {
Uses[i].Base = SE->getMinusSCEV(Uses[i].Base, Result);
continue;
}
SubExprs.erase(SubExprs.begin()+j);
--j; --e;
}
-
+
// Finally, add the non-shared expressions together.
if (SubExprs.empty())
Uses[i].Base = Zero;
Uses[i].Base = SE->getAddExpr(SubExprs);
SubExprs.clear();
}
-
+
return Result;
}
-/// ValidScale - Check whether the given Scale is valid for all loads and
+/// ValidScale - Check whether the given Scale is valid for all loads and
/// stores in UsersToProcess.
///
bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale,
AccessTy = getAccessType(UsersToProcess[i].Inst);
else if (isa<PHINode>(UsersToProcess[i].Inst))
continue;
-
+
TargetLowering::AddrMode AM;
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm))
AM.BaseOffs = SC->getValue()->getSExtValue();
/// reuse is possible. Factors can be negative on same targets, e.g. ARM.
///
/// If all uses are outside the loop, we don't require that all multiplies
-/// be folded into the addressing mode, nor even that the factor be constant;
-/// a multiply (executed once) outside the loop is better than another IV
+/// be folded into the addressing mode, nor even that the factor be constant;
+/// a multiply (executed once) outside the loop is better than another IV
/// within. Well, usually.
const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
bool AllUsesAreAddresses,
bool AllUsesAreOutsideLoop,
- const SCEV *const &Stride,
+ const SCEV *Stride,
IVExpr &IV, const Type *Ty,
const std::vector<BasedUser>& UsersToProcess) {
- if (StrideNoReuse.count(Stride))
- return SE->getIntegerSCEV(0, Stride->getType());
-
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) {
int64_t SInt = SC->getValue()->getSExtValue();
for (unsigned NewStride = 0, e = IU->StrideOrder.size();
NewStride != e; ++NewStride) {
- std::map<const SCEV *, IVsOfOneStride>::iterator SI =
+ std::map<const SCEV *, IVsOfOneStride>::iterator SI =
IVsByStride.find(IU->StrideOrder[NewStride]);
- if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first) ||
- StrideNoReuse.count(SI->first))
+ if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first))
+ continue;
+ // The other stride has no uses, don't reuse it.
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator UI =
+ IU->IVUsesByStride.find(IU->StrideOrder[NewStride]);
+ if (UI->second->Users.empty())
continue;
int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
if (SI->first != Stride &&
// an existing IV if we can.
for (unsigned NewStride = 0, e = IU->StrideOrder.size();
NewStride != e; ++NewStride) {
- std::map<const SCEV *, IVsOfOneStride>::iterator SI =
+ std::map<const SCEV *, IVsOfOneStride>::iterator SI =
IVsByStride.find(IU->StrideOrder[NewStride]);
if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first))
continue;
// -1*old.
for (unsigned NewStride = 0, e = IU->StrideOrder.size();
NewStride != e; ++NewStride) {
- std::map<const SCEV *, IVsOfOneStride>::iterator SI =
+ std::map<const SCEV *, IVsOfOneStride>::iterator SI =
IVsByStride.find(IU->StrideOrder[NewStride]);
- if (SI == IVsByStride.end())
+ if (SI == IVsByStride.end())
continue;
if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(SI->first))
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(ME->getOperand(0)))
/// isNonConstantNegative - Return true if the specified scev is negated, but
/// not a constant.
-static bool isNonConstantNegative(const SCEV *const &Expr) {
+static bool isNonConstantNegative(const SCEV *Expr) {
const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Expr);
if (!Mul) return false;
-
+
// If there is a constant factor, it will be first.
const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
if (!SC) return false;
-
+
// Return true if the value is negative, this matches things like (-42 * V).
return SC->getValue()->getValue().isNegative();
}
/// CollectIVUsers - Transform our list of users and offsets to a bit more
-/// complex table. In this new vector, each 'BasedUser' contains 'Base', the base
-/// of the strided accesses, as well as the old information from Uses. We
+/// complex table. In this new vector, each 'BasedUser' contains 'Base', the
+/// base of the strided accesses, as well as the old information from Uses. We
/// progressively move information from the Base field to the Imm field, until
/// we eventually have the full access expression to rewrite the use.
-const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride,
+const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *Stride,
IVUsersOfOneStride &Uses,
Loop *L,
bool &AllUsesAreAddresses,
// We now have a whole bunch of uses of like-strided induction variables, but
// they might all have different bases. We want to emit one PHI node for this
// stride which we fold as many common expressions (between the IVs) into as
- // possible. Start by identifying the common expressions in the base values
+ // possible. Start by identifying the common expressions in the base values
// for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find
// "A+B"), emit it to the preheader, then remove the expression from the
// UsersToProcess base values.
// If the user is not in the current loop, this means it is using the exit
// value of the IV. Do not put anything in the base, make sure it's all in
// the immediate field to allow as much factoring as possible.
- if (!L->contains(UsersToProcess[i].Inst->getParent())) {
+ if (!L->contains(UsersToProcess[i].Inst)) {
UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm,
UsersToProcess[i].Base);
- UsersToProcess[i].Base =
+ UsersToProcess[i].Base =
SE->getIntegerSCEV(0, UsersToProcess[i].Base->getType());
} else {
// Not all uses are outside the loop.
- AllUsesAreOutsideLoop = false;
+ AllUsesAreOutsideLoop = false;
// Addressing modes can be folded into loads and stores. Be careful that
// the store is through the expression, not of the expression though.
if (isAddress)
HasAddress = true;
-
+
// If this use isn't an address, then not all uses are addresses.
if (!isAddress && !isPHI)
AllUsesAreAddresses = false;
-
+
MoveImmediateValues(TLI, UsersToProcess[i].Inst, UsersToProcess[i].Base,
UsersToProcess[i].Imm, isAddress, L, SE);
}
// for one fewer iv.
if (NumPHI > 1)
AllUsesAreAddresses = false;
-
+
// There are no in-loop address uses.
if (AllUsesAreAddresses && (!HasAddress && !AllUsesAreOutsideLoop))
AllUsesAreAddresses = false;
const Loop *L) {
if (UsersToProcess.size() == 1 &&
UsersToProcess[0].isUseOfPostIncrementedValue &&
- L->contains(UsersToProcess[0].Inst->getParent()))
+ L->contains(UsersToProcess[0].Inst))
return UsersToProcess[0].Inst;
return L->getLoopLatch()->getTerminator();
}
return true;
}
-/// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single
+/// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a single
/// stride of IV. All of the users may have different starting values, and this
/// may not be the only stride.
-void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,
- IVUsersOfOneStride &Uses,
- Loop *L) {
+void
+LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *Stride,
+ IVUsersOfOneStride &Uses,
+ Loop *L) {
// If all the users are moved to another stride, then there is nothing to do.
if (Uses.Users.empty())
return;
// have the full access expression to rewrite the use.
std::vector<BasedUser> UsersToProcess;
const SCEV *CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses,
- AllUsesAreOutsideLoop,
- UsersToProcess);
+ AllUsesAreOutsideLoop,
+ UsersToProcess);
// Sort the UsersToProcess array so that users with common bases are
// next to each other.
const SCEV *RewriteFactor = SE->getIntegerSCEV(0, ReplacedTy);
IVExpr ReuseIV(SE->getIntegerSCEV(0,
Type::getInt32Ty(Preheader->getContext())),
- SE->getIntegerSCEV(0,
+ SE->getIntegerSCEV(0,
Type::getInt32Ty(Preheader->getContext())),
0);
- /// Choose a strength-reduction strategy and prepare for it by creating
- /// the necessary PHIs and adjusting the bookkeeping.
+ // Choose a strength-reduction strategy and prepare for it by creating
+ // the necessary PHIs and adjusting the bookkeeping.
if (ShouldUseFullStrengthReductionMode(UsersToProcess, L,
AllUsesAreAddresses, Stride)) {
PrepareToStrengthReduceFully(UsersToProcess, Stride, CommonExprs, L,
// If all uses are addresses, check if it is possible to reuse an IV. The
// new IV must have a stride that is a multiple of the old stride; the
// multiple must be a number that can be encoded in the scale field of the
- // target addressing mode; and we must have a valid instruction after this
+ // target addressing mode; and we must have a valid instruction after this
// substitution, including the immediate field, if any.
RewriteFactor = CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses,
AllUsesAreOutsideLoop,
// We want this constant emitted into the preheader! This is just
// using cast as a copy so BitCast (no-op cast) is appropriate
BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert",
- PreInsertPt);
+ PreInsertPt);
}
}
// loop to ensure it is dominated by the increment. In case it's the
// only use of the iv, the increment instruction is already before the
// use.
- if (L->contains(User.Inst->getParent()) && User.Inst != IVIncInsertPt)
+ if (L->contains(User.Inst) && User.Inst != IVIncInsertPt)
User.Inst->moveBefore(IVIncInsertPt);
}
assert(SE->getTypeSizeInBits(RewriteExpr->getType()) <
SE->getTypeSizeInBits(ReuseIV.Base->getType()) &&
"Unexpected lengthening conversion!");
- typedBase = SE->getTruncateExpr(ReuseIV.Base,
+ typedBase = SE->getTruncateExpr(ReuseIV.Base,
RewriteExpr->getType());
}
RewriteExpr = SE->getMinusSCEV(RewriteExpr, typedBase);
// common base, and are adding it back here. Use the same expression
// as before, rather than CommonBaseV, so DAGCombiner will zap it.
if (!CommonExprs->isZero()) {
- if (L->contains(User.Inst->getParent()))
+ if (L->contains(User.Inst))
RewriteExpr = SE->getAddExpr(RewriteExpr,
SE->getUnknown(CommonBaseV));
else
RewriteExpr = SE->getAddExpr(RewriteExpr, SE->getUnknown(BaseV));
User.RewriteInstructionToUseNewBase(RewriteExpr, NewBasePt,
- Rewriter, L, this, *LI,
- DeadInsts);
+ Rewriter, L, this,
+ DeadInsts, SE);
// Mark old value we replaced as possibly dead, so that it is eliminated
// if we just replaced the last use of that value.
// different starting values, into different PHIs.
}
+void LoopStrengthReduce::StrengthReduceIVUsers(Loop *L) {
+ // Note: this processes each stride/type pair individually. All users
+ // passed into StrengthReduceIVUsersOfStride have the same type AND stride.
+ // Also, note that we iterate over IVUsesByStride indirectly by using
+ // StrideOrder. This extra layer of indirection makes the ordering of
+ // strides deterministic - not dependent on map order.
+ for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e; ++Stride) {
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
+ IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
+ assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
+ // FIXME: Generalize to non-affine IV's.
+ if (!SI->first->isLoopInvariant(L))
+ continue;
+ StrengthReduceIVUsersOfStride(SI->first, *SI->second, L);
+ }
+}
+
/// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
/// set the IV user and stride information and return true, otherwise return
/// false.
-bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
- const SCEV *const * &CondStride) {
+bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond,
+ IVStrideUse *&CondUse,
+ const SCEV* &CondStride) {
for (unsigned Stride = 0, e = IU->StrideOrder.size();
Stride != e && !CondUse; ++Stride) {
std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
// InstCombine does it as well for simple uses, it's not clear that it
// occurs enough in real life to handle.
CondUse = UI;
- CondStride = &SI->first;
+ CondStride = SI->first;
return true;
}
}
return false;
-}
+}
namespace {
// Constant strides come first which in turns are sorted by their absolute
const ScalarEvolution *SE;
explicit StrideCompare(const ScalarEvolution *se) : SE(se) {}
- bool operator()(const SCEV *const &LHS, const SCEV *const &RHS) {
+ bool operator()(const SCEV *LHS, const SCEV *RHS) {
const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS);
const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
if (LHSC && RHSC) {
/// v1 = v1 + 3
/// if (v1 < 30) goto loop
ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
- IVStrideUse* &CondUse,
- const SCEV *const* &CondStride) {
+ IVStrideUse* &CondUse,
+ const SCEV* &CondStride,
+ bool PostPass) {
// If there's only one stride in the loop, there's nothing to do here.
if (IU->StrideOrder.size() < 2)
return Cond;
// trying to change the condition because the stride will still
// remain.
std::map<const SCEV *, IVUsersOfOneStride *>::iterator I =
- IU->IVUsesByStride.find(*CondStride);
- if (I == IU->IVUsesByStride.end() ||
- I->second->Users.size() != 1)
+ IU->IVUsesByStride.find(CondStride);
+ if (I == IU->IVUsesByStride.end())
return Cond;
+ if (I->second->Users.size() > 1) {
+ for (ilist<IVStrideUse>::iterator II = I->second->Users.begin(),
+ EE = I->second->Users.end(); II != EE; ++II) {
+ if (II->getUser() == Cond)
+ continue;
+ if (!isInstructionTriviallyDead(II->getUser()))
+ return Cond;
+ }
+ }
// Only handle constant strides for now.
- const SCEVConstant *SC = dyn_cast<SCEVConstant>(*CondStride);
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(CondStride);
if (!SC) return Cond;
ICmpInst::Predicate Predicate = Cond->getPredicate();
int64_t CmpSSInt = SC->getValue()->getSExtValue();
- unsigned BitWidth = SE->getTypeSizeInBits((*CondStride)->getType());
+ unsigned BitWidth = SE->getTypeSizeInBits(CondStride->getType());
uint64_t SignBit = 1ULL << (BitWidth-1);
const Type *CmpTy = Cond->getOperand(0)->getType();
const Type *NewCmpTy = NULL;
unsigned TyBits = SE->getTypeSizeInBits(CmpTy);
unsigned NewTyBits = 0;
- const SCEV **NewStride = NULL;
+ const SCEV *NewStride = NULL;
Value *NewCmpLHS = NULL;
Value *NewCmpRHS = NULL;
int64_t Scale = 1;
if (ConstantInt *C = dyn_cast<ConstantInt>(Cond->getOperand(1))) {
int64_t CmpVal = C->getValue().getSExtValue();
+ // Check the relevant induction variable for conformance to
+ // the pattern.
+ const SCEV *IV = SE->getSCEV(Cond->getOperand(0));
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
+ if (!AR || !AR->isAffine())
+ return Cond;
+
+ const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
// Check stride constant and the comparision constant signs to detect
// overflow.
- if ((CmpVal & SignBit) != (CmpSSInt & SignBit))
- return Cond;
+ if (StartC) {
+ if ((StartC->getValue()->getSExtValue() < CmpVal && CmpSSInt < 0) ||
+ (StartC->getValue()->getSExtValue() > CmpVal && CmpSSInt > 0))
+ return Cond;
+ } else {
+ // More restrictive check for the other cases.
+ if ((CmpVal & SignBit) != (CmpSSInt & SignBit))
+ return Cond;
+ }
// Look for a suitable stride / iv as replacement.
for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
IU->IVUsesByStride.find(IU->StrideOrder[i]);
- if (!isa<SCEVConstant>(SI->first))
+ if (!isa<SCEVConstant>(SI->first) || SI->second->Users.empty())
continue;
int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
if (SSInt == CmpSSInt ||
Scale = SSInt / CmpSSInt;
int64_t NewCmpVal = CmpVal * Scale;
+
+ // If old icmp value fits in icmp immediate field, but the new one doesn't
+ // try something else.
+ if (TLI &&
+ TLI->isLegalICmpImmediate(CmpVal) &&
+ !TLI->isLegalICmpImmediate(NewCmpVal))
+ continue;
+
APInt Mul = APInt(BitWidth*2, CmpVal, true);
Mul = Mul * APInt(BitWidth*2, Scale, true);
// Check for overflow.
(CmpVal & SignBit) != (NewCmpVal & SignBit))
continue;
- if (NewCmpVal == CmpVal)
- continue;
// Pick the best iv to use trying to avoid a cast.
NewCmpLHS = NULL;
for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
if (NewTyBits != TyBits && !isa<SCEVConstant>(CondUse->getOffset()))
continue;
- bool AllUsesAreAddresses = true;
- bool AllUsesAreOutsideLoop = true;
- std::vector<BasedUser> UsersToProcess;
- const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
- AllUsesAreAddresses,
- AllUsesAreOutsideLoop,
- UsersToProcess);
- // Avoid rewriting the compare instruction with an iv of new stride
- // if it's likely the new stride uses will be rewritten using the
- // stride of the compare instruction.
- if (AllUsesAreAddresses &&
- ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
- continue;
+ if (!PostPass) {
+ bool AllUsesAreAddresses = true;
+ bool AllUsesAreOutsideLoop = true;
+ std::vector<BasedUser> UsersToProcess;
+ const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
+ AllUsesAreAddresses,
+ AllUsesAreOutsideLoop,
+ UsersToProcess);
+ // Avoid rewriting the compare instruction with an iv of new stride
+ // if it's likely the new stride uses will be rewritten using the
+ // stride of the compare instruction.
+ if (AllUsesAreAddresses &&
+ ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
+ continue;
+ }
// Avoid rewriting the compare instruction with an iv which has
// implicit extension or truncation built into it.
if (Scale < 0 && !Cond->isEquality())
Predicate = ICmpInst::getSwappedPredicate(Predicate);
- NewStride = &IU->StrideOrder[i];
+ NewStride = IU->StrideOrder[i];
if (!isa<PointerType>(NewCmpTy))
NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal);
else {
Cond = new ICmpInst(OldCond, Predicate, NewCmpLHS, NewCmpRHS,
L->getHeader()->getName() + ".termcond");
+ DEBUG(errs() << " Change compare stride in Inst " << *OldCond);
+ DEBUG(errs() << " to " << *Cond << '\n');
+
// Remove the old compare instruction. The old indvar is probably dead too.
DeadInsts.push_back(CondUse->getOperandValToReplace());
OldCond->replaceAllUsesWith(Cond);
OldCond->eraseFromParent();
- IU->IVUsesByStride[*NewStride]->addUser(NewOffset, Cond, NewCmpLHS);
- CondUse = &IU->IVUsesByStride[*NewStride]->Users.back();
+ IU->IVUsesByStride[NewStride]->addUser(NewOffset, Cond, NewCmpLHS);
+ CondUse = &IU->IVUsesByStride[NewStride]->Users.back();
CondStride = NewStride;
++NumEliminated;
Changed = true;
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
return;
-
+
for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e;
++Stride) {
std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
/* If shadow use is a int->float cast then insert a second IV
to eliminate this cast.
- for (unsigned i = 0; i < n; ++i)
+ for (unsigned i = 0; i < n; ++i)
foo((double)i);
is transformed into
double d = 0.0;
- for (unsigned i = 0; i < n; ++i, ++d)
+ for (unsigned i = 0; i < n; ++i, ++d)
foo(d);
*/
if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser()))
const Type *SrcTy = PH->getType();
int Mantissa = DestTy->getFPMantissaWidth();
- if (Mantissa == -1) continue;
+ if (Mantissa == -1) continue;
if ((int)SE->getTypeSizeInBits(SrcTy) > Mantissa)
continue;
Entry = 1;
Latch = 0;
}
-
+
ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
if (!Init) continue;
Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
- BinaryOperator *Incr =
+ BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
if (!Incr) continue;
if (Incr->getOpcode() != Instruction::Add
if (!C) continue;
+ // Ignore negative constants, as the code below doesn't handle them
+ // correctly. TODO: Remove this restriction.
+ if (!C->getValue().isStrictlyPositive()) continue;
+
/* Add new PHINode. */
PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH);
/* create new increment. '++d' in above example. */
Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
- BinaryOperator *NewIncr =
+ BinaryOperator *NewIncr =
BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
Instruction::FAdd : Instruction::FSub,
NewPH, CFP, "IV.S.next.", Incr);
OptimizeShadowIV(L);
}
-/// OptimizeLoopTermCond - Change loop terminating condition to use the
+bool LoopStrengthReduce::StrideMightBeShared(const SCEV* Stride, Loop *L,
+ bool CheckPreInc) {
+ int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue();
+ for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
+ IU->IVUsesByStride.find(IU->StrideOrder[i]);
+ const SCEV *Share = SI->first;
+ if (!isa<SCEVConstant>(SI->first) || Share == Stride)
+ continue;
+ int64_t SSInt = cast<SCEVConstant>(Share)->getValue()->getSExtValue();
+ if (SSInt == SInt)
+ return true; // This can definitely be reused.
+ if (unsigned(abs64(SSInt)) < SInt || (SSInt % SInt) != 0)
+ continue;
+ int64_t Scale = SSInt / SInt;
+ bool AllUsesAreAddresses = true;
+ bool AllUsesAreOutsideLoop = true;
+ std::vector<BasedUser> UsersToProcess;
+ const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
+ AllUsesAreAddresses,
+ AllUsesAreOutsideLoop,
+ UsersToProcess);
+ if (AllUsesAreAddresses &&
+ ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess)) {
+ if (!CheckPreInc)
+ return true;
+ // Any pre-inc iv use?
+ IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[Share];
+ for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
+ E = StrideUses.Users.end(); I != E; ++I) {
+ if (!I->isUseOfPostIncrementedValue())
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// isUsedByExitBranch - Return true if icmp is used by a loop terminating
+/// conditional branch or it's and / or with other conditions before being used
+/// as the condition.
+static bool isUsedByExitBranch(ICmpInst *Cond, Loop *L) {
+ BasicBlock *CondBB = Cond->getParent();
+ if (!L->isLoopExiting(CondBB))
+ return false;
+ BranchInst *TermBr = dyn_cast<BranchInst>(CondBB->getTerminator());
+ if (!TermBr || !TermBr->isConditional())
+ return false;
+
+ Value *User = *Cond->use_begin();
+ Instruction *UserInst = dyn_cast<Instruction>(User);
+ while (UserInst &&
+ (UserInst->getOpcode() == Instruction::And ||
+ UserInst->getOpcode() == Instruction::Or)) {
+ if (!UserInst->hasOneUse() || UserInst->getParent() != CondBB)
+ return false;
+ User = *User->use_begin();
+ UserInst = dyn_cast<Instruction>(User);
+ }
+ return User == TermBr;
+}
+
+static bool ShouldCountToZero(ICmpInst *Cond, IVStrideUse* &CondUse,
+ ScalarEvolution *SE, Loop *L,
+ const TargetLowering *TLI = 0) {
+ if (!L->contains(Cond))
+ return false;
+
+ if (!isa<SCEVConstant>(CondUse->getOffset()))
+ return false;
+
+ // Handle only tests for equality for the moment.
+ if (!Cond->isEquality() || !Cond->hasOneUse())
+ return false;
+ if (!isUsedByExitBranch(Cond, L))
+ return false;
+
+ Value *CondOp0 = Cond->getOperand(0);
+ const SCEV *IV = SE->getSCEV(CondOp0);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
+ if (!AR || !AR->isAffine())
+ return false;
+
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
+ if (!SC || SC->getValue()->getSExtValue() < 0)
+ // If it's already counting down, don't do anything.
+ return false;
+
+ // If the RHS of the comparison is not an loop invariant, the rewrite
+ // cannot be done. Also bail out if it's already comparing against a zero.
+ // If we are checking this before cmp stride optimization, check if it's
+ // comparing against a already legal immediate.
+ Value *RHS = Cond->getOperand(1);
+ ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS);
+ if (!L->isLoopInvariant(RHS) ||
+ (RHSC && RHSC->isZero()) ||
+ (RHSC && TLI && TLI->isLegalICmpImmediate(RHSC->getSExtValue())))
+ return false;
+
+ // Make sure the IV is only used for counting. Value may be preinc or
+ // postinc; 2 uses in either case.
+ if (!CondOp0->hasNUses(2))
+ return false;
+
+ return true;
+}
+
+/// OptimizeLoopTermCond - Change loop terminating condition to use the
/// postinc iv when possible.
void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
- // Finally, get the terminating condition for the loop if possible. If we
- // can, we want to change it to use a post-incremented version of its
- // induction variable, to allow coalescing the live ranges for the IV into
- // one register value.
BasicBlock *LatchBlock = L->getLoopLatch();
- BasicBlock *ExitingBlock = L->getExitingBlock();
-
- if (!ExitingBlock)
- // Multiple exits, just look at the exit in the latch block if there is one.
- ExitingBlock = LatchBlock;
- BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
- if (!TermBr)
- return;
- if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
- return;
+ bool LatchExit = L->isLoopExiting(LatchBlock);
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
- // Search IVUsesByStride to find Cond's IVUse if there is one.
- IVStrideUse *CondUse = 0;
- const SCEV *const *CondStride = 0;
- ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
- if (!FindIVUserForCond(Cond, CondUse, CondStride))
- return; // setcc doesn't use the IV.
-
- if (ExitingBlock != LatchBlock) {
- if (!Cond->hasOneUse())
- // See below, we don't want the condition to be cloned.
- return;
-
- // If exiting block is the latch block, we know it's safe and profitable to
- // transform the icmp to use post-inc iv. Otherwise do so only if it would
- // not reuse another iv and its iv would be reused by other uses. We are
- // optimizing for the case where the icmp is the only use of the iv.
- IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[*CondStride];
- for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
- E = StrideUses.Users.end(); I != E; ++I) {
- if (I->getUser() == Cond)
- continue;
- if (!I->isUseOfPostIncrementedValue())
- return;
- }
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitingBlock = ExitingBlocks[i];
- // FIXME: This is expensive, and worse still ChangeCompareStride does a
- // similar check. Can we perform all the icmp related transformations after
- // StrengthReduceStridedIVUsers?
- if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(*CondStride)) {
- int64_t SInt = SC->getValue()->getSExtValue();
- for (unsigned NewStride = 0, ee = IU->StrideOrder.size(); NewStride != ee;
- ++NewStride) {
- std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
- IU->IVUsesByStride.find(IU->StrideOrder[NewStride]);
- if (!isa<SCEVConstant>(SI->first) || SI->first == *CondStride)
- continue;
- int64_t SSInt =
- cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
- if (SSInt == SInt)
- return; // This can definitely be reused.
- if (unsigned(abs64(SSInt)) < SInt || (SSInt % SInt) != 0)
- continue;
- int64_t Scale = SSInt / SInt;
- bool AllUsesAreAddresses = true;
- bool AllUsesAreOutsideLoop = true;
- std::vector<BasedUser> UsersToProcess;
- const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
- AllUsesAreAddresses,
- AllUsesAreOutsideLoop,
- UsersToProcess);
- // Avoid rewriting the compare instruction with an iv of new stride
- // if it's likely the new stride uses will be rewritten using the
- // stride of the compare instruction.
- if (AllUsesAreAddresses &&
- ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
- return;
- }
- }
+ // Finally, get the terminating condition for the loop if possible. If we
+ // can, we want to change it to use a post-incremented version of its
+ // induction variable, to allow coalescing the live ranges for the IV into
+ // one register value.
- StrideNoReuse.insert(*CondStride);
- }
+ BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (!TermBr)
+ continue;
+ // FIXME: Overly conservative, termination condition could be an 'or' etc..
+ if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
+ continue;
- // If the trip count is computed in terms of a max (due to ScalarEvolution
- // being unable to find a sufficient guard, for example), change the loop
- // comparison to use SLT or ULT instead of NE.
- Cond = OptimizeMax(L, Cond, CondUse);
-
- // If possible, change stride and operands of the compare instruction to
- // eliminate one stride.
- if (ExitingBlock == LatchBlock)
- Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);
-
- // It's possible for the setcc instruction to be anywhere in the loop, and
- // possible for it to have multiple users. If it is not immediately before
- // the latch block branch, move it.
- if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) {
- if (Cond->hasOneUse()) { // Condition has a single use, just move it.
- Cond->moveBefore(TermBr);
- } else {
- // Otherwise, clone the terminating condition and insert into the loopend.
- Cond = cast<ICmpInst>(Cond->clone());
- Cond->setName(L->getHeader()->getName() + ".termcond");
- LatchBlock->getInstList().insert(TermBr, Cond);
-
- // Clone the IVUse, as the old use still exists!
- IU->IVUsesByStride[*CondStride]->addUser(CondUse->getOffset(), Cond,
- CondUse->getOperandValToReplace());
- CondUse = &IU->IVUsesByStride[*CondStride]->Users.back();
+ // Search IVUsesByStride to find Cond's IVUse if there is one.
+ IVStrideUse *CondUse = 0;
+ const SCEV *CondStride = 0;
+ ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
+ if (!FindIVUserForCond(Cond, CondUse, CondStride))
+ continue;
+
+ // If the latch block is exiting and it's not a single block loop, it's
+ // not safe to use postinc iv in other exiting blocks. FIXME: overly
+ // conservative? How about icmp stride optimization?
+ bool UsePostInc = !(e > 1 && LatchExit && ExitingBlock != LatchBlock);
+ if (UsePostInc && ExitingBlock != LatchBlock) {
+ if (!Cond->hasOneUse())
+ // See below, we don't want the condition to be cloned.
+ UsePostInc = false;
+ else {
+ // If exiting block is the latch block, we know it's safe and profitable
+ // to transform the icmp to use post-inc iv. Otherwise do so only if it
+ // would not reuse another iv and its iv would be reused by other uses.
+ // We are optimizing for the case where the icmp is the only use of the
+ // iv.
+ IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[CondStride];
+ for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
+ E = StrideUses.Users.end(); I != E; ++I) {
+ if (I->getUser() == Cond)
+ continue;
+ if (!I->isUseOfPostIncrementedValue()) {
+ UsePostInc = false;
+ break;
+ }
+ }
+ }
+
+ // If iv for the stride might be shared and any of the users use pre-inc
+ // iv might be used, then it's not safe to use post-inc iv.
+ if (UsePostInc &&
+ isa<SCEVConstant>(CondStride) &&
+ StrideMightBeShared(CondStride, L, true))
+ UsePostInc = false;
}
- }
- // If we get to here, we know that we can transform the setcc instruction to
- // use the post-incremented version of the IV, allowing us to coalesce the
- // live ranges for the IV correctly.
- CondUse->setOffset(SE->getMinusSCEV(CondUse->getOffset(), *CondStride));
- CondUse->setIsUseOfPostIncrementedValue(true);
- Changed = true;
+ // If the trip count is computed in terms of a max (due to ScalarEvolution
+ // being unable to find a sufficient guard, for example), change the loop
+ // comparison to use SLT or ULT instead of NE.
+ Cond = OptimizeMax(L, Cond, CondUse);
+
+ // If possible, change stride and operands of the compare instruction to
+ // eliminate one stride. However, avoid rewriting the compare instruction
+ // with an iv of new stride if it's likely the new stride uses will be
+ // rewritten using the stride of the compare instruction.
+ if (ExitingBlock == LatchBlock && isa<SCEVConstant>(CondStride)) {
+ // If the condition stride is a constant and it's the only use, we might
+ // want to optimize it first by turning it to count toward zero.
+ if (!StrideMightBeShared(CondStride, L, false) &&
+ !ShouldCountToZero(Cond, CondUse, SE, L, TLI))
+ Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);
+ }
- ++NumLoopCond;
-}
+ if (!UsePostInc)
+ continue;
-/// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding
-/// when to exit the loop is used only for that purpose, try to rearrange things
-/// so it counts down to a test against zero.
-void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
+ DEBUG(errs() << " Change loop exiting icmp to use postinc iv: "
+ << *Cond << '\n');
- // If the number of times the loop is executed isn't computable, give up.
- const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
- if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
- return;
+ // It's possible for the setcc instruction to be anywhere in the loop, and
+ // possible for it to have multiple users. If it is not immediately before
+ // the exiting block branch, move it.
+ if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) {
+ if (Cond->hasOneUse()) { // Condition has a single use, just move it.
+ Cond->moveBefore(TermBr);
+ } else {
+ // Otherwise, clone the terminating condition and insert into the
+ // loopend.
+ Cond = cast<ICmpInst>(Cond->clone());
+ Cond->setName(L->getHeader()->getName() + ".termcond");
+ ExitingBlock->getInstList().insert(TermBr, Cond);
+
+ // Clone the IVUse, as the old use still exists!
+ IU->IVUsesByStride[CondStride]->addUser(CondUse->getOffset(), Cond,
+ CondUse->getOperandValToReplace());
+ CondUse = &IU->IVUsesByStride[CondStride]->Users.back();
+ }
+ }
- // Get the terminating condition for the loop if possible (this isn't
- // necessarily in the latch, or a block that's a predecessor of the header).
- if (!L->getExitBlock())
- return; // More than one loop exit blocks.
+ // If we get to here, we know that we can transform the setcc instruction to
+ // use the post-incremented version of the IV, allowing us to coalesce the
+ // live ranges for the IV correctly.
+ CondUse->setOffset(SE->getMinusSCEV(CondUse->getOffset(), CondStride));
+ CondUse->setIsUseOfPostIncrementedValue(true);
+ Changed = true;
- // Okay, there is one exit block. Try to find the condition that causes the
- // loop to be exited.
- BasicBlock *ExitingBlock = L->getExitingBlock();
- if (!ExitingBlock)
- return; // More than one block exiting!
+ ++NumLoopCond;
+ }
+}
- // Okay, we've computed the exiting block. See what condition causes us to
- // exit.
- //
- // FIXME: we should be able to handle switch instructions (with a single exit)
- BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
- if (TermBr == 0) return;
- assert(TermBr->isConditional() && "If unconditional, it can't be in loop!");
- if (!isa<ICmpInst>(TermBr->getCondition()))
- return;
- ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
+bool LoopStrengthReduce::OptimizeLoopCountIVOfStride(const SCEV* &Stride,
+ IVStrideUse* &CondUse,
+ Loop *L) {
+ // If the only use is an icmp of a loop exiting conditional branch, then
+ // attempt the optimization.
+ BasedUser User = BasedUser(*CondUse, SE);
+ assert(isa<ICmpInst>(User.Inst) && "Expecting an ICMPInst!");
+ ICmpInst *Cond = cast<ICmpInst>(User.Inst);
+
+ // Less strict check now that compare stride optimization is done.
+ if (!ShouldCountToZero(Cond, CondUse, SE, L))
+ return false;
- // Handle only tests for equality for the moment, and only stride 1.
- if (Cond->getPredicate() != CmpInst::ICMP_EQ)
- return;
- const SCEV *IV = SE->getSCEV(Cond->getOperand(0));
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
- const SCEV *One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType());
- if (!AR || !AR->isAffine() || AR->getStepRecurrence(*SE) != One)
- return;
- // If the RHS of the comparison is defined inside the loop, the rewrite
- // cannot be done.
- if (Instruction *CR = dyn_cast<Instruction>(Cond->getOperand(1)))
- if (L->contains(CR->getParent()))
- return;
+ Value *CondOp0 = Cond->getOperand(0);
+ PHINode *PHIExpr = dyn_cast<PHINode>(CondOp0);
+ Instruction *Incr;
+ if (!PHIExpr) {
+ // Value tested is postinc. Find the phi node.
+ Incr = dyn_cast<BinaryOperator>(CondOp0);
+ // FIXME: Just use User.OperandValToReplace here?
+ if (!Incr || Incr->getOpcode() != Instruction::Add)
+ return false;
- // Make sure the IV is only used for counting. Value may be preinc or
- // postinc; 2 uses in either case.
- if (!Cond->getOperand(0)->hasNUses(2))
- return;
- PHINode *phi = dyn_cast<PHINode>(Cond->getOperand(0));
- Instruction *incr;
- if (phi && phi->getParent()==L->getHeader()) {
- // value tested is preinc. Find the increment.
- // A CmpInst is not a BinaryOperator; we depend on this.
- Instruction::use_iterator UI = phi->use_begin();
- incr = dyn_cast<BinaryOperator>(UI);
- if (!incr)
- incr = dyn_cast<BinaryOperator>(++UI);
- // 1 use for postinc value, the phi. Unnecessarily conservative?
- if (!incr || !incr->hasOneUse() || incr->getOpcode()!=Instruction::Add)
- return;
- } else {
- // Value tested is postinc. Find the phi node.
- incr = dyn_cast<BinaryOperator>(Cond->getOperand(0));
- if (!incr || incr->getOpcode()!=Instruction::Add)
- return;
-
- Instruction::use_iterator UI = Cond->getOperand(0)->use_begin();
- phi = dyn_cast<PHINode>(UI);
- if (!phi)
- phi = dyn_cast<PHINode>(++UI);
+ PHIExpr = dyn_cast<PHINode>(Incr->getOperand(0));
+ if (!PHIExpr)
+ return false;
// 1 use for preinc value, the increment.
- if (!phi || phi->getParent()!=L->getHeader() || !phi->hasOneUse())
- return;
+ if (!PHIExpr->hasOneUse())
+ return false;
+ } else {
+ assert(isa<PHINode>(CondOp0) &&
+ "Unexpected loop exiting counting instruction sequence!");
+ PHIExpr = cast<PHINode>(CondOp0);
+ // Value tested is preinc. Find the increment.
+ // A CmpInst is not a BinaryOperator; we depend on this.
+ Instruction::use_iterator UI = PHIExpr->use_begin();
+ Incr = dyn_cast<BinaryOperator>(UI);
+ if (!Incr)
+ Incr = dyn_cast<BinaryOperator>(++UI);
+ // One use for postinc value, the phi. Unnecessarily conservative?
+ if (!Incr || !Incr->hasOneUse() || Incr->getOpcode() != Instruction::Add)
+ return false;
}
// Replace the increment with a decrement.
- BinaryOperator *decr =
- BinaryOperator::Create(Instruction::Sub, incr->getOperand(0),
- incr->getOperand(1), "tmp", incr);
- incr->replaceAllUsesWith(decr);
- incr->eraseFromParent();
+ DEBUG(errs() << "LSR: Examining use ");
+ DEBUG(WriteAsOperand(errs(), CondOp0, /*PrintType=*/false));
+ DEBUG(errs() << " in Inst: " << *Cond << '\n');
+ BinaryOperator *Decr = BinaryOperator::Create(Instruction::Sub,
+ Incr->getOperand(0), Incr->getOperand(1), "tmp", Incr);
+ Incr->replaceAllUsesWith(Decr);
+ Incr->eraseFromParent();
// Substitute endval-startval for the original startval, and 0 for the
- // original endval. Since we're only testing for equality this is OK even
+ // original endval. Since we're only testing for equality this is OK even
// if the computation wraps around.
BasicBlock *Preheader = L->getLoopPreheader();
Instruction *PreInsertPt = Preheader->getTerminator();
- int inBlock = L->contains(phi->getIncomingBlock(0)) ? 1 : 0;
- Value *startVal = phi->getIncomingValue(inBlock);
- Value *endVal = Cond->getOperand(1);
- // FIXME check for case where both are constant
+ unsigned InBlock = L->contains(PHIExpr->getIncomingBlock(0)) ? 1 : 0;
+ Value *StartVal = PHIExpr->getIncomingValue(InBlock);
+ Value *EndVal = Cond->getOperand(1);
+ DEBUG(errs() << " Optimize loop counting iv to count down ["
+ << *EndVal << " .. " << *StartVal << "]\n");
+
+ // FIXME: check for case where both are constant.
Constant* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0);
- BinaryOperator *NewStartVal =
- BinaryOperator::Create(Instruction::Sub, endVal, startVal,
- "tmp", PreInsertPt);
- phi->setIncomingValue(inBlock, NewStartVal);
+ BinaryOperator *NewStartVal = BinaryOperator::Create(Instruction::Sub,
+ EndVal, StartVal, "tmp", PreInsertPt);
+ PHIExpr->setIncomingValue(InBlock, NewStartVal);
Cond->setOperand(1, Zero);
+ DEBUG(errs() << " New icmp: " << *Cond << "\n");
+
+ int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue();
+ const SCEV *NewStride = 0;
+ bool Found = false;
+ for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
+ const SCEV *OldStride = IU->StrideOrder[i];
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OldStride))
+ if (SC->getValue()->getSExtValue() == -SInt) {
+ Found = true;
+ NewStride = OldStride;
+ break;
+ }
+ }
- Changed = true;
+ if (!Found)
+ NewStride = SE->getIntegerSCEV(-SInt, Stride->getType());
+ IU->AddUser(NewStride, CondUse->getOffset(), Cond, Cond->getOperand(0));
+ IU->IVUsesByStride[Stride]->removeUser(CondUse);
+
+ CondUse = &IU->IVUsesByStride[NewStride]->Users.back();
+ Stride = NewStride;
+
+ ++NumCountZero;
+
+ return true;
}
-bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
+/// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding
+/// when to exit the loop is used only for that purpose, try to rearrange things
+/// so it counts down to a test against zero.
+bool LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
+ bool ThisChanged = false;
+ for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
+ const SCEV *Stride = IU->StrideOrder[i];
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
+ IU->IVUsesByStride.find(Stride);
+ assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
+ // FIXME: Generalize to non-affine IV's.
+ if (!SI->first->isLoopInvariant(L))
+ continue;
+ // If stride is a constant and it has an icmpinst use, check if we can
+ // optimize the loop to count down.
+ if (isa<SCEVConstant>(Stride) && SI->second->Users.size() == 1) {
+ Instruction *User = SI->second->Users.begin()->getUser();
+ if (!isa<ICmpInst>(User))
+ continue;
+ const SCEV *CondStride = Stride;
+ IVStrideUse *Use = &*SI->second->Users.begin();
+ if (!OptimizeLoopCountIVOfStride(CondStride, Use, L))
+ continue;
+ ThisChanged = true;
+ // Now check if it's possible to reuse this iv for other stride uses.
+ for (unsigned j = 0, ee = IU->StrideOrder.size(); j != ee; ++j) {
+ const SCEV *SStride = IU->StrideOrder[j];
+ if (SStride == CondStride)
+ continue;
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator SII =
+ IU->IVUsesByStride.find(SStride);
+ assert(SII != IU->IVUsesByStride.end() && "Stride doesn't exist!");
+ // FIXME: Generalize to non-affine IV's.
+ if (!SII->first->isLoopInvariant(L))
+ continue;
+ // FIXME: Rewrite other stride using CondStride.
+ }
+ }
+ }
+
+ Changed |= ThisChanged;
+ return ThisChanged;
+}
+
+bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
IU = &getAnalysis<IVUsers>();
- LI = &getAnalysis<LoopInfo>();
- DT = &getAnalysis<DominatorTree>();
SE = &getAnalysis<ScalarEvolution>();
Changed = false;
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->getLoopPreheader() || !L->getLoopLatch())
+ return false;
+
if (!IU->IVUsesByStride.empty()) {
DEBUG(errs() << "\nLSR on \"" << L->getHeader()->getParent()->getName()
<< "\" ";
// Change loop terminating condition to use the postinc iv when possible
// and optimize loop terminating compare. FIXME: Move this after
- // StrengthReduceStridedIVUsers?
+ // StrengthReduceIVUsersOfStride?
OptimizeLoopTermCond(L);
// FIXME: We can shrink overlarge IV's here. e.g. if the code has
// IVsByStride keeps IVs for one particular loop.
assert(IVsByStride.empty() && "Stale entries in IVsByStride?");
- // Note: this processes each stride/type pair individually. All users
- // passed into StrengthReduceStridedIVUsers have the same type AND stride.
- // Also, note that we iterate over IVUsesByStride indirectly by using
- // StrideOrder. This extra layer of indirection makes the ordering of
- // strides deterministic - not dependent on map order.
- for (unsigned Stride = 0, e = IU->StrideOrder.size();
- Stride != e; ++Stride) {
- std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
- IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
- assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
- // FIXME: Generalize to non-affine IV's.
- if (!SI->first->isLoopInvariant(L))
- continue;
- StrengthReduceStridedIVUsers(SI->first, *SI->second, L);
- }
- }
+ StrengthReduceIVUsers(L);
- // After all sharing is done, see if we can adjust the loop to test against
- // zero instead of counting up to a maximum. This is usually faster.
- OptimizeLoopCountIV(L);
+ // After all sharing is done, see if we can adjust the loop to test against
+ // zero instead of counting up to a maximum. This is usually faster.
+ OptimizeLoopCountIV(L);
- // We're done analyzing this loop; release all the state we built up for it.
- IVsByStride.clear();
- StrideNoReuse.clear();
+ // We're done analyzing this loop; release all the state we built up for it.
+ IVsByStride.clear();
- // Clean up after ourselves
- if (!DeadInsts.empty())
+ // Clean up after ourselves
DeleteTriviallyDeadInstructions();
+ }
// At this point, it is worth checking to see if any recurrence PHIs are also
// dead, so that we can remove them as well.