X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FAnalysis%2FScalarEvolution.cpp;h=089ca42ef68d5eff75c9334075211e5e49cb1186;hb=c7a3b95c0f3e0aabc61e39ac635c340387765f30;hp=1fe72fd2b9ae5d16f4b5f1a501c445a80f10ace4;hpb=f8fd841a4bb7a59f81cf4642169e8251e039acfe;p=oota-llvm.git

diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 1fe72fd2b9a..089ca42ef68 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -59,33 +59,33 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "scalar-evolution"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Operator.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Operator.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ConstantRange.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/InstIterator.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 #include <algorithm>
 using namespace llvm;
 
@@ -105,10 +105,15 @@ MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
                                  "derived loop"),
                         cl::init(100));
 
+// FIXME: Enable this with XDEBUG when the test suite is clean.
+static cl::opt<bool>
+VerifySCEV("verify-scev",
+           cl::desc("Verify ScalarEvolution's backedge taken counts (slow)"));
+
 INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution",
                 "Scalar Evolution Analysis", false, true)
 INITIALIZE_PASS_DEPENDENCY(LoopInfo)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
 INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution",
                 "Scalar Evolution Analysis", false, true)
@@ -122,15 +127,17 @@ char ScalarEvolution::ID = 0;
 //===----------------------------------------------------------------------===//
 // Implementation of the SCEV class.
// +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void SCEV::dump() const { print(dbgs()); dbgs() << '\n'; } +#endif void SCEV::print(raw_ostream &OS) const { - switch (getSCEVType()) { + switch (static_cast(getSCEVType())) { case scConstant: - WriteAsOperand(OS, cast(this)->getValue(), false); + cast(this)->getValue()->printAsOperand(OS, false); return; case scTruncate: { const SCEVTruncateExpr *Trunc = cast(this); @@ -166,7 +173,7 @@ void SCEV::print(raw_ostream &OS) const { if (AR->getNoWrapFlags(FlagNW) && !AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW))) OS << "nw><"; - WriteAsOperand(OS, AR->getLoop()->getHeader(), /*PrintType=*/false); + AR->getLoop()->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ">"; return; } @@ -175,7 +182,7 @@ void SCEV::print(raw_ostream &OS) const { case scUMaxExpr: case scSMaxExpr: { const SCEVNAryExpr *NAry = cast(this); - const char *OpStr = 0; + const char *OpStr = nullptr; switch (NAry->getSCEVType()) { case scAddExpr: OpStr = " + "; break; case scMulExpr: OpStr = " * "; break; @@ -186,7 +193,7 @@ void SCEV::print(raw_ostream &OS) const { for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); I != E; ++I) { OS << **I; - if (llvm::next(I) != E) + if (std::next(I) != E) OS << OpStr; } OS << ")"; @@ -221,25 +228,24 @@ void SCEV::print(raw_ostream &OS) const { Constant *FieldNo; if (U->isOffsetOf(CTy, FieldNo)) { OS << "offsetof(" << *CTy << ", "; - WriteAsOperand(OS, FieldNo, false); + FieldNo->printAsOperand(OS, false); OS << ")"; return; } // Otherwise just print it normally. - WriteAsOperand(OS, U->getValue(), false); + U->getValue()->printAsOperand(OS, false); return; } case scCouldNotCompute: OS << "***COULDNOTCOMPUTE***"; return; - default: break; } llvm_unreachable("Unknown SCEV kind!"); } Type *SCEV::getType() const { - switch (getSCEVType()) { + switch (static_cast(getSCEVType())) { case scConstant: return cast(this)->getType(); case scTruncate: @@ -259,11 +265,8 @@ Type *SCEV::getType() const { return cast(this)->getType(); case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - return 0; - default: break; } llvm_unreachable("Unknown SCEV kind!"); - return 0; } bool SCEV::isZero() const { @@ -309,14 +312,14 @@ const SCEV *ScalarEvolution::getConstant(ConstantInt *V) { FoldingSetNodeID ID; ID.AddInteger(scConstant); ID.AddPointer(V); - void *IP = 0; + void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V); UniqueSCEVs.InsertNode(S, IP); return S; } -const SCEV *ScalarEvolution::getConstant(const APInt& Val) { +const SCEV *ScalarEvolution::getConstant(const APInt &Val) { return getConstant(ConstantInt::get(getContext(), Val)); } @@ -362,7 +365,7 @@ void SCEVUnknown::deleted() { SE->UniqueSCEVs.RemoveNode(this); // Release the value. - setValPtr(0); + setValPtr(nullptr); } void SCEVUnknown::allUsesReplacedWith(Value *New) { @@ -476,7 +479,7 @@ namespace { // Aside from the getSCEVType() ordering, the particular ordering // isn't very important except that it's beneficial to be consistent, // so that (a + b) and (b + a) don't end up as different expressions. - switch (LType) { + switch (static_cast(LType)) { case scUnknown: { const SCEVUnknown *LU = cast(LHS); const SCEVUnknown *RU = cast(RHS); @@ -579,6 +582,9 @@ namespace { // Lexicographically compare n-ary expressions. 
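// --- [Editorial aside; illustrative C++ sketch, not part of this patch] ---
// The comparison code that follows must impose a total order on SCEVs so
// that canonicalization maps (a + b) and (b + a) to the same expression,
// and operand lists of different lengths must compare by length before
// comparing element-wise. A minimal standalone model of that comparison,
// using hypothetical stand-in types rather than LLVM's:

#include <cstddef>
#include <vector>

struct FakeExpr { int Kind; std::vector<int> Ops; }; // stand-in, not LLVM

static int compareFake(const FakeExpr &L, const FakeExpr &R) {
  if (L.Kind != R.Kind)                // primary key: expression kind
    return L.Kind - R.Kind;
  if (L.Ops.size() != R.Ops.size())    // order by operand count first
    return (int)L.Ops.size() - (int)R.Ops.size();
  for (size_t i = 0, e = L.Ops.size(); i != e; ++i)
    if (L.Ops[i] != R.Ops[i])          // then lexicographically by element
      return L.Ops[i] < R.Ops[i] ? -1 : 1;
  return 0;                            // equal
}
// --- [End aside] ---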
unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands(); + if (LNumOps != RNumOps) + return (int)LNumOps - (int)RNumOps; + for (unsigned i = 0; i != LNumOps; ++i) { if (i >= RNumOps) return 1; @@ -610,12 +616,10 @@ namespace { return compare(LC->getOperand(), RC->getOperand()); } - default: - break; + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); } - llvm_unreachable("Unknown SCEV kind!"); - return 0; } }; } @@ -755,7 +759,7 @@ static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K, unsigned CalculationBits = W + T; // Calculate 2^T, at width T+W. - APInt DivFactor = APInt(CalculationBits, 1).shl(T); + APInt DivFactor = APInt::getOneBitSet(CalculationBits, T); // Calculate the multiplicative inverse of K! / 2^T; // this multiplication factor will perform the exact division by @@ -825,14 +829,13 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, ID.AddInteger(scTruncate); ID.AddPointer(Op); ID.AddPointer(Ty); - void *IP = 0; + void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; // Fold if the operand is constant. if (const SCEVConstant *SC = dyn_cast(Op)) return getConstant( - cast(ConstantExpr::getTrunc(SC->getValue(), - getEffectiveSCEVType(Ty)))); + cast(ConstantExpr::getTrunc(SC->getValue(), Ty))); // trunc(trunc(x)) --> trunc(x) if (const SCEVTruncateExpr *ST = dyn_cast(Op)) @@ -884,13 +887,6 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap); } - // As a special case, fold trunc(undef) to undef. We don't want to - // know too much about SCEVUnknowns, but this special case is handy - // and harmless. - if (const SCEVUnknown *U = dyn_cast(Op)) - if (isa(U->getValue())) - return getSCEV(UndefValue::get(Ty)); - // The cast wasn't folded; create an explicit cast node. We can reuse // the existing insert position since if we get here, we won't have // made any changes which would invalidate it. @@ -911,8 +907,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, // Fold if the operand is constant. if (const SCEVConstant *SC = dyn_cast(Op)) return getConstant( - cast(ConstantExpr::getZExt(SC->getValue(), - getEffectiveSCEVType(Ty)))); + cast(ConstantExpr::getZExt(SC->getValue(), Ty))); // zext(zext(x)) --> zext(x) if (const SCEVZeroExtendExpr *SZ = dyn_cast(Op)) @@ -924,7 +919,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, ID.AddInteger(scZeroExtend); ID.AddPointer(Op); ID.AddPointer(Ty); - void *IP = 0; + void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; // zext(trunc(x)) --> zext(x) or x or trunc(x) @@ -981,12 +976,15 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); // Check whether Start+Step*MaxBECount has no unsigned overflow. 
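// --- [Editorial aside; illustrative C++ sketch, not part of this patch] ---
// The code below proves the zext can be pushed inside the addrec by redoing
// Start + Step*MaxBECount at twice the bit width: if the narrow result,
// zero-extended, matches the sum of zero-extended operands computed wide,
// no iteration of the recurrence wrapped. The same check at fixed widths
// (32 -> 64 bits), with hypothetical parameter names:

#include <cstdint>

static bool addRecCannotWrapUnsigned(uint32_t Start, uint32_t Step,
                                     uint32_t MaxBECount) {
  uint32_t Narrow = Start + Step * MaxBECount;                 // may wrap
  uint64_t Wide = (uint64_t)Start + (uint64_t)Step * MaxBECount;
  // Equal exactly when the mathematical value fits in 32 bits, i.e. when
  // neither the multiply nor the add overflowed in the narrow type.
  return (uint64_t)Narrow == Wide;
}
// --- [End aside] ---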
const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step); - const SCEV *Add = getAddExpr(Start, ZMul); + const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul), WideTy); + const SCEV *WideStart = getZeroExtendExpr(Start, WideTy); + const SCEV *WideMaxBECount = + getZeroExtendExpr(CastedMaxBECount, WideTy); const SCEV *OperandExtendedAdd = - getAddExpr(getZeroExtendExpr(Start, WideTy), - getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), + getAddExpr(WideStart, + getMulExpr(WideMaxBECount, getZeroExtendExpr(Step, WideTy))); - if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) { + if (ZAdd == OperandExtendedAdd) { // Cache knowledge of AR NUW, which is propagated to this AddRec. const_cast(AR)->setNoWrapFlags(SCEV::FlagNUW); // Return the expression with the addrec on the outside. @@ -996,13 +994,11 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, } // Similar to above, only this time treat the step value as signed. // This covers loops that count down. - const SCEV *SMul = getMulExpr(CastedMaxBECount, Step); - Add = getAddExpr(Start, SMul); OperandExtendedAdd = - getAddExpr(getZeroExtendExpr(Start, WideTy), - getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), + getAddExpr(WideStart, + getMulExpr(WideMaxBECount, getSignExtendExpr(Step, WideTy))); - if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) { + if (ZAdd == OperandExtendedAdd) { // Cache knowledge of AR NW, which is propagated to this AddRec. // Negative step causes unsigned wrap, but it still can't self-wrap. const_cast(AR)->setNoWrapFlags(SCEV::FlagNW); @@ -1076,7 +1072,7 @@ static const SCEV *getOverflowLimitForStep(const SCEV *Step, return SE->getConstant(APInt::getSignedMaxValue(BitWidth) - SE->getSignedRange(Step).getSignedMin()); } - return 0; + return nullptr; } // The recurrence AR has been shown to have no signed wrap. Typically, if we can @@ -1095,7 +1091,7 @@ static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR, // Check for a simple looking step prior to loop entry. const SCEVAddExpr *SA = dyn_cast(Start); if (!SA) - return 0; + return nullptr; // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV // subtraction is expensive. For this purpose, perform a quick and dirty @@ -1107,7 +1103,7 @@ static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR, DiffOps.push_back(*I); } if (DiffOps.size() == SA->getNumOperands()) - return 0; + return nullptr; // This is a postinc AR. Check for overflow on the preinc recurrence using the // same three conditions that getSignExtendedExpr checks. @@ -1143,7 +1139,7 @@ static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR, SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) { return PreStart; } - return 0; + return nullptr; } // Get the normalized sign-extended expression for this AddRec's Start. @@ -1169,8 +1165,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // Fold if the operand is constant. 
if (const SCEVConstant *SC = dyn_cast(Op)) return getConstant( - cast(ConstantExpr::getSExt(SC->getValue(), - getEffectiveSCEVType(Ty)))); + cast(ConstantExpr::getSExt(SC->getValue(), Ty))); // sext(sext(x)) --> sext(x) if (const SCEVSignExtendExpr *SS = dyn_cast(Op)) @@ -1186,7 +1181,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, ID.AddInteger(scSignExtend); ID.AddPointer(Op); ID.AddPointer(Ty); - void *IP = 0; + void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; // If the input value is provably positive, build a zext instead. @@ -1247,12 +1242,15 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); // Check whether Start+Step*MaxBECount has no signed overflow. const SCEV *SMul = getMulExpr(CastedMaxBECount, Step); - const SCEV *Add = getAddExpr(Start, SMul); + const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul), WideTy); + const SCEV *WideStart = getSignExtendExpr(Start, WideTy); + const SCEV *WideMaxBECount = + getZeroExtendExpr(CastedMaxBECount, WideTy); const SCEV *OperandExtendedAdd = - getAddExpr(getSignExtendExpr(Start, WideTy), - getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), + getAddExpr(WideStart, + getMulExpr(WideMaxBECount, getSignExtendExpr(Step, WideTy))); - if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) { + if (SAdd == OperandExtendedAdd) { // Cache knowledge of AR NSW, which is propagated to this AddRec. const_cast(AR)->setNoWrapFlags(SCEV::FlagNSW); // Return the expression with the addrec on the outside. @@ -1262,13 +1260,11 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, } // Similar to above, only this time treat the step value as unsigned. // This covers loops that count up with an unsigned step. - const SCEV *UMul = getMulExpr(CastedMaxBECount, Step); - Add = getAddExpr(Start, UMul); OperandExtendedAdd = - getAddExpr(getSignExtendExpr(Start, WideTy), - getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), + getAddExpr(WideStart, + getMulExpr(WideMaxBECount, getZeroExtendExpr(Step, WideTy))); - if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) { + if (SAdd == OperandExtendedAdd) { // Cache knowledge of AR NSW, which is propagated to this AddRec. const_cast(AR)->setNoWrapFlags(SCEV::FlagNSW); // Return the expression with the addrec on the outside. @@ -1350,13 +1346,6 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW); } - // As a special case, fold anyext(undef) to undef. We don't want to - // know too much about SCEVUnknowns, but this special case is handy - // and harmless. - if (const SCEVUnknown *U = dyn_cast(Op)) - if (isa(U->getValue())) - return getSCEV(UndefValue::get(Ty)); - // If the expression is obviously signed, use the sext cast value. 
if (isa(Op)) return SExt; @@ -1371,7 +1360,7 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, /// what it does, given a sequence of operands that would form an add /// expression like this: /// -/// m + n + 13 + (A * (o + p + (B * q + m + 29))) + r + (-1 * r) +/// m + n + 13 + (A * (o + p + (B * (q + m + 29)))) + r + (-1 * r) /// /// where A and B are constants, update the map with these values: /// @@ -1392,7 +1381,7 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, /// static bool CollectAddOperandsWithScales(DenseMap &M, - SmallVector &NewOps, + SmallVectorImpl &NewOps, APInt &AccumulatedConstant, const SCEV *const *Ops, size_t NumOperands, const APInt &Scale, @@ -1640,7 +1629,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, // re-generate the operands list. Group the operands by constant scale, // to avoid multiplying by the same constant scale multiple times. std::map, APIntCompare> MulOpLists; - for (SmallVector::const_iterator I = NewOps.begin(), + for (SmallVectorImpl::const_iterator I = NewOps.begin(), E = NewOps.end(); I != E; ++I) MulOpLists[M.find(*I)->second].push_back(*I); // Re-generate the operands list. @@ -1822,7 +1811,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, ID.AddInteger(scAddExpr); for (unsigned i = 0, e = Ops.size(); i != e; ++i) ID.AddPointer(Ops[i]); - void *IP = 0; + void *IP = nullptr; SCEVAddExpr *S = static_cast(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); if (!S) { @@ -1844,7 +1833,7 @@ static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) { /// Compute the result of "n choose k", the binomial coefficient. If an /// intermediate computation overflows, Overflow will be set and the return will -/// be garbage. Overflow is not cleared on absense of overflow. +/// be garbage. Overflow is not cleared on absence of overflow. static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) { // We use the multiplicative formula: // n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1 . @@ -2043,63 +2032,67 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, for (unsigned OtherIdx = Idx+1; OtherIdx < Ops.size() && isa(Ops[OtherIdx]); ++OtherIdx) { - if (AddRecLoop == cast(Ops[OtherIdx])->getLoop()) { - // {A1,+,A2,+,...,+,An} * {B1,+,B2,+,...,+,Bn} - // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [ - // choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z - // ]]],+,...up to x=2n}. - // Note that the arguments to choose() are always integers with values - // known at compile time, never SCEV objects. - // - // The implementation avoids pointless extra computations when the two - // addrec's are of different length (mathematically, it's equivalent to - // an infinite stream of zeros on the right). 
-        bool OpsModified = false;
-        for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
-             ++OtherIdx)
-          if (const SCEVAddRecExpr *OtherAddRec =
-                dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
-            if (OtherAddRec->getLoop() == AddRecLoop) {
-              bool Overflow = false;
-              Type *Ty = AddRec->getType();
-              bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
-              SmallVector<const SCEV*, 7> AddRecOps;
-              for (int x = 0, xe = AddRec->getNumOperands() +
-                     OtherAddRec->getNumOperands() - 1;
-                   x != xe && !Overflow; ++x) {
-                const SCEV *Term = getConstant(Ty, 0);
-                for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
-                  uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
-                  for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
-                         ze = std::min(x+1, (int)OtherAddRec->getNumOperands());
-                       z < ze && !Overflow; ++z) {
-                    uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow);
-                    uint64_t Coeff;
-                    if (LargerThan64Bits)
-                      Coeff = umul_ov(Coeff1, Coeff2, Overflow);
-                    else
-                      Coeff = Coeff1*Coeff2;
-                    const SCEV *CoeffTerm = getConstant(Ty, Coeff);
-                    const SCEV *Term1 = AddRec->getOperand(y-z);
-                    const SCEV *Term2 = OtherAddRec->getOperand(z);
-                    Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2));
-                  }
-                }
-                AddRecOps.push_back(Term);
-              }
-              if (!Overflow) {
-                const SCEV *NewAddRec = getAddRecExpr(AddRecOps,
-                                                      AddRec->getLoop(),
-                                                      SCEV::FlagAnyWrap);
-                if (Ops.size() == 2) return NewAddRec;
-                Ops[Idx] = AddRec = cast<SCEVAddRecExpr>(NewAddRec);
-                Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
-                OpsModified = true;
-              }
+      if (AddRecLoop != cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop())
+        continue;
+
+      // {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
+      // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
+      //       choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z
+      //     ]]],+,...up to x=2n}.
+      // Note that the arguments to choose() are always integers with values
+      // known at compile time, never SCEV objects.
+      //
+      // The implementation avoids pointless extra computations when the two
+      // addrec's are of different length (mathematically, it's equivalent to
+      // an infinite stream of zeros on the right).
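// --- [Editorial aside; illustrative C++ sketch, not part of this patch] ---
// The closed form in the comment above can be sanity-checked numerically: a
// chrec {C0,+,C1,+,...} evaluates at iteration i to sum_k Ck*choose(i, k),
// and multiplying {1,+,1} (the sequence 1, 2, 3, ...) by itself should give
// {1,+,3,+,2} (the perfect squares). Standalone check:

#include <cassert>
#include <cstdint>
#include <vector>

static uint64_t choose(uint64_t n, uint64_t k) {
  uint64_t r = 1;
  for (uint64_t i = 0; i != k; ++i)
    r = r * (n - i) / (i + 1); // division is exact at every step in this order
  return r;
}

static uint64_t evalChrec(const std::vector<uint64_t> &C, uint64_t i) {
  uint64_t v = 0;
  for (size_t k = 0; k != C.size(); ++k)
    v += C[k] * choose(i, k);
  return v;
}

int main() {
  const std::vector<uint64_t> A = {1, 1};    // {1,+,1}
  const std::vector<uint64_t> P = {1, 3, 2}; // claimed {1,+,1} * {1,+,1}
  for (uint64_t i = 0; i != 16; ++i)
    assert(evalChrec(A, i) * evalChrec(A, i) == evalChrec(P, i));
  return 0;
}
// --- [End aside] ---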
+ bool OpsModified = false; + for (; OtherIdx != Ops.size() && isa(Ops[OtherIdx]); + ++OtherIdx) { + const SCEVAddRecExpr *OtherAddRec = + dyn_cast(Ops[OtherIdx]); + if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop) + continue; + + bool Overflow = false; + Type *Ty = AddRec->getType(); + bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64; + SmallVector AddRecOps; + for (int x = 0, xe = AddRec->getNumOperands() + + OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) { + const SCEV *Term = getConstant(Ty, 0); + for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) { + uint64_t Coeff1 = Choose(x, 2*x - y, Overflow); + for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1), + ze = std::min(x+1, (int)OtherAddRec->getNumOperands()); + z < ze && !Overflow; ++z) { + uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow); + uint64_t Coeff; + if (LargerThan64Bits) + Coeff = umul_ov(Coeff1, Coeff2, Overflow); + else + Coeff = Coeff1*Coeff2; + const SCEV *CoeffTerm = getConstant(Ty, Coeff); + const SCEV *Term1 = AddRec->getOperand(y-z); + const SCEV *Term2 = OtherAddRec->getOperand(z); + Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2)); } - if (OpsModified) - return getMulExpr(Ops); + } + AddRecOps.push_back(Term); + } + if (!Overflow) { + const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(), + SCEV::FlagAnyWrap); + if (Ops.size() == 2) return NewAddRec; + Ops[Idx] = NewAddRec; + Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; + OpsModified = true; + AddRec = dyn_cast(NewAddRec); + if (!AddRec) + break; + } } + if (OpsModified) + return getMulExpr(Ops); } // Otherwise couldn't fold anything into this recurrence. Move onto the @@ -2112,7 +2105,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, ID.AddInteger(scMulExpr); for (unsigned i = 0, e = Ops.size(); i != e; ++i) ID.AddPointer(Ops[i]); - void *IP = 0; + void *IP = nullptr; SCEVMulExpr *S = static_cast(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); if (!S) { @@ -2237,7 +2230,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, ID.AddInteger(scUDivExpr); ID.AddPointer(LHS); ID.AddPointer(RHS); - void *IP = 0; + void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator), LHS, RHS); @@ -2245,6 +2238,77 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, return S; } +static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) { + APInt A = C1->getValue()->getValue().abs(); + APInt B = C2->getValue()->getValue().abs(); + uint32_t ABW = A.getBitWidth(); + uint32_t BBW = B.getBitWidth(); + + if (ABW > BBW) + B = B.zext(ABW); + else if (ABW < BBW) + A = A.zext(BBW); + + return APIntOps::GreatestCommonDivisor(A, B); +} + +/// getUDivExactExpr - Get a canonical unsigned division expression, or +/// something simpler if possible. There is no representation for an exact udiv +/// in SCEV IR, but we can attempt to remove factors from the LHS and RHS. +/// We can't do this when it's not exact because the udiv may be clearing bits. +const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS, + const SCEV *RHS) { + // TODO: we could try to find factors in all sorts of things, but for now we + // just deal with u/exact (multiply, constant). See SCEVDivision towards the + // end of this file for inspiration. 
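// --- [Editorial aside; illustrative C++ sketch, not part of this patch] ---
// Why the factoring below is only sound for an *exact* division: over the
// mathematical integers, (6*x)/4 and (3*x)/2 always agree, but in wrapping
// machine arithmetic the un-cancelled product can overflow where the
// cancelled one does not. An 8-bit demonstration:

#include <cassert>
#include <cstdint>

int main() {
  uint8_t x = 43;
  uint8_t lhs = (uint8_t)(6 * x) / 4; // 6*43 = 258 wraps to 2, then /4 -> 0
  uint8_t rhs = (uint8_t)(3 * x) / 2; // 3*43 = 129 fits,       then /2 -> 64
  assert(lhs != rhs);
  // A udiv known to be exact divides with no remainder, so removing the
  // common factor gcd(6, 4) = 2 cannot change the result or clear bits.
  return 0;
}
// --- [End aside] ---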
+
+  const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS);
+  if (!Mul)
+    return getUDivExpr(LHS, RHS);
+
+  if (const SCEVConstant *RHSCst = dyn_cast<SCEVConstant>(RHS)) {
+    // If the mulexpr multiplies by a constant, then that constant must be the
+    // first element of the mulexpr.
+    if (const SCEVConstant *LHSCst =
+            dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
+      if (LHSCst == RHSCst) {
+        SmallVector<const SCEV *, 2> Operands;
+        Operands.append(Mul->op_begin() + 1, Mul->op_end());
+        return getMulExpr(Operands);
+      }
+
+      // We can't just assume that LHSCst divides RHSCst cleanly, it could be
+      // that there's a factor provided by one of the other terms. We need to
+      // check.
+      APInt Factor = gcd(LHSCst, RHSCst);
+      if (!Factor.isIntN(1)) {
+        LHSCst = cast<SCEVConstant>(
+            getConstant(LHSCst->getValue()->getValue().udiv(Factor)));
+        RHSCst = cast<SCEVConstant>(
+            getConstant(RHSCst->getValue()->getValue().udiv(Factor)));
+        SmallVector<const SCEV *, 2> Operands;
+        Operands.push_back(LHSCst);
+        Operands.append(Mul->op_begin() + 1, Mul->op_end());
+        LHS = getMulExpr(Operands);
+        RHS = RHSCst;
+        Mul = dyn_cast<SCEVMulExpr>(LHS);
+        if (!Mul)
+          return getUDivExactExpr(LHS, RHS);
+      }
+    }
+  }
+
+  for (int i = 0, e = Mul->getNumOperands(); i != e; ++i) {
+    if (Mul->getOperand(i) == RHS) {
+      SmallVector<const SCEV *, 2> Operands;
+      Operands.append(Mul->op_begin(), Mul->op_begin() + i);
+      Operands.append(Mul->op_begin() + i + 1, Mul->op_end());
+      return getMulExpr(Operands);
+    }
+  }
+
+  return getUDivExpr(LHS, RHS);
+}
 
 /// getAddRecExpr - Get an add recurrence expression for the specified loop.
 /// Simplify the expression as much as possible.
@@ -2361,7 +2425,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
   for (unsigned i = 0, e = Operands.size(); i != e; ++i)
     ID.AddPointer(Operands[i]);
   ID.AddPointer(L);
-  void *IP = 0;
+  void *IP = nullptr;
   SCEVAddRecExpr *S =
     static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
   if (!S) {
@@ -2469,7 +2533,7 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
   ID.AddInteger(scSMaxExpr);
   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
     ID.AddPointer(Ops[i]);
-  void *IP = 0;
+  void *IP = nullptr;
   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
   const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
   std::uninitialized_copy(Ops.begin(), Ops.end(), O);
@@ -2573,7 +2637,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
   ID.AddInteger(scUMaxExpr);
   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
     ID.AddPointer(Ops[i]);
-  void *IP = 0;
+  void *IP = nullptr;
   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
   const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
   std::uninitialized_copy(Ops.begin(), Ops.end(), O);
@@ -2595,55 +2659,39 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
   return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
 }
 
-const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) {
-  // If we have TargetData, we can bypass creating a target-independent
+const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
+  // If we have DataLayout, we can bypass creating a target-independent
   // constant expression and then folding it back into a ConstantInt.
   // This is just a compile-time optimization.
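// --- [Editorial aside; illustrative C++ sketch, not part of this patch] ---
// What DataLayout::getTypeAllocSize() folds to below: the in-memory store
// size of the type rounded up to its ABI alignment. A hypothetical
// standalone helper with the same rounding behavior:

#include <cstdint>

static uint64_t allocSize(uint64_t StoreBytes, uint64_t ABIAlign) {
  return (StoreBytes + ABIAlign - 1) / ABIAlign * ABIAlign;
}

// e.g. on x86-64, x86_fp80 stores 10 bytes with 16-byte ABI alignment, so
// its alloc size is allocSize(10, 16) == 16.
// --- [End aside] ---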
- if (TD) - return getConstant(TD->getIntPtrType(getContext()), - TD->getTypeAllocSize(AllocTy)); + if (DL) + return getConstant(IntTy, DL->getTypeAllocSize(AllocTy)); Constant *C = ConstantExpr::getSizeOf(AllocTy); if (ConstantExpr *CE = dyn_cast(C)) - if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI)) C = Folded; Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); + assert(Ty == IntTy && "Effective SCEV type doesn't match"); return getTruncateOrZeroExtend(getSCEV(C), Ty); } -const SCEV *ScalarEvolution::getAlignOfExpr(Type *AllocTy) { - Constant *C = ConstantExpr::getAlignOf(AllocTy); - if (ConstantExpr *CE = dyn_cast(C)) - if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) - C = Folded; - Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); - return getTruncateOrZeroExtend(getSCEV(C), Ty); -} - -const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy, +const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy, + StructType *STy, unsigned FieldNo) { - // If we have TargetData, we can bypass creating a target-independent + // If we have DataLayout, we can bypass creating a target-independent // constant expression and then folding it back into a ConstantInt. // This is just a compile-time optimization. - if (TD) - return getConstant(TD->getIntPtrType(getContext()), - TD->getStructLayout(STy)->getElementOffset(FieldNo)); + if (DL) { + return getConstant(IntTy, + DL->getStructLayout(STy)->getElementOffset(FieldNo)); + } Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo); if (ConstantExpr *CE = dyn_cast(C)) - if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI)) C = Folded; - Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy)); - return getTruncateOrZeroExtend(getSCEV(C), Ty); -} -const SCEV *ScalarEvolution::getOffsetOfExpr(Type *CTy, - Constant *FieldNo) { - Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo); - if (ConstantExpr *CE = dyn_cast(C)) - if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) - C = Folded; - Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy)); + Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); } @@ -2656,7 +2704,7 @@ const SCEV *ScalarEvolution::getUnknown(Value *V) { FoldingSetNodeID ID; ID.AddInteger(scUnknown); ID.AddPointer(V); - void *IP = 0; + void *IP = nullptr; if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) { assert(cast(S)->getValue() == V && "Stale SCEVUnknown in uniquing map!"); @@ -2687,15 +2735,15 @@ bool ScalarEvolution::isSCEVable(Type *Ty) const { uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const { assert(isSCEVable(Ty) && "Type is not SCEVable!"); - // If we have a TargetData, use it! - if (TD) - return TD->getTypeSizeInBits(Ty); + // If we have a DataLayout, use it! + if (DL) + return DL->getTypeSizeInBits(Ty); // Integer types have fixed sizes. if (Ty->isIntegerTy()) return Ty->getPrimitiveSizeInBits(); - // The only other support type is pointer. Without TargetData, conservatively + // The only other support type is pointer. Without DataLayout, conservatively // assume pointers are 64-bit. 
   assert(Ty->isPointerTy() && "isSCEVable permitted a non-SCEVable type!");
   return 64;
 }
 
@@ -2708,14 +2756,17 @@ uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
 Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
   assert(isSCEVable(Ty) && "Type is not SCEVable!");
 
-  if (Ty->isIntegerTy())
+  if (Ty->isIntegerTy()) {
     return Ty;
+  }
 
   // The only other support type is pointer.
   assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
-  if (TD) return TD->getIntPtrType(getContext());
 
-  // Without TargetData, conservatively assume pointers are 64-bit.
+  if (DL)
+    return DL->getIntPtrType(Ty);
+
+  // Without DataLayout, conservatively assume pointers are 64-bit.
   return Type::getInt64Ty(getContext());
 }
 
@@ -2723,13 +2774,51 @@ const SCEV *ScalarEvolution::getCouldNotCompute() {
   return &CouldNotCompute;
 }
 
+namespace {
+  // Helper class working with SCEVTraversal to figure out if a SCEV contains
+  // a SCEVUnknown with null value-pointer. FindInvalidSCEVUnknown::FindOne
+  // is set iff we find such a SCEVUnknown.
+  //
+  struct FindInvalidSCEVUnknown {
+    bool FindOne;
+    FindInvalidSCEVUnknown() { FindOne = false; }
+    bool follow(const SCEV *S) {
+      switch (static_cast<SCEVTypes>(S->getSCEVType())) {
+      case scConstant:
+        return false;
+      case scUnknown:
+        if (!cast<SCEVUnknown>(S)->getValue())
+          FindOne = true;
+        return false;
+      default:
+        return true;
+      }
+    }
+    bool isDone() const { return FindOne; }
+  };
+}
+
+bool ScalarEvolution::checkValidity(const SCEV *S) const {
+  FindInvalidSCEVUnknown F;
+  SCEVTraversal<FindInvalidSCEVUnknown> ST(F);
+  ST.visitAll(S);
+
+  return !F.FindOne;
+}
+
 /// getSCEV - Return an existing SCEV if it exists, otherwise analyze the
 /// expression and create a new one.
 const SCEV *ScalarEvolution::getSCEV(Value *V) {
   assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
 
-  ValueExprMapType::const_iterator I = ValueExprMap.find(V);
-  if (I != ValueExprMap.end()) return I->second;
+  ValueExprMapType::iterator I = ValueExprMap.find_as(V);
+  if (I != ValueExprMap.end()) {
+    const SCEV *S = I->second;
+    if (checkValidity(S))
+      return S;
+    else
+      ValueExprMap.erase(I);
+  }
   const SCEV *S = createSCEV(V);
 
   // The process of creating a SCEV for V may have caused other SCEVs
@@ -2921,7 +3010,7 @@ const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
     return getPointerBase(Cast->getOperand());
   }
   else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) {
-    const SCEV *PtrOp = 0;
+    const SCEV *PtrOp = nullptr;
     for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
          I != E; ++I) {
       if ((*I)->getType()->isPointerTy()) {
@@ -2944,9 +3033,8 @@ static void PushDefUseChildren(Instruction *I,
                                SmallVectorImpl<Instruction *> &Worklist) {
   // Push the def-use children onto the Worklist stack.
-  for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
-       UI != UE; ++UI)
-    Worklist.push_back(cast<Instruction>(*UI));
+  for (User *U : I->users())
+    Worklist.push_back(cast<Instruction>(U));
 }
 
 /// ForgetSymbolicValue - This looks up computed SCEV values for all
@@ -2965,7 +3053,7 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
     if (!Visited.insert(I)) continue;
 
     ValueExprMapType::iterator It =
-      ValueExprMap.find(static_cast<Value *>(I));
+      ValueExprMap.find_as(static_cast<Value *>(I));
     if (It != ValueExprMap.end()) {
       const SCEV *Old = It->second;
 
@@ -3002,27 +3090,27 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
   // The loop may have multiple entrances or multiple exits; we can analyze
   // this phi as an addrec if it has a unique entry value and a unique
   // backedge value.
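// --- [Editorial aside; illustrative C++ sketch, not part of this patch] ---
// The loop shape the code below recognizes: the phi 'i' has exactly one
// incoming value from outside the loop (the start, 0) and one from the
// backedge (the step, i + 3), so SCEV models it as the addrec {0,+,3}:

static int sumEveryThird(const int *A, int N) {
  int S = 0;
  for (int i = 0; i < N; i += 3) // i: entry value 0, backedge value i + 3
    S += A[i];
  return S;
}
// --- [End aside] ---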
- Value *BEValueV = 0, *StartValueV = 0; + Value *BEValueV = nullptr, *StartValueV = nullptr; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *V = PN->getIncomingValue(i); if (L->contains(PN->getIncomingBlock(i))) { if (!BEValueV) { BEValueV = V; } else if (BEValueV != V) { - BEValueV = 0; + BEValueV = nullptr; break; } } else if (!StartValueV) { StartValueV = V; } else if (StartValueV != V) { - StartValueV = 0; + StartValueV = nullptr; break; } } if (BEValueV && StartValueV) { // While we are analyzing this PHI node, handle its value symbolically. const SCEV *SymbolicName = getUnknown(PN); - assert(ValueExprMap.find(PN) == ValueExprMap.end() && + assert(ValueExprMap.find_as(PN) == ValueExprMap.end() && "PHI node already processed?"); ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName)); @@ -3068,15 +3156,26 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { Flags = setFlags(Flags, SCEV::FlagNUW); if (OBO->hasNoSignedWrap()) Flags = setFlags(Flags, SCEV::FlagNSW); - } else if (const GEPOperator *GEP = - dyn_cast(BEValueV)) { + } else if (GEPOperator *GEP = dyn_cast(BEValueV)) { // If the increment is an inbounds GEP, then we know the address // space cannot be wrapped around. We cannot make any guarantee // about signed or unsigned overflow because pointers are // unsigned but we may have a negative index from the base - // pointer. - if (GEP->isInBounds()) + // pointer. We can guarantee that no unsigned wrap occurs if the + // indices form a positive value. + if (GEP->isInBounds()) { Flags = setFlags(Flags, SCEV::FlagNW); + + const SCEV *Ptr = getSCEV(GEP->getPointerOperand()); + if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr))) + Flags = setFlags(Flags, SCEV::FlagNUW); + } + } else if (const SubOperator *OBO = + dyn_cast(BEValueV)) { + if (OBO->hasNoUnsignedWrap()) + Flags = setFlags(Flags, SCEV::FlagNUW); + if (OBO->hasNoSignedWrap()) + Flags = setFlags(Flags, SCEV::FlagNSW); } const SCEV *StartVal = getSCEV(StartValueV); @@ -3132,7 +3231,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { // PHI's incoming blocks are in a different loop, in which case doing so // risks breaking LCSSA form. Instcombine would normally zap these, but // it doesn't have DominatorTree information, so it may miss cases. - if (Value *V = SimplifyInstruction(PN, TD, TLI, DT)) + if (Value *V = SimplifyInstruction(PN, DL, TLI, DT)) if (LI->replacementPreservesLCSSAForm(PN, V)) return getSCEV(V); @@ -3144,21 +3243,21 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { /// operations. This allows them to be analyzed by regular SCEV code. /// const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { + Type *IntPtrTy = getEffectiveSCEVType(GEP->getType()); + Value *Base = GEP->getOperand(0); + // Don't attempt to analyze GEPs over unsized objects. + if (!Base->getType()->getPointerElementType()->isSized()) + return getUnknown(GEP); // Don't blindly transfer the inbounds flag from the GEP instruction to the // Add expression, because the Instruction may be guarded by control flow // and the no-overflow bits may not be valid for the expression in any // context. - bool isInBounds = GEP->isInBounds(); + SCEV::NoWrapFlags Wrap = GEP->isInBounds() ? SCEV::FlagNSW : SCEV::FlagAnyWrap; - Type *IntPtrTy = getEffectiveSCEVType(GEP->getType()); - Value *Base = GEP->getOperand(0); - // Don't attempt to analyze GEPs over unsized objects. 
- if (!cast(Base->getType())->getElementType()->isSized()) - return getUnknown(GEP); const SCEV *TotalOffset = getConstant(IntPtrTy, 0); gep_type_iterator GTI = gep_type_begin(GEP); - for (GetElementPtrInst::op_iterator I = llvm::next(GEP->op_begin()), + for (GetElementPtrInst::op_iterator I = std::next(GEP->op_begin()), E = GEP->op_end(); I != E; ++I) { Value *Index = *I; @@ -3166,21 +3265,19 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { if (StructType *STy = dyn_cast(*GTI++)) { // For a struct, add the member offset. unsigned FieldNo = cast(Index)->getZExtValue(); - const SCEV *FieldOffset = getOffsetOfExpr(STy, FieldNo); + const SCEV *FieldOffset = getOffsetOfExpr(IntPtrTy, STy, FieldNo); // Add the field offset to the running total offset. TotalOffset = getAddExpr(TotalOffset, FieldOffset); } else { // For an array, add the element offset, explicitly scaled. - const SCEV *ElementSize = getSizeOfExpr(*GTI); + const SCEV *ElementSize = getSizeOfExpr(IntPtrTy, *GTI); const SCEV *IndexS = getSCEV(Index); // Getelementptr indices are signed. IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy); // Multiply the index by the element size to compute the element offset. - const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize, - isInBounds ? SCEV::FlagNSW : - SCEV::FlagAnyWrap); + const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize, Wrap); // Add the element offset to the running total offset. TotalOffset = getAddExpr(TotalOffset, LocalOffset); @@ -3191,8 +3288,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { const SCEV *BaseS = getSCEV(Base); // Add the total offset from all the GEP indices to the base. - return getAddExpr(BaseS, TotalOffset, - isInBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap); + return getAddExpr(BaseS, TotalOffset, Wrap); } /// GetMinTrailingZeros - Determine the minimum number of zero bits that S is @@ -3266,9 +3362,8 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { if (const SCEVUnknown *U = dyn_cast(S)) { // For a SCEVUnknown, ask ValueTracking. unsigned BitWidth = getTypeSizeInBits(U->getType()); - APInt Mask = APInt::getAllOnesValue(BitWidth); APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones); + ComputeMaskedBits(U->getValue(), Zeros, Ones); return Zeros.countTrailingOnes(); } @@ -3406,9 +3501,8 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) { if (const SCEVUnknown *U = dyn_cast(S)) { // For a SCEVUnknown, ask ValueTracking. - APInt Mask = APInt::getAllOnesValue(BitWidth); APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD); + ComputeMaskedBits(U->getValue(), Zeros, Ones, DL); if (Ones == ~Zeros + 1) return setUnsignedRange(U, ConservativeResult); return setUnsignedRange(U, @@ -3558,10 +3652,10 @@ ScalarEvolution::getSignedRange(const SCEV *S) { if (const SCEVUnknown *U = dyn_cast(S)) { // For a SCEVUnknown, ask ValueTracking. 
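// --- [Editorial aside; illustrative C++ sketch, not part of this patch] ---
// How the hunk below turns sign bits into a signed range: if ValueTracking
// proves a B-bit value has NS known sign bits, the value lies in
// [-2^(B-NS), 2^(B-NS) - 1]; NS == 1 carries no information, which is why
// the test is relaxed to NS <= 1. A standalone model (small B only, to
// avoid shift overflow in the sketch):

#include <cstdint>
#include <utility>

static std::pair<int64_t, int64_t> rangeFromSignBits(unsigned B, unsigned NS) {
  int64_t Hi = (int64_t)1 << (B - NS); // 2^(B-NS)
  return { -Hi, Hi - 1 };
}

// e.g. a 32-bit value that is a sign-extended 8-bit quantity has NS = 25,
// and rangeFromSignBits(32, 25) yields {-128, 127}.
// --- [End aside] ---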
- if (!U->getValue()->getType()->isIntegerTy() && !TD) + if (!U->getValue()->getType()->isIntegerTy() && !DL) return setSignedRange(U, ConservativeResult); - unsigned NS = ComputeNumSignBits(U->getValue(), TD); - if (NS == 1) + unsigned NS = ComputeNumSignBits(U->getValue(), DL); + if (NS <= 1) return setSignedRange(U, ConservativeResult); return setSignedRange(U, ConservativeResult.intersectWith( ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1), @@ -3664,18 +3758,24 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // Use ComputeMaskedBits to compute what ShrinkDemandedConstant // knew about to reconstruct a low-bits mask value. unsigned LZ = A.countLeadingZeros(); + unsigned TZ = A.countTrailingZeros(); unsigned BitWidth = A.getBitWidth(); - APInt AllOnes = APInt::getAllOnesValue(BitWidth); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(U->getOperand(0), AllOnes, KnownZero, KnownOne, TD); - - APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ); - - if (LZ != 0 && !((~A & ~KnownZero) & EffectiveMask)) - return - getZeroExtendExpr(getTruncateExpr(getSCEV(U->getOperand(0)), - IntegerType::get(getContext(), BitWidth - LZ)), - U->getType()); + ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, DL); + + APInt EffectiveMask = + APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ); + if ((LZ != 0 || TZ != 0) && !((~A & ~KnownZero) & EffectiveMask)) { + const SCEV *MulCount = getConstant( + ConstantInt::get(getContext(), APInt::getOneBitSet(BitWidth, TZ))); + return getMulExpr( + getZeroExtendExpr( + getTruncateExpr( + getUDivExactExpr(getSCEV(U->getOperand(0)), MulCount), + IntegerType::get(getContext(), BitWidth - LZ - TZ)), + U->getType()), + MulCount); + } } break; @@ -3762,7 +3862,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { break; Constant *X = ConstantInt::get(getContext(), - APInt(BitWidth, 1).shl(SA->getZExtValue())); + APInt::getOneBitSet(BitWidth, SA->getZExtValue())); return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X)); } break; @@ -3780,7 +3880,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { break; Constant *X = ConstantInt::get(getContext(), - APInt(BitWidth, 1).shl(SA->getZExtValue())); + APInt::getOneBitSet(BitWidth, SA->getZExtValue())); return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X)); } break; @@ -3939,13 +4039,28 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // /// getSmallConstantTripCount - Returns the maximum trip count of this loop as a -/// normal unsigned value, if possible. Returns 0 if the trip count is unknown -/// or not constant. Will also return 0 if the maximum trip count is very large -/// (>= 2^32) -unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L, - BasicBlock *ExitBlock) { +/// normal unsigned value. Returns 0 if the trip count is unknown or not +/// constant. Will also return 0 if the maximum trip count is very large (>= +/// 2^32). +/// +/// This "trip count" assumes that control exits via ExitingBlock. More +/// precisely, it is the number of times that control may reach ExitingBlock +/// before taking the branch. For loops with multiple exits, it may not be the +/// number times that the loop header executes because the loop may exit +/// prematurely via another branch. +/// +/// FIXME: We conservatively call getBackedgeTakenCount(L) instead of +/// getExitCount(L, ExitingBlock) to compute a safe trip count considering all +/// loop exits. getExitCount() may return an exact count for this branch +/// assuming no-signed-wrap. 
The number of well-defined iterations may actually +/// be higher than this trip count if this exit test is skipped and the loop +/// exits via a different branch. Ideally, getExitCount() would know whether it +/// depends on a NSW assumption, and we would only fall back to a conservative +/// trip count in that case. +unsigned ScalarEvolution:: +getSmallConstantTripCount(Loop *L, BasicBlock * /*ExitingBlock*/) { const SCEVConstant *ExitCount = - dyn_cast(getExitCount(L, ExitBlock)); + dyn_cast(getBackedgeTakenCount(L)); if (!ExitCount) return 0; @@ -3968,9 +4083,12 @@ unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L, /// multiple of a constant (which is also the case if the trip count is simply /// constant, use getSmallConstantTripCount for that case), Will also return 1 /// if the trip count is very large (>= 2^32). -unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L, - BasicBlock *ExitBlock) { - const SCEV *ExitCount = getExitCount(L, ExitBlock); +/// +/// As explained in the comments for getSmallConstantTripCount, this assumes +/// that control exits the loop via ExitingBlock. +unsigned ScalarEvolution:: +getSmallConstantTripMultiple(Loop *L, BasicBlock * /*ExitingBlock*/) { + const SCEV *ExitCount = getBackedgeTakenCount(L); if (ExitCount == getCouldNotCompute()) return 1; @@ -3988,15 +4106,18 @@ unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L, ConstantInt *Result = MulC->getValue(); - // Guard against huge trip counts. - if (!Result || Result->getValue().getActiveBits() > 32) + // Guard against huge trip counts (this requires checking + // for zero to handle the case where the trip count == -1 and the + // addition wraps). + if (!Result || Result->getValue().getActiveBits() > 32 || + Result->getValue().getActiveBits() == 0) return 1; return (unsigned)Result->getZExtValue(); } // getExitCount - Get the expression for the number of loop iterations for which -// this loop is guaranteed not to exit via ExitintBlock. Otherwise return +// this loop is guaranteed not to exit via ExitingBlock. Otherwise return // SCEVCouldNotCompute. 
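// --- [Editorial aside; illustrative C++ sketch, not part of this patch] ---
// The caveat documented above, in loop form: the latch test i != N alone
// suggests an exact per-exit count of N, but the early return can leave the
// loop sooner, so the conservative answer for callers is the backedge-taken
// count over all exits:

static int findFirstZero(const int *A, int N) {
  for (int i = 0; i != N; ++i) // exit 1: the latch test
    if (A[i] == 0)             // exit 2: may be taken first
      return i;
  return -1;
}
// --- [End aside] ---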
const SCEV *ScalarEvolution::getExitCount(Loop *L, BasicBlock *ExitingBlock) { return getBackedgeTakenInfo(L).getExact(ExitingBlock, this); @@ -4080,7 +4201,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { if (!Visited.insert(I)) continue; ValueExprMapType::iterator It = - ValueExprMap.find(static_cast(I)); + ValueExprMap.find_as(static_cast(I)); if (It != ValueExprMap.end()) { const SCEV *Old = It->second; @@ -4131,7 +4252,8 @@ void ScalarEvolution::forgetLoop(const Loop *L) { Instruction *I = Worklist.pop_back_val(); if (!Visited.insert(I)) continue; - ValueExprMapType::iterator It = ValueExprMap.find(static_cast(I)); + ValueExprMapType::iterator It = + ValueExprMap.find_as(static_cast(I)); if (It != ValueExprMap.end()) { forgetMemoizedResults(It->second); ValueExprMap.erase(It); @@ -4164,7 +4286,8 @@ void ScalarEvolution::forgetValue(Value *V) { I = Worklist.pop_back_val(); if (!Visited.insert(I)) continue; - ValueExprMapType::iterator It = ValueExprMap.find(static_cast(I)); + ValueExprMapType::iterator It = + ValueExprMap.find_as(static_cast(I)); if (It != ValueExprMap.end()) { forgetMemoizedResults(It->second); ValueExprMap.erase(It); @@ -4193,9 +4316,9 @@ ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const { if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute(); assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info"); - const SCEV *BECount = 0; + const SCEV *BECount = nullptr; for (const ExitNotTakenInfo *ENT = &ExitNotTaken; - ENT != 0; ENT = ENT->getNextExit()) { + ENT != nullptr; ENT = ENT->getNextExit()) { assert(ENT->ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV"); @@ -4213,7 +4336,7 @@ const SCEV * ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock, ScalarEvolution *SE) const { for (const ExitNotTakenInfo *ENT = &ExitNotTaken; - ENT != 0; ENT = ENT->getNextExit()) { + ENT != nullptr; ENT = ENT->getNextExit()) { if (ENT->ExitingBlock == ExitingBlock) return ENT->ExactNotTaken; @@ -4227,6 +4350,25 @@ ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const { return Max ? Max : SE->getCouldNotCompute(); } +bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S, + ScalarEvolution *SE) const { + if (Max && Max != SE->getCouldNotCompute() && SE->hasOperand(Max, S)) + return true; + + if (!ExitNotTaken.ExitingBlock) + return false; + + for (const ExitNotTakenInfo *ENT = &ExitNotTaken; + ENT != nullptr; ENT = ENT->getNextExit()) { + + if (ENT->ExactNotTaken != SE->getCouldNotCompute() + && SE->hasOperand(ENT->ExactNotTaken, S)) { + return true; + } + } + return false; +} + /// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each /// computable exit into a persistent ExitNotTakenInfo array. ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( @@ -4256,8 +4398,8 @@ ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( /// clear - Invalidate this result and free the ExitNotTakenInfo array. void ScalarEvolution::BackedgeTakenInfo::clear() { - ExitNotTaken.ExitingBlock = 0; - ExitNotTaken.ExactNotTaken = 0; + ExitNotTaken.ExitingBlock = nullptr; + ExitNotTaken.ExactNotTaken = nullptr; delete[] ExitNotTaken.getNextExit(); } @@ -4271,6 +4413,8 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { // Examine all exits and pick the most conservative values. const SCEV *MaxBECount = getCouldNotCompute(); bool CouldComputeBECount = true; + BasicBlock *Latch = L->getLoopLatch(); // may be NULL. 
+ const SCEV *LatchMaxCount = nullptr; SmallVector, 4> ExitCounts; for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { ExitLimit EL = ComputeExitLimit(L, ExitingBlocks[i]); @@ -4287,13 +4431,17 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { // We cannot take the "min" MaxBECount, because non-unit stride loops may // skip some loop tests. Taking the max over the exits is sufficiently // conservative. TODO: We could do better taking into consideration - // that (1) the loop has unit stride (2) the last loop test is - // less-than/greater-than (3) any loop test is less-than/greater-than AND - // falls-through some constant times less then the other tests. - MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, EL.Max); + // non-latch exits that dominate the latch. + if (EL.MustExit && ExitingBlocks[i] == Latch) + LatchMaxCount = EL.Max; + else + MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, EL.Max); } } - + // Be more precise in the easy case of a loop latch that must exit. + if (LatchMaxCount) { + MaxBECount = getUMinFromMismatchedTypes(MaxBECount, LatchMaxCount); + } return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount); } @@ -4303,12 +4451,19 @@ ScalarEvolution::ExitLimit ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { // Okay, we've chosen an exiting block. See what condition causes us to - // exit at this block. - // - // FIXME: we should be able to handle switch instructions (with a single exit) - BranchInst *ExitBr = dyn_cast(ExitingBlock->getTerminator()); - if (ExitBr == 0) return getCouldNotCompute(); - assert(ExitBr->isConditional() && "If unconditional, it can't be in loop!"); + // exit at this block and remember the exit block and whether all other targets + // lead to the loop header. + bool MustExecuteLoopHeader = true; + BasicBlock *Exit = nullptr; + for (succ_iterator SI = succ_begin(ExitingBlock), SE = succ_end(ExitingBlock); + SI != SE; ++SI) + if (!L->contains(*SI)) { + if (Exit) // Multiple exit successors. + return getCouldNotCompute(); + Exit = *SI; + } else if (*SI != L->getHeader()) { + MustExecuteLoopHeader = false; + } // At this point, we know we have a conditional branch that determines whether // the loop is exited. However, we don't know if the branch is executed each @@ -4327,13 +4482,11 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { // // More extensive analysis could be done to handle more cases here. // - if (ExitBr->getSuccessor(0) != L->getHeader() && - ExitBr->getSuccessor(1) != L->getHeader() && - ExitBr->getParent() != L->getHeader()) { + if (!MustExecuteLoopHeader && ExitingBlock != L->getHeader()) { // The simple checks failed, try climbing the unique predecessor chain // up to the header. bool Ok = false; - for (BasicBlock *BB = ExitBr->getParent(); BB; ) { + for (BasicBlock *BB = ExitingBlock; BB; ) { BasicBlock *Pred = BB->getUniquePredecessor(); if (!Pred) return getCouldNotCompute(); @@ -4357,29 +4510,49 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { return getCouldNotCompute(); } - // Proceed to the next level to examine the exit condition expression. - return ComputeExitLimitFromCond(L, ExitBr->getCondition(), - ExitBr->getSuccessor(0), - ExitBr->getSuccessor(1)); + TerminatorInst *Term = ExitingBlock->getTerminator(); + if (BranchInst *BI = dyn_cast(Term)) { + assert(BI->isConditional() && "If unconditional, it can't be in loop!"); + // Proceed to the next level to examine the exit condition expression. 
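// --- [Editorial aside; illustrative C++ sketch, not part of this patch] ---
// The and-of-conditions case handled below, in loop form: the loop keeps
// running only while both comparisons hold, so the exact backedge-taken
// count is the unsigned minimum of the two per-condition counts (here the
// i < 7 bound wins; the i < 9 condition never fires):

static int sumWhileBothHold(void) {
  int S = 0;
  for (int i = 0; i < 7 && i < 9; ++i)
    S += i;
  return S; // 21, from iterations i = 0..6
}
// --- [End aside] ---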
+ return ComputeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0), + BI->getSuccessor(1), + /*IsSubExpr=*/false); + } + + if (SwitchInst *SI = dyn_cast(Term)) + return ComputeExitLimitFromSingleExitSwitch(L, SI, Exit, + /*IsSubExpr=*/false); + + return getCouldNotCompute(); } /// ComputeExitLimitFromCond - Compute the number of times the /// backedge of the specified loop will execute if its exit condition /// were a conditional branch of ExitCond, TBB, and FBB. +/// +/// @param IsSubExpr is true if ExitCond does not directly control the exit +/// branch. In this case, we cannot assume that the loop only exits when the +/// condition is true and cannot infer that failing to meet the condition prior +/// to integer wraparound results in undefined behavior. ScalarEvolution::ExitLimit ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, Value *ExitCond, BasicBlock *TBB, - BasicBlock *FBB) { + BasicBlock *FBB, + bool IsSubExpr) { // Check if the controlling expression for this loop is an And or Or. if (BinaryOperator *BO = dyn_cast(ExitCond)) { if (BO->getOpcode() == Instruction::And) { // Recurse on the operands of the and. - ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB); - ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB); + bool EitherMayExit = L->contains(TBB); + ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, + IsSubExpr || EitherMayExit); + ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, + IsSubExpr || EitherMayExit); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); - if (L->contains(TBB)) { + bool MustExit = false; + if (EitherMayExit) { // Both conditions must be true for the loop to continue executing. // Choose the less conservative count. if (EL0.Exact == getCouldNotCompute() || @@ -4393,6 +4566,7 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, MaxBECount = EL0.Max; else MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max); + MustExit = EL0.MustExit || EL1.MustExit; } else { // Both conditions must be true at the same time for the loop to exit. // For now, be conservative. @@ -4401,17 +4575,22 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, MaxBECount = EL0.Max; if (EL0.Exact == EL1.Exact) BECount = EL0.Exact; + MustExit = EL0.MustExit && EL1.MustExit; } - return ExitLimit(BECount, MaxBECount); + return ExitLimit(BECount, MaxBECount, MustExit); } if (BO->getOpcode() == Instruction::Or) { // Recurse on the operands of the or. - ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB); - ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB); + bool EitherMayExit = L->contains(FBB); + ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, + IsSubExpr || EitherMayExit); + ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, + IsSubExpr || EitherMayExit); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); - if (L->contains(FBB)) { + bool MustExit = false; + if (EitherMayExit) { // Both conditions must be false for the loop to continue executing. // Choose the less conservative count. 
if (EL0.Exact == getCouldNotCompute() || @@ -4425,6 +4604,7 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, MaxBECount = EL0.Max; else MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max); + MustExit = EL0.MustExit || EL1.MustExit; } else { // Both conditions must be false at the same time for the loop to exit. // For now, be conservative. @@ -4433,16 +4613,17 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, MaxBECount = EL0.Max; if (EL0.Exact == EL1.Exact) BECount = EL0.Exact; + MustExit = EL0.MustExit && EL1.MustExit; } - return ExitLimit(BECount, MaxBECount); + return ExitLimit(BECount, MaxBECount, MustExit); } } // With an icmp, it may be feasible to compute an exact backedge-taken count. // Proceed to the next level to examine the icmp. if (ICmpInst *ExitCondICmp = dyn_cast(ExitCond)) - return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB); + return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, IsSubExpr); // Check for a constant condition. These are normally stripped out by // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to @@ -4468,7 +4649,8 @@ ScalarEvolution::ExitLimit ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, ICmpInst *ExitCond, BasicBlock *TBB, - BasicBlock *FBB) { + BasicBlock *FBB, + bool IsSubExpr) { // If the condition was exit on true, convert the condition to exit on false ICmpInst::Predicate Cond; @@ -4520,7 +4702,7 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, switch (Cond) { case ICmpInst::ICMP_NE: { // while (X != Y) // Convert to: while (X-Y != 0) - ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L); + ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, IsSubExpr); if (EL.hasAnyInfo()) return EL; break; } @@ -4530,25 +4712,17 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, if (EL.hasAnyInfo()) return EL; break; } - case ICmpInst::ICMP_SLT: { - ExitLimit EL = HowManyLessThans(LHS, RHS, L, true); - if (EL.hasAnyInfo()) return EL; - break; - } - case ICmpInst::ICMP_SGT: { - ExitLimit EL = HowManyLessThans(getNotSCEV(LHS), - getNotSCEV(RHS), L, true); - if (EL.hasAnyInfo()) return EL; - break; - } - case ICmpInst::ICMP_ULT: { - ExitLimit EL = HowManyLessThans(LHS, RHS, L, false); + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_ULT: { // while (X < Y) + bool IsSigned = Cond == ICmpInst::ICMP_SLT; + ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, IsSubExpr); if (EL.hasAnyInfo()) return EL; break; } - case ICmpInst::ICMP_UGT: { - ExitLimit EL = HowManyLessThans(getNotSCEV(LHS), - getNotSCEV(RHS), L, false); + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_UGT: { // while (X > Y) + bool IsSigned = Cond == ICmpInst::ICMP_SGT; + ExitLimit EL = HowManyGreaterThans(LHS, RHS, L, IsSigned, IsSubExpr); if (EL.hasAnyInfo()) return EL; break; } @@ -4566,6 +4740,30 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB)); } +ScalarEvolution::ExitLimit +ScalarEvolution::ComputeExitLimitFromSingleExitSwitch(const Loop *L, + SwitchInst *Switch, + BasicBlock *ExitingBlock, + bool IsSubExpr) { + assert(!L->contains(ExitingBlock) && "Not an exiting block!"); + + // Give up if the exit is the default dest of a switch. 
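// --- [Editorial aside; illustrative C++ sketch, not part of this patch] ---
// The single-exit switch shape supported below: exactly one case target
// leaves the loop while the default stays inside, so the exit behaves like
// the comparison (X != CaseValue) and HowFarToZero can count X - CaseValue:

static int stepsUntilThree(int X) {
  int Steps = 0;
  while (true) {
    switch (X) {
    case 3:                 // the only exiting case
      return Steps;
    default:                // default destination stays in the loop
      break;
    }
    ++X;
    ++Steps;
  }
}
// --- [End aside] ---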
+ if (Switch->getDefaultDest() == ExitingBlock) + return getCouldNotCompute(); + + assert(L->contains(Switch->getDefaultDest()) && + "Default case must not exit the loop!"); + const SCEV *LHS = getSCEVAtScope(Switch->getCondition(), L); + const SCEV *RHS = getConstant(Switch->findCaseDest(ExitingBlock)); + + // while (X != Y) --> while (X-Y != 0) + ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, IsSubExpr); + if (EL.hasAnyInfo()) + return EL; + + return getCouldNotCompute(); +} + static ConstantInt * EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C, ScalarEvolution &SE) { @@ -4576,40 +4774,6 @@ EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C, return cast(Val)->getValue(); } -/// GetAddressedElementFromGlobal - Given a global variable with an initializer -/// and a GEP expression (missing the pointer index) indexing into it, return -/// the addressed element of the initializer or null if the index expression is -/// invalid. -static Constant * -GetAddressedElementFromGlobal(GlobalVariable *GV, - const std::vector &Indices) { - Constant *Init = GV->getInitializer(); - for (unsigned i = 0, e = Indices.size(); i != e; ++i) { - uint64_t Idx = Indices[i]->getZExtValue(); - if (ConstantStruct *CS = dyn_cast(Init)) { - assert(Idx < CS->getNumOperands() && "Bad struct index!"); - Init = cast(CS->getOperand(Idx)); - } else if (ConstantArray *CA = dyn_cast(Init)) { - if (Idx >= CA->getNumOperands()) return 0; // Bogus program - Init = cast(CA->getOperand(Idx)); - } else if (isa(Init)) { - if (StructType *STy = dyn_cast(Init->getType())) { - assert(Idx < STy->getNumElements() && "Bad struct index!"); - Init = Constant::getNullValue(STy->getElementType(Idx)); - } else if (ArrayType *ATy = dyn_cast(Init->getType())) { - if (Idx >= ATy->getNumElements()) return 0; // Bogus program - Init = Constant::getNullValue(ATy->getElementType()); - } else { - llvm_unreachable("Unknown constant aggregate type!"); - } - return 0; - } else { - return 0; // Unknown initializer type - } - } - return Init; -} - /// ComputeLoadConstantCompareExitLimit - Given an exit condition of /// 'icmp op load X, cst', try to see if we can compute the backedge /// execution count. @@ -4636,8 +4800,8 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit( return getCouldNotCompute(); // Okay, we allow one non-constant index into the GEP instruction. - Value *VarIdx = 0; - std::vector Indexes; + Value *VarIdx = nullptr; + std::vector Indexes; unsigned VarIdxNum = 0; for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i) if (ConstantInt *CI = dyn_cast(GEP->getOperand(i))) { @@ -4646,9 +4810,13 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit( if (VarIdx) return getCouldNotCompute(); // Multiple non-constant idx's. VarIdx = GEP->getOperand(i); VarIdxNum = i-2; - Indexes.push_back(0); + Indexes.push_back(nullptr); } + // Loop-invariant loads may be a byproduct of loop optimization. Skip them. + if (!VarIdx) + return getCouldNotCompute(); + // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant. // Check to see if X is a loop variant variable value now. const SCEV *Idx = getSCEV(VarIdx); @@ -4671,8 +4839,9 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit( // Form the GEP offset. Indexes[VarIdxNum] = Val; - Constant *Result = GetAddressedElementFromGlobal(GV, Indexes); - if (Result == 0) break; // Cannot compute! 
+    Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(),
+                                                         Indexes);
+    if (!Result) break;  // Cannot compute!

     // Evaluate the condition for this iteration.
     Result = ConstantExpr::getICmp(predicate, Result, RHS);
@@ -4733,14 +4902,14 @@ getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,

   // Otherwise, we can evaluate this instruction if all of its operands are
   // constant or derived from a PHI node themselves.
-  PHINode *PHI = 0;
+  PHINode *PHI = nullptr;
   for (Instruction::op_iterator OpI = UseInst->op_begin(),
          OpE = UseInst->op_end(); OpI != OpE; ++OpI) {
     if (isa<Constant>(*OpI)) continue;

     Instruction *OpInst = dyn_cast<Instruction>(*OpI);
-    if (!OpInst || !canConstantEvolve(OpInst, L)) return 0;
+    if (!OpInst || !canConstantEvolve(OpInst, L)) return nullptr;

     PHINode *P = dyn_cast<PHINode>(OpInst);
     if (!P)
@@ -4754,8 +4923,10 @@ getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
       P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap);
       PHIMap[OpInst] = P;
     }
-    if (P == 0) return 0;        // Not evolving from PHI
-    if (PHI && PHI != P) return 0;  // Evolving from multiple different PHIs.
+    if (!P)
+      return nullptr;  // Not evolving from PHI
+    if (PHI && PHI != P)
+      return nullptr;  // Evolving from multiple different PHIs.
     PHI = P;
   }
   // This is an expression evolving from a constant PHI!
@@ -4769,7 +4940,7 @@ getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
 /// constraints, return null.
 static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
   Instruction *I = dyn_cast<Instruction>(V);
-  if (I == 0 || !canConstantEvolve(I, L)) return 0;
+  if (!I || !canConstantEvolve(I, L)) return nullptr;

   if (PHINode *PN = dyn_cast<PHINode>(I)) {
     return PN;
@@ -4786,23 +4957,23 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
 /// reason, return null.
 static Constant *EvaluateExpression(Value *V, const Loop *L,
                                     DenseMap<Instruction *, Constant *> &Vals,
-                                    const TargetData *TD,
+                                    const DataLayout *DL,
                                     const TargetLibraryInfo *TLI) {
   // Convenient constant check, but redundant for recursive calls.
   if (Constant *C = dyn_cast<Constant>(V)) return C;
   Instruction *I = dyn_cast<Instruction>(V);
-  if (!I) return 0;
+  if (!I) return nullptr;

   if (Constant *C = Vals.lookup(I)) return C;

   // An instruction inside the loop depends on a value outside the loop that we
   // weren't given a mapping for, or a value such as a call inside the loop.
-  if (!canConstantEvolve(I, L)) return 0;
+  if (!canConstantEvolve(I, L)) return nullptr;

   // An unmapped PHI can be due to a branch or another loop inside this loop,
   // or due to this not being the initial iteration through a loop where we
   // couldn't compute the evolution of this particular PHI last time.
-  if (isa<PHINode>(I)) return 0;
+  if (isa<PHINode>(I)) return nullptr;

   std::vector<Constant *> Operands(I->getNumOperands());

@@ -4810,23 +4981,23 @@ static Constant *EvaluateExpression(Value *V, const Loop *L,
     Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i));
     if (!Operand) {
       Operands[i] = dyn_cast<Constant>(I->getOperand(i));
-      if (!Operands[i]) return 0;
+      if (!Operands[i]) return nullptr;
       continue;
     }
-    Constant *C = EvaluateExpression(Operand, L, Vals, TD, TLI);
+    Constant *C = EvaluateExpression(Operand, L, Vals, DL, TLI);
     Vals[Operand] = C;
-    if (!C) return 0;
+    if (!C) return nullptr;
     Operands[i] = C;
   }

   if (CmpInst *CI = dyn_cast<CmpInst>(I))
     return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
-                                           Operands[1], TD, TLI);
+                                           Operands[1], DL, TLI);
   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
     if (!LI->isVolatile())
-      return ConstantFoldLoadFromConstPtr(Operands[0], TD);
+      return ConstantFoldLoadFromConstPtr(Operands[0], DL);
   }
-  return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, TD,
+  return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, DL,
                                   TLI);
 }

@@ -4844,7 +5015,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
     return I->second;

   if (BEs.ugt(MaxBruteForceIterations))
-    return ConstantEvolutionLoopExitValue[PN] = 0;  // Not going to evaluate it.
+    return ConstantEvolutionLoopExitValue[PN] = nullptr;  // Not going to evaluate it.

   Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];

@@ -4856,22 +5027,22 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
   // entry must be a constant (coming in from outside of the loop), and the
   // second must be derived from the same PHI.
   bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
-  PHINode *PHI = 0;
+  PHINode *PHI = nullptr;
   for (BasicBlock::iterator I = Header->begin();
        (PHI = dyn_cast<PHINode>(I)); ++I) {
     Constant *StartCST =
       dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
-    if (StartCST == 0) continue;
+    if (!StartCST) continue;
     CurrentIterVals[PHI] = StartCST;
   }
   if (!CurrentIterVals.count(PN))
-    return RetVal = 0;
+    return RetVal = nullptr;

   Value *BEValue = PN->getIncomingValue(SecondIsBackedge);

   // Execute the loop symbolically to determine the exit value.
   if (BEs.getActiveBits() >= 32)
-    return RetVal = 0;  // More than 2^32-1 iterations?? Not doing it!
+    return RetVal = nullptr;  // More than 2^32-1 iterations?? Not doing it!

   unsigned NumIterations = BEs.getZExtValue(); // must be in range
   unsigned IterationNum = 0;
@@ -4882,10 +5053,10 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
     // Compute the value of the PHIs for the next iteration.
     // EvaluateExpression adds non-phi values to the CurrentIterVals map.
     DenseMap<Instruction *, Constant *> NextIterVals;
-    Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD,
+    Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL,
                                            TLI);
-    if (NextPHI == 0)
-      return 0;        // Couldn't evaluate!
+    if (!NextPHI)
+      return nullptr;  // Couldn't evaluate!
     NextIterVals[PN] = NextPHI;

     bool StoppedEvolving = NextPHI == CurrentIterVals[PN];
@@ -4908,7 +5079,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
       Constant *&NextPHI = NextIterVals[PHI];
       if (!NextPHI) {            // Not already computed.
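        // Advance this auxiliary PHI one step as well, so every PHI in the
        // header stays in sync with PN for the next iteration.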
Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); - NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI); + NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI); } if (NextPHI != I->second) StoppedEvolving = false; @@ -4932,7 +5103,7 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, Value *Cond, bool ExitWhen) { PHINode *PN = getConstantEvolvingPHI(Cond, L); - if (PN == 0) return getCouldNotCompute(); + if (!PN) return getCouldNotCompute(); // If the loop is canonicalized, the PHI will have exactly two entries. // That's the only form we support here. @@ -4945,12 +5116,12 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, // One entry must be a constant (coming in from outside of the loop), and the // second must be derived from the same PHI. bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); - PHINode *PHI = 0; + PHINode *PHI = nullptr; for (BasicBlock::iterator I = Header->begin(); (PHI = dyn_cast(I)); ++I) { Constant *StartCST = dyn_cast(PHI->getIncomingValue(!SecondIsBackedge)); - if (StartCST == 0) continue; + if (!StartCST) continue; CurrentIterVals[PHI] = StartCST; } if (!CurrentIterVals.count(PN)) @@ -4964,7 +5135,7 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){ ConstantInt *CondVal = dyn_cast_or_null(EvaluateExpression(Cond, L, CurrentIterVals, - TD, TLI)); + DL, TLI)); // Couldn't symbolically evaluate. if (!CondVal) return getCouldNotCompute(); @@ -4994,7 +5165,7 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, if (NextPHI) continue; // Already computed! Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); - NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI); + NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI); } CurrentIterVals.swap(NextIterVals); } @@ -5015,15 +5186,21 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, /// original value V is returned. const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { // Check to see if we've folded this expression at this loop before. - std::map &Values = ValuesAtScopes[V]; - std::pair::iterator, bool> Pair = - Values.insert(std::make_pair(L, static_cast(0))); - if (!Pair.second) - return Pair.first->second ? Pair.first->second : V; - + SmallVector, 2> &Values = ValuesAtScopes[V]; + for (unsigned u = 0; u < Values.size(); u++) { + if (Values[u].first == L) + return Values[u].second ? Values[u].second : V; + } + Values.push_back(std::make_pair(L, static_cast(nullptr))); // Otherwise compute it. const SCEV *C = computeSCEVAtScope(V, L); - ValuesAtScopes[V][L] = C; + SmallVector, 2> &Values2 = ValuesAtScopes[V]; + for (unsigned u = Values2.size(); u > 0; u--) { + if (Values2[u - 1].first == L) { + Values2[u - 1].second = C; + break; + } + } return C; } @@ -5032,8 +5209,7 @@ const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { /// SCEVConstant, because SCEVConstant is restricted to ConstantInt. /// Returns NULL if the SCEV isn't representable as a Constant. static Constant *BuildConstantFromSCEV(const SCEV *V) { - switch (V->getSCEVType()) { - default: // TODO: smax, umax. 
+ switch (static_cast(V->getSCEVType())) { case scCouldNotCompute: case scAddRecExpr: break; @@ -5062,27 +5238,32 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) { case scAddExpr: { const SCEVAddExpr *SA = cast(V); if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) { - if (C->getType()->isPointerTy()) - C = ConstantExpr::getBitCast(C, Type::getInt8PtrTy(C->getContext())); + if (PointerType *PTy = dyn_cast(C->getType())) { + unsigned AS = PTy->getAddressSpace(); + Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS); + C = ConstantExpr::getBitCast(C, DestPtrTy); + } for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) { Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i)); - if (!C2) return 0; + if (!C2) return nullptr; // First pointer! if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) { + unsigned AS = C2->getType()->getPointerAddressSpace(); std::swap(C, C2); + Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS); // The offsets have been converted to bytes. We can add bytes to an // i8* by GEP with the byte count in the first index. - C = ConstantExpr::getBitCast(C,Type::getInt8PtrTy(C->getContext())); + C = ConstantExpr::getBitCast(C, DestPtrTy); } // Don't bother trying to sum two pointers. We probably can't // statically compute a load that results from it anyway. if (C2->getType()->isPointerTy()) - return 0; + return nullptr; - if (C->getType()->isPointerTy()) { - if (cast(C->getType())->getElementType()->isStructTy()) + if (PointerType *PTy = dyn_cast(C->getType())) { + if (PTy->getElementType()->isStructTy()) C2 = ConstantExpr::getIntegerCast( C2, Type::getInt32Ty(C->getContext()), true); C = ConstantExpr::getGetElementPtr(C, C2); @@ -5097,10 +5278,10 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) { const SCEVMulExpr *SM = cast(V); if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) { // Don't bother with pointers at all. - if (C->getType()->isPointerTy()) return 0; + if (C->getType()->isPointerTy()) return nullptr; for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) { Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i)); - if (!C2 || C2->getType()->isPointerTy()) return 0; + if (!C2 || C2->getType()->isPointerTy()) return nullptr; C = ConstantExpr::getMul(C, C2); } return C; @@ -5115,8 +5296,11 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) { return ConstantExpr::getUDiv(LHS, RHS); break; } + case scSMaxExpr: + case scUMaxExpr: + break; // TODO: smax, umax. } - return 0; + return nullptr; } const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { @@ -5183,17 +5367,17 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { // Check to see if getSCEVAtScope actually made an improvement. 
if (MadeImprovement) { - Constant *C = 0; + Constant *C = nullptr; if (const CmpInst *CI = dyn_cast(I)) C = ConstantFoldCompareInstOperands(CI->getPredicate(), - Operands[0], Operands[1], TD, + Operands[0], Operands[1], DL, TLI); else if (const LoadInst *LI = dyn_cast(I)) { if (!LI->isVolatile()) - C = ConstantFoldLoadFromConstPtr(Operands[0], TD); + C = ConstantFoldLoadFromConstPtr(Operands[0], DL); } else C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), - Operands, TD, TLI); + Operands, DL, TLI); if (!C) return V; return getSCEV(C); } @@ -5311,7 +5495,6 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { } llvm_unreachable("Unknown SCEV type!"); - return 0; } /// getSCEVAtScope - This is a convenience function which does @@ -5408,6 +5591,12 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { SqrtTerm *= B; SqrtTerm -= Four * (A * C); + if (SqrtTerm.isNegative()) { + // The loop is provably infinite. + const SCEV *CNC = SE.getCouldNotCompute(); + return std::make_pair(CNC, CNC); + } + // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest // integer value or else APInt::sqrt() will assert. APInt SqrtVal(SqrtTerm.sqrt()); @@ -5441,7 +5630,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { /// effectively V != 0. We know and take advantage of the fact that this /// expression only being used in a comparison by zero context. ScalarEvolution::ExitLimit -ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { +ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr) { // If the value is a constant if (const SCEVConstant *C = dyn_cast(V)) { // If the value is already zero, the branch will execute zero times. @@ -5510,7 +5699,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step. // We have not yet seen any such cases. const SCEVConstant *StepC = dyn_cast(Step); - if (StepC == 0) + if (!StepC || StepC->getValue()->equalsInt(0)) return getCouldNotCompute(); // For positive steps (counting up until unsigned overflow): @@ -5535,23 +5724,38 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { else MaxBECount = getConstant(CountDown ? CR.getUnsignedMax() : -CR.getUnsignedMin()); - return ExitLimit(Distance, MaxBECount); + return ExitLimit(Distance, MaxBECount, /*MustExit=*/true); } // If the recurrence is known not to wraparound, unsigned divide computes the - // back edge count. We know that the value will either become zero (and thus - // the loop terminates), that the loop will terminate through some other exit - // condition first, or that the loop has undefined behavior. This means - // we can't "miss" the exit value, even with nonunit stride. + // back edge count. (Ideally we would have an "isexact" bit for udiv). We know + // that the value will either become zero (and thus the loop terminates), that + // the loop will terminate through some other exit condition first, or that + // the loop has undefined behavior. This means we can't "miss" the exit + // value, even with nonunit stride, and exit later via the same branch. Note + // that we can skip this exit if loop later exits via a different + // branch. Hence MustExit=false. // - // FIXME: Prove that loops always exhibits *acceptable* undefined - // behavior. Loops must exhibit defined behavior until a wrapped value is - // actually used. 
So the trip count computed by udiv could be smaller than the - // number of well-defined iterations. - if (AddRec->getNoWrapFlags(SCEV::FlagNW)) { - // FIXME: We really want an "isexact" bit for udiv. - return getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step); - } + // This is only valid for expressions that directly compute the loop exit. It + // is invalid for subexpressions in which the loop may exit through this + // branch even if this subexpression is false. In that case, the trip count + // computed by this udiv could be smaller than the number of well-defined + // iterations. + if (!IsSubExpr && AddRec->getNoWrapFlags(SCEV::FlagNW)) { + const SCEV *Exact = + getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step); + return ExitLimit(Exact, Exact, /*MustExit=*/false); + } + + // If Step is a power of two that evenly divides Start we know that the loop + // will always terminate. Start may not be a constant so we just have the + // number of trailing zeros available. This is safe even in presence of + // overflow as the recurrence will overflow to exactly 0. + const APInt &StepV = StepC->getValue()->getValue(); + if (StepV.isPowerOf2() && + GetMinTrailingZeros(getNegativeSCEV(Start)) >= StepV.countTrailingZeros()) + return getUDivExactExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step); + // Then, try to solve the above equation provided that Start is constant. if (const SCEVConstant *StartC = dyn_cast(Start)) return SolveLinEquationWithOverflow(StepC->getValue()->getValue(), @@ -5631,9 +5835,14 @@ static bool HasSameValue(const SCEV *A, const SCEV *B) { /// predicate Pred. Return true iff any changes were made. /// bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, - const SCEV *&LHS, const SCEV *&RHS) { + const SCEV *&LHS, const SCEV *&RHS, + unsigned Depth) { bool Changed = false; + // If we hit the max recursion limit bail out. + if (Depth >= 3) + return false; + // Canonicalize a constant to the right side. if (const SCEVConstant *LHSC = dyn_cast(LHS)) { // Check for both operands constant. @@ -5671,6 +5880,16 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_NE: + // Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b. + if (!RA) + if (const SCEVAddExpr *AE = dyn_cast(LHS)) + if (const SCEVMulExpr *ME = dyn_cast(AE->getOperand(0))) + if (AE->getNumOperands() == 2 && ME->getNumOperands() == 2 && + ME->getOperand(0)->isAllOnesValue()) { + RHS = AE->getOperand(1); + LHS = ME->getOperand(1); + Changed = true; + } break; case ICmpInst::ICMP_UGE: if ((RA - 1).isMinValue()) { @@ -5872,6 +6091,11 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, // TODO: More simplifications are possible here. + // Recursively simplify until we either hit a recursion limit or nothing + // changes. 
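+  // For example, once the fold above rewrites "((-1) * %a) + %b != 0" into
+  // "%a != %b", a second pass may canonicalize the new operands further;
+  // Depth caps this at three passes.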
+  if (Changed)
+    return SimplifyICmpOperands(Pred, LHS, RHS, Depth+1);
+
   return Changed;

 trivially_true:
@@ -5942,9 +6166,7 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
   switch (Pred) {
   default:
     llvm_unreachable("Unexpected ICmpInst::Predicate value!");
-    break;
   case ICmpInst::ICMP_SGT:
-    Pred = ICmpInst::ICMP_SLT;
     std::swap(LHS, RHS);
   case ICmpInst::ICMP_SLT: {
     ConstantRange LHSRange = getSignedRange(LHS);
@@ -5956,7 +6178,6 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
     break;
   }
   case ICmpInst::ICMP_SGE:
-    Pred = ICmpInst::ICMP_SLE;
     std::swap(LHS, RHS);
   case ICmpInst::ICMP_SLE: {
     ConstantRange LHSRange = getSignedRange(LHS);
@@ -5968,7 +6189,6 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
     break;
   }
   case ICmpInst::ICMP_UGT:
-    Pred = ICmpInst::ICMP_ULT;
     std::swap(LHS, RHS);
   case ICmpInst::ICMP_ULT: {
     ConstantRange LHSRange = getUnsignedRange(LHS);
@@ -5980,7 +6200,6 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
     break;
   }
   case ICmpInst::ICMP_UGE:
-    Pred = ICmpInst::ICMP_ULE;
     std::swap(LHS, RHS);
   case ICmpInst::ICMP_ULE: {
     ConstantRange LHSRange = getUnsignedRange(LHS);
@@ -6070,12 +6289,34 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
   return false;
 }

+/// RAII wrapper to prevent recursive application of isImpliedCond.
+/// ScalarEvolution's PendingLoopPredicates set must be empty unless we are
+/// currently evaluating isImpliedCond.
+struct MarkPendingLoopPredicate {
+  Value *Cond;
+  DenseSet<Value *> &LoopPreds;
+  bool Pending;
+
+  MarkPendingLoopPredicate(Value *C, DenseSet<Value *> &LP)
+    : Cond(C), LoopPreds(LP) {
+    Pending = !LoopPreds.insert(Cond).second;
+  }
+  ~MarkPendingLoopPredicate() {
+    if (!Pending)
+      LoopPreds.erase(Cond);
+  }
+};
+
 /// isImpliedCond - Test whether the condition described by Pred, LHS,
 /// and RHS is true whenever the given Cond value evaluates to true.
 bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
                                     const SCEV *LHS, const SCEV *RHS,
                                     Value *FoundCondValue,
                                     bool Inverse) {
+  MarkPendingLoopPredicate Mark(FoundCondValue, PendingLoopPredicates);
+  if (Mark.Pending)
+    return false;
+
   // Recursively handle And and Or conditions.
   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) {
     if (BO->getOpcode() == Instruction::And) {
@@ -6101,8 +6342,8 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
       getTypeSizeInBits(ICI->getOperand(0)->getType()))
     return false;

-  // Now that we found a conditional branch that dominates the loop, check to
-  // see if it is the comparison we are looking for.
+  // Now that we have found a conditional branch that dominates the loop or
+  // controls the loop latch, check to see if it is the comparison we are
+  // looking for.
   ICmpInst::Predicate FoundPred;
   if (Inverse)
     FoundPred = ICI->getInversePredicate();
   else
     FoundPred = ICI->getPredicate();
@@ -6116,7 +6357,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
   // LHS' type is checked for above.
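   // The choice of extension must follow the signedness of the known
   // predicate (FoundPred), not the queried one: sign-extension preserves
   // signed comparisons and zero-extension preserves unsigned ones.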
   if (getTypeSizeInBits(LHS->getType()) >
       getTypeSizeInBits(FoundLHS->getType())) {
-    if (CmpInst::isSigned(Pred)) {
+    if (CmpInst::isSigned(FoundPred)) {
       FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
       FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType());
     } else {
@@ -6132,7 +6373,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
       return CmpInst::isTrueWhenEqual(Pred);
     if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS))
       if (FoundLHS == FoundRHS)
-        return CmpInst::isFalseWhenEqual(Pred);
+        return CmpInst::isFalseWhenEqual(FoundPred);

   // Check to see if we can make the LHS or RHS match.
   if (LHS == FoundRHS || RHS == FoundLHS) {
@@ -6232,162 +6473,221 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
   return false;
 }

-/// getBECount - Subtract the end and start values and divide by the step,
-/// rounding up, to get the number of times the backedge is executed. Return
-/// CouldNotCompute if an intermediate computation overflows.
-const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
-                                        const SCEV *End,
-                                        const SCEV *Step,
-                                        bool NoWrap) {
-  assert(!isKnownNegative(Step) &&
-         "This code doesn't handle negative strides yet!");
-
-  Type *Ty = Start->getType();
-
-  // When Start == End, we have an exact BECount == 0. Short-circuit this case
-  // here because SCEV may not be able to determine that the unsigned division
-  // after rounding is zero.
-  if (Start == End)
-    return getConstant(Ty, 0);
-
-  const SCEV *NegOne = getConstant(Ty, (uint64_t)-1);
-  const SCEV *Diff = getMinusSCEV(End, Start);
-  const SCEV *RoundUp = getAddExpr(Step, NegOne);
-
-  // Add an adjustment to the difference between End and Start so that
-  // the division will effectively round up.
-  const SCEV *Add = getAddExpr(Diff, RoundUp);
-
-  if (!NoWrap) {
-    // Check Add for unsigned overflow.
-    // TODO: More sophisticated things could be done here.
-    Type *WideTy = IntegerType::get(getContext(),
-                                    getTypeSizeInBits(Ty) + 1);
-    const SCEV *EDiff = getZeroExtendExpr(Diff, WideTy);
-    const SCEV *ERoundUp = getZeroExtendExpr(RoundUp, WideTy);
-    const SCEV *OperandExtendedAdd = getAddExpr(EDiff, ERoundUp);
-    if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd)
-      return getCouldNotCompute();
+// Check whether a linear IV with a positive stride can overflow in a
+// less-than comparison, given the invariant term of the comparison, the
+// stride, and the NSW/NUW flags on the recurrence.
+bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
+                                         bool IsSigned, bool NoWrap) {
+  if (NoWrap) return false;
+
+  unsigned BitWidth = getTypeSizeInBits(RHS->getType());
+  const SCEV *One = getConstant(Stride->getType(), 1);
+
+  if (IsSigned) {
+    APInt MaxRHS = getSignedRange(RHS).getSignedMax();
+    APInt MaxValue = APInt::getSignedMaxValue(BitWidth);
+    APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One))
+                                .getSignedMax();
+
+    // SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow!
+    return (MaxValue - MaxStrideMinusOne).slt(MaxRHS);
   }
-  return getUDivExpr(Add, Step);
+
+  APInt MaxRHS = getUnsignedRange(RHS).getUnsignedMax();
+  APInt MaxValue = APInt::getMaxValue(BitWidth);
+  APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One))
+                              .getUnsignedMax();
+
+  // UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow!
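+  // For example, at i8 with UMaxRHS == 250 and Stride == 10, we get
+  // 255 - 9 == 246 < 250, so an IV value of 249 (still below RHS) would
+  // step past 255 and wrap to 3, making a trip count derived from
+  // (RHS - Start) / Stride wrong.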
+  return (MaxValue - MaxStrideMinusOne).ult(MaxRHS);
+}
+
+// Check whether a linear IV with a negative stride can overflow in a
+// greater-than comparison, given the invariant term of the comparison,
+// the stride, and the NSW/NUW flags on the recurrence.
+bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
+                                         bool IsSigned, bool NoWrap) {
+  if (NoWrap) return false;
+
+  unsigned BitWidth = getTypeSizeInBits(RHS->getType());
+  const SCEV *One = getConstant(Stride->getType(), 1);
+
+  if (IsSigned) {
+    APInt MinRHS = getSignedRange(RHS).getSignedMin();
+    APInt MinValue = APInt::getSignedMinValue(BitWidth);
+    APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One))
+                                .getSignedMax();
+
+    // SMinRHS - SMaxStrideMinusOne < SMinValue => overflow!
+    return (MinValue + MaxStrideMinusOne).sgt(MinRHS);
+  }
+
+  APInt MinRHS = getUnsignedRange(RHS).getUnsignedMin();
+  APInt MinValue = APInt::getMinValue(BitWidth);
+  APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One))
+                              .getUnsignedMax();
+
+  // UMinRHS - UMaxStrideMinusOne < UMinValue => overflow!
+  return (MinValue + MaxStrideMinusOne).ugt(MinRHS);
+}
+
+// Compute the backedge taken count knowing the interval difference, the
+// stride, and the presence of equality in the comparison.
+const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step,
+                                            bool Equality) {
+  const SCEV *One = getConstant(Step->getType(), 1);
+  Delta = Equality ? getAddExpr(Delta, Step)
+                   : getAddExpr(Delta, getMinusSCEV(Step, One));
+  return getUDivExpr(Delta, Step);
 }

 /// HowManyLessThans - Return the number of times a backedge containing the
 /// specified less-than comparison will execute.  If not computable, return
 /// CouldNotCompute.
+///
+/// @param IsSubExpr is true when the LHS < RHS condition does not directly
+/// control the branch. In this case, we can only compute an iteration count
+/// for a subexpression that cannot overflow before evaluating true.
 ScalarEvolution::ExitLimit
 ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
-                                  const Loop *L, bool isSigned) {
-  // Only handle:  "ADDREC < LoopInvariant".
-  if (!isLoopInvariant(RHS, L)) return getCouldNotCompute();
+                                  const Loop *L, bool IsSigned,
+                                  bool IsSubExpr) {
+  // We handle only IV < Invariant.
+  if (!isLoopInvariant(RHS, L))
+    return getCouldNotCompute();

-  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS);
-  if (!AddRec || AddRec->getLoop() != L)
+  const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
+
+  // Avoid weird loops.
+  if (!IV || IV->getLoop() != L || !IV->isAffine())
     return getCouldNotCompute();

-  // Check to see if we have a flag which makes analysis easy.
-  bool NoWrap = isSigned ?
-    AddRec->getNoWrapFlags((SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNW)) :
-    AddRec->getNoWrapFlags((SCEV::NoWrapFlags)(SCEV::FlagNUW | SCEV::FlagNW));
+  bool NoWrap = !IsSubExpr &&
+                IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW);

-  if (AddRec->isAffine()) {
-    unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
-    const SCEV *Step = AddRec->getStepRecurrence(*this);
+  const SCEV *Stride = IV->getStepRecurrence(*this);

-    if (Step->isZero())
-      return getCouldNotCompute();
-    if (Step->isOne()) {
-      // With unit stride, the iteration never steps past the limit value.
-    } else if (isKnownPositive(Step)) {
-      // Test whether a positive iteration can step past the limit
-      // value and past the maximum value for its type in a single step.
- // Note that it's not sufficient to check NoWrap here, because even - // though the value after a wrap is undefined, it's not undefined - // behavior, so if wrap does occur, the loop could either terminate or - // loop infinitely, but in either case, the loop is guaranteed to - // iterate at least until the iteration where the wrapping occurs. - const SCEV *One = getConstant(Step->getType(), 1); - if (isSigned) { - APInt Max = APInt::getSignedMaxValue(BitWidth); - if ((Max - getSignedRange(getMinusSCEV(Step, One)).getSignedMax()) - .slt(getSignedRange(RHS).getSignedMax())) - return getCouldNotCompute(); - } else { - APInt Max = APInt::getMaxValue(BitWidth); - if ((Max - getUnsignedRange(getMinusSCEV(Step, One)).getUnsignedMax()) - .ult(getUnsignedRange(RHS).getUnsignedMax())) - return getCouldNotCompute(); - } - } else - // TODO: Handle negative strides here and below. - return getCouldNotCompute(); + // Avoid negative or zero stride values + if (!isKnownPositive(Stride)) + return getCouldNotCompute(); - // We know the LHS is of the form {n,+,s} and the RHS is some loop-invariant - // m. So, we count the number of iterations in which {n,+,s} < m is true. - // Note that we cannot simply return max(m-n,0)/s because it's not safe to - // treat m-n as signed nor unsigned due to overflow possibility. - - // First, we get the value of the LHS in the first iteration: n - const SCEV *Start = AddRec->getOperand(0); - - // Determine the minimum constant start value. - const SCEV *MinStart = getConstant(isSigned ? - getSignedRange(Start).getSignedMin() : - getUnsignedRange(Start).getUnsignedMin()); - - // If we know that the condition is true in order to enter the loop, - // then we know that it will run exactly (m-n)/s times. Otherwise, we - // only know that it will execute (max(m,n)-n)/s times. In both cases, - // the division must round up. - const SCEV *End = RHS; - if (!isLoopEntryGuardedByCond(L, - isSigned ? ICmpInst::ICMP_SLT : - ICmpInst::ICMP_ULT, - getMinusSCEV(Start, Step), RHS)) - End = isSigned ? getSMaxExpr(RHS, Start) - : getUMaxExpr(RHS, Start); - - // Determine the maximum constant end value. - const SCEV *MaxEnd = getConstant(isSigned ? - getSignedRange(End).getSignedMax() : - getUnsignedRange(End).getUnsignedMax()); - - // If MaxEnd is within a step of the maximum integer value in its type, - // adjust it down to the minimum value which would produce the same effect. - // This allows the subsequent ceiling division of (N+(step-1))/step to - // compute the correct value. - const SCEV *StepMinusOne = getMinusSCEV(Step, - getConstant(Step->getType(), 1)); - MaxEnd = isSigned ? - getSMinExpr(MaxEnd, - getMinusSCEV(getConstant(APInt::getSignedMaxValue(BitWidth)), - StepMinusOne)) : - getUMinExpr(MaxEnd, - getMinusSCEV(getConstant(APInt::getMaxValue(BitWidth)), - StepMinusOne)); - - // Finally, we subtract these two values and divide, rounding up, to get - // the number of times the backedge is executed. - const SCEV *BECount = getBECount(Start, End, Step, NoWrap); - - // The maximum backedge count is similar, except using the minimum start - // value and the maximum end value. - // If we already have an exact constant BECount, use it instead. - const SCEV *MaxBECount = isa(BECount) ? BECount - : getBECount(MinStart, MaxEnd, Step, NoWrap); - - // If the stride is nonconstant, and NoWrap == true, then - // getBECount(MinStart, MaxEnd) may not compute. 
This would result in an - // exact BECount and invalid MaxBECount, which should be avoided to catch - // more optimization opportunities. - if (isa(MaxBECount)) - MaxBECount = BECount; - - return ExitLimit(BECount, MaxBECount); - } + // Avoid proven overflow cases: this will ensure that the backedge taken count + // will not generate any unsigned overflow. Relaxed no-overflow conditions + // exploit NoWrapFlags, allowing to optimize in presence of undefined + // behaviors like the case of C language. + if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap)) + return getCouldNotCompute(); - return getCouldNotCompute(); + ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT + : ICmpInst::ICMP_ULT; + const SCEV *Start = IV->getStart(); + const SCEV *End = RHS; + if (!isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS)) + End = IsSigned ? getSMaxExpr(RHS, Start) + : getUMaxExpr(RHS, Start); + + const SCEV *BECount = computeBECount(getMinusSCEV(End, Start), Stride, false); + + APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin() + : getUnsignedRange(Start).getUnsignedMin(); + + APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin() + : getUnsignedRange(Stride).getUnsignedMin(); + + unsigned BitWidth = getTypeSizeInBits(LHS->getType()); + APInt Limit = IsSigned ? APInt::getSignedMaxValue(BitWidth) - (MinStride - 1) + : APInt::getMaxValue(BitWidth) - (MinStride - 1); + + // Although End can be a MAX expression we estimate MaxEnd considering only + // the case End = RHS. This is safe because in the other case (End - Start) + // is zero, leading to a zero maximum backedge taken count. + APInt MaxEnd = + IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit) + : APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit); + + const SCEV *MaxBECount; + if (isa(BECount)) + MaxBECount = BECount; + else + MaxBECount = computeBECount(getConstant(MaxEnd - MinStart), + getConstant(MinStride), false); + + if (isa(MaxBECount)) + MaxBECount = BECount; + + return ExitLimit(BECount, MaxBECount, /*MustExit=*/true); +} + +ScalarEvolution::ExitLimit +ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS, + const Loop *L, bool IsSigned, + bool IsSubExpr) { + // We handle only IV > Invariant + if (!isLoopInvariant(RHS, L)) + return getCouldNotCompute(); + + const SCEVAddRecExpr *IV = dyn_cast(LHS); + + // Avoid weird loops + if (!IV || IV->getLoop() != L || !IV->isAffine()) + return getCouldNotCompute(); + + bool NoWrap = !IsSubExpr && + IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW); + + const SCEV *Stride = getNegativeSCEV(IV->getStepRecurrence(*this)); + + // Avoid negative or zero stride values + if (!isKnownPositive(Stride)) + return getCouldNotCompute(); + + // Avoid proven overflow cases: this will ensure that the backedge taken count + // will not generate any unsigned overflow. Relaxed no-overflow conditions + // exploit NoWrapFlags, allowing to optimize in presence of undefined + // behaviors like the case of C language. + if (!Stride->isOne() && doesIVOverflowOnGT(RHS, Stride, IsSigned, NoWrap)) + return getCouldNotCompute(); + + ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SGT + : ICmpInst::ICMP_UGT; + + const SCEV *Start = IV->getStart(); + const SCEV *End = RHS; + if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS)) + End = IsSigned ? 
getSMinExpr(RHS, Start) + : getUMinExpr(RHS, Start); + + const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride, false); + + APInt MaxStart = IsSigned ? getSignedRange(Start).getSignedMax() + : getUnsignedRange(Start).getUnsignedMax(); + + APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin() + : getUnsignedRange(Stride).getUnsignedMin(); + + unsigned BitWidth = getTypeSizeInBits(LHS->getType()); + APInt Limit = IsSigned ? APInt::getSignedMinValue(BitWidth) + (MinStride - 1) + : APInt::getMinValue(BitWidth) + (MinStride - 1); + + // Although End can be a MIN expression we estimate MinEnd considering only + // the case End = RHS. This is safe because in the other case (Start - End) + // is zero, leading to a zero maximum backedge taken count. + APInt MinEnd = + IsSigned ? APIntOps::smax(getSignedRange(RHS).getSignedMin(), Limit) + : APIntOps::umax(getUnsignedRange(RHS).getUnsignedMin(), Limit); + + + const SCEV *MaxBECount = getCouldNotCompute(); + if (isa(BECount)) + MaxBECount = BECount; + else + MaxBECount = computeBECount(getConstant(MaxStart - MinEnd), + getConstant(MinStride), false); + + if (isa(MaxBECount)) + MaxBECount = BECount; + + return ExitLimit(BECount, MaxBECount, /*MustExit=*/true); } /// getNumIterationsInRange - Return the number of iterations of this loop that @@ -6516,7 +6816,517 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, return SE.getCouldNotCompute(); } +static const APInt srem(const SCEVConstant *C1, const SCEVConstant *C2) { + APInt A = C1->getValue()->getValue(); + APInt B = C2->getValue()->getValue(); + uint32_t ABW = A.getBitWidth(); + uint32_t BBW = B.getBitWidth(); + + if (ABW > BBW) + B = B.sext(ABW); + else if (ABW < BBW) + A = A.sext(BBW); + + return APIntOps::srem(A, B); +} + +static const APInt sdiv(const SCEVConstant *C1, const SCEVConstant *C2) { + APInt A = C1->getValue()->getValue(); + APInt B = C2->getValue()->getValue(); + uint32_t ABW = A.getBitWidth(); + uint32_t BBW = B.getBitWidth(); + + if (ABW > BBW) + B = B.sext(ABW); + else if (ABW < BBW) + A = A.sext(BBW); + + return APIntOps::sdiv(A, B); +} + +namespace { +struct SCEVGCD : public SCEVVisitor { +public: + // Pattern match Step into Start. When Step is a multiply expression, find + // the largest subexpression of Step that appears in Start. When Start is an + // add expression, try to match Step in the subexpressions of Start, non + // matching subexpressions are returned under Remainder. 
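+  // For example, findGCD(SE, 12, 8) returns 4 with a zero remainder, and
+  // findGCD(SE, (4 * %m), 8) also returns 4, since only the constant factor
+  // is common.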
+ static const SCEV *findGCD(ScalarEvolution &SE, const SCEV *Start, + const SCEV *Step, const SCEV **Remainder) { + assert(Remainder && "Remainder should not be NULL"); + SCEVGCD R(SE, Step, SE.getConstant(Step->getType(), 0)); + const SCEV *Res = R.visit(Start); + *Remainder = R.Remainder; + return Res; + } + + SCEVGCD(ScalarEvolution &S, const SCEV *G, const SCEV *R) + : SE(S), GCD(G), Remainder(R) { + Zero = SE.getConstant(GCD->getType(), 0); + One = SE.getConstant(GCD->getType(), 1); + } + + const SCEV *visitConstant(const SCEVConstant *Constant) { + if (GCD == Constant || Constant == Zero) + return GCD; + + if (const SCEVConstant *CGCD = dyn_cast(GCD)) { + const SCEV *Res = SE.getConstant(gcd(Constant, CGCD)); + if (Res != One) + return Res; + + Remainder = SE.getConstant(srem(Constant, CGCD)); + Constant = cast(SE.getMinusSCEV(Constant, Remainder)); + Res = SE.getConstant(gcd(Constant, CGCD)); + return Res; + } + + // When GCD is not a constant, it could be that the GCD is an Add, Mul, + // AddRec, etc., in which case we want to find out how many times the + // Constant divides the GCD: we then return that as the new GCD. + const SCEV *Rem = Zero; + const SCEV *Res = findGCD(SE, GCD, Constant, &Rem); + + if (Res == One || Rem != Zero) { + Remainder = Constant; + return One; + } + + assert(isa(Res) && "Res should be a constant"); + Remainder = SE.getConstant(srem(Constant, cast(Res))); + return Res; + } + + const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { + if (GCD == Expr) + return GCD; + + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { + const SCEV *Rem = Zero; + const SCEV *Res = findGCD(SE, Expr->getOperand(e - 1 - i), GCD, &Rem); + + // FIXME: There may be ambiguous situations: for instance, + // GCD(-4 + (3 * %m), 2 * %m) where 2 divides -4 and %m divides (3 * %m). + // The order in which the AddExpr is traversed computes a different GCD + // and Remainder. + if (Res != One) + GCD = Res; + if (Rem != Zero) + Remainder = SE.getAddExpr(Remainder, Rem); + } + + return GCD; + } + + const SCEV *visitMulExpr(const SCEVMulExpr *Expr) { + if (GCD == Expr) + return GCD; + + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { + if (Expr->getOperand(i) == GCD) + return GCD; + } + + // If we have not returned yet, it means that GCD is not part of Expr. + const SCEV *PartialGCD = One; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { + const SCEV *Rem = Zero; + const SCEV *Res = findGCD(SE, Expr->getOperand(i), GCD, &Rem); + if (Rem != Zero) + // GCD does not divide Expr->getOperand(i). + continue; + + if (Res == GCD) + return GCD; + PartialGCD = SE.getMulExpr(PartialGCD, Res); + if (PartialGCD == GCD) + return GCD; + } + + if (PartialGCD != One) + return PartialGCD; + + // Failed to find a PartialGCD: set the Remainder to the full expression, + // and return the GCD. 
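+  // (When the GCD is itself a multiply expression, the code below still
+  // tries each of its factors against Expr before giving up.)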
+ Remainder = Expr; + const SCEVMulExpr *Mul = dyn_cast(GCD); + if (!Mul) + return GCD; + + // When the GCD is a multiply expression, try to decompose it: + // this occurs when Step does not divide the Start expression + // as in: {(-4 + (3 * %m)),+,(2 * %m)} + for (int i = 0, e = Mul->getNumOperands(); i < e; ++i) { + const SCEV *Rem = Zero; + const SCEV *Res = findGCD(SE, Expr, Mul->getOperand(i), &Rem); + if (Rem == Zero) { + Remainder = Rem; + return Res; + } + } + + return GCD; + } + + const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { + if (GCD == Expr) + return GCD; + + if (!Expr->isAffine()) { + Remainder = Expr; + return GCD; + } + + const SCEV *Rem = Zero; + const SCEV *Res = findGCD(SE, Expr->getOperand(0), GCD, &Rem); + if (Res == One || Res->isAllOnesValue()) { + Remainder = Expr; + return GCD; + } + + if (Rem != Zero) + Remainder = SE.getAddExpr(Remainder, Rem); + + Rem = Zero; + Res = findGCD(SE, Expr->getOperand(1), Res, &Rem); + if (Rem != Zero || Res == One || Res->isAllOnesValue()) { + Remainder = Expr; + return GCD; + } + + return Res; + } + + const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { + return One; + } + +private: + ScalarEvolution &SE; + const SCEV *GCD, *Remainder, *Zero, *One; +}; + +struct SCEVDivision : public SCEVVisitor { +public: + // Remove from Start all multiples of Step. + static const SCEV *divide(ScalarEvolution &SE, const SCEV *Start, + const SCEV *Step) { + SCEVDivision D(SE, Step); + const SCEV *Rem = D.Zero; + (void)Rem; + // The division is guaranteed to succeed: Step should divide Start with no + // remainder. 
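+  // For example, divide(SE, (4 * %m), 4) yields %m, and dividing an
+  // expression by itself yields 1.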
+ assert(Step == SCEVGCD::findGCD(SE, Start, Step, &Rem) && Rem == D.Zero && + "Step should divide Start with no remainder."); + return D.visit(Start); + } + + SCEVDivision(ScalarEvolution &S, const SCEV *G) : SE(S), GCD(G) { + Zero = SE.getConstant(GCD->getType(), 0); + One = SE.getConstant(GCD->getType(), 1); + } + + const SCEV *visitConstant(const SCEVConstant *Constant) { + if (GCD == Constant) + return One; + + if (const SCEVConstant *CGCD = dyn_cast(GCD)) + return SE.getConstant(sdiv(Constant, CGCD)); + return Constant; + } + + const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { + if (GCD == Expr) + return One; + + SmallVector Operands; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) + Operands.push_back(divide(SE, Expr->getOperand(i), GCD)); + + if (Operands.size() == 1) + return Operands[0]; + return SE.getAddExpr(Operands); + } + + const SCEV *visitMulExpr(const SCEVMulExpr *Expr) { + if (GCD == Expr) + return One; + + bool FoundGCDTerm = false; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) + if (Expr->getOperand(i) == GCD) + FoundGCDTerm = true; + + SmallVector Operands; + if (FoundGCDTerm) { + FoundGCDTerm = false; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { + if (FoundGCDTerm) + Operands.push_back(Expr->getOperand(i)); + else if (Expr->getOperand(i) == GCD) + FoundGCDTerm = true; + else + Operands.push_back(Expr->getOperand(i)); + } + } else { + const SCEV *PartialGCD = One; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { + if (PartialGCD == GCD) { + Operands.push_back(Expr->getOperand(i)); + continue; + } + + const SCEV *Rem = Zero; + const SCEV *Res = SCEVGCD::findGCD(SE, Expr->getOperand(i), GCD, &Rem); + if (Rem == Zero) { + PartialGCD = SE.getMulExpr(PartialGCD, Res); + Operands.push_back(divide(SE, Expr->getOperand(i), Res)); + } else { + Operands.push_back(Expr->getOperand(i)); + } + } + } + + if (Operands.size() == 1) + return Operands[0]; + return SE.getMulExpr(Operands); + } + + const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { + if (GCD == Expr) + return One; + + assert(Expr->isAffine() && "Expr should be affine"); + + const SCEV *Start = divide(SE, Expr->getStart(), GCD); + const SCEV *Step = divide(SE, Expr->getStepRecurrence(SE), GCD); + + return SE.getAddRecExpr(Start, Step, Expr->getLoop(), + Expr->getNoWrapFlags()); + } + + const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { + return Expr; + } + +private: + ScalarEvolution &SE; + const SCEV *GCD, *Zero, *One; +}; +} + +/// Splits the SCEV into two vectors of SCEVs representing the subscripts and +/// sizes of an array access. Returns the remainder of the delinearization that +/// is the offset start of the array. 
The SCEV->delinearize algorithm computes +/// the multiples of SCEV coefficients: that is a pattern matching of sub +/// expressions in the stride and base of a SCEV corresponding to the +/// computation of a GCD (greatest common divisor) of base and stride. When +/// SCEV->delinearize fails, it returns the SCEV unchanged. +/// +/// For example: when analyzing the memory access A[i][j][k] in this loop nest +/// +/// void foo(long n, long m, long o, double A[n][m][o]) { +/// +/// for (long i = 0; i < n; i++) +/// for (long j = 0; j < m; j++) +/// for (long k = 0; k < o; k++) +/// A[i][j][k] = 1.0; +/// } +/// +/// the delinearization input is the following AddRec SCEV: +/// +/// AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k> +/// +/// From this SCEV, we are able to say that the base offset of the access is %A +/// because it appears as an offset that does not divide any of the strides in +/// the loops: +/// +/// CHECK: Base offset: %A +/// +/// and then SCEV->delinearize determines the size of some of the dimensions of +/// the array as these are the multiples by which the strides are happening: +/// +/// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes. +/// +/// Note that the outermost dimension remains of UnknownSize because there are +/// no strides that would help identifying the size of the last dimension: when +/// the array has been statically allocated, one could compute the size of that +/// dimension by dividing the overall size of the array by the size of the known +/// dimensions: %m * %o * 8. +/// +/// Finally delinearize provides the access functions for the array reference +/// that does correspond to A[i][j][k] of the above C testcase: +/// +/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>] +/// +/// The testcases are checking the output of a function pass: +/// DelinearizationPass that walks through all loads and stores of a function +/// asking for the SCEV of the memory access with respect to all enclosing +/// loops, calling SCEV->delinearize on that and printing the results. + +const SCEV * +SCEVAddRecExpr::delinearize(ScalarEvolution &SE, + SmallVectorImpl &Subscripts, + SmallVectorImpl &Sizes) const { + // Early exit in case this SCEV is not an affine multivariate function. + if (!this->isAffine()) + return this; + + const SCEV *Start = this->getStart(); + const SCEV *Step = this->getStepRecurrence(SE); + + // Build the SCEV representation of the canonical induction variable in the + // loop of this SCEV. + const SCEV *Zero = SE.getConstant(this->getType(), 0); + const SCEV *One = SE.getConstant(this->getType(), 1); + const SCEV *IV = + SE.getAddRecExpr(Zero, One, this->getLoop(), this->getNoWrapFlags()); + + DEBUG(dbgs() << "(delinearize: " << *this << "\n"); + + // When the stride of this SCEV is 1, do not compute the GCD: the size of this + // subscript is 1, and this same SCEV for the access function. + const SCEV *Remainder = Zero; + const SCEV *GCD = One; + + // Find the GCD and Remainder of the Start and Step coefficients of this SCEV. + if (Step != One && !Step->isAllOnesValue()) + GCD = SCEVGCD::findGCD(SE, Start, Step, &Remainder); + + DEBUG(dbgs() << "GCD: " << *GCD << "\n"); + DEBUG(dbgs() << "Remainder: " << *Remainder << "\n"); + + const SCEV *Quotient = Start; + if (GCD != One && !GCD->isAllOnesValue()) + // As findGCD computed Remainder, GCD divides "Start - Remainder." The + // Quotient is then this SCEV without Remainder, scaled down by the GCD. 
The + // Quotient is what will be used in the next subscript delinearization. + Quotient = SCEVDivision::divide(SE, SE.getMinusSCEV(Start, Remainder), GCD); + + DEBUG(dbgs() << "Quotient: " << *Quotient << "\n"); + + const SCEV *Rem = Quotient; + if (const SCEVAddRecExpr *AR = dyn_cast(Quotient)) + // Recursively call delinearize on the Quotient until there are no more + // multiples that can be recognized. + Rem = AR->delinearize(SE, Subscripts, Sizes); + + // Scale up the canonical induction variable IV by whatever remains from the + // Step after division by the GCD: the GCD is the size of all the sub-array. + if (Step != One && !Step->isAllOnesValue() && GCD != One && + !GCD->isAllOnesValue() && Step != GCD) { + Step = SCEVDivision::divide(SE, Step, GCD); + IV = SE.getMulExpr(IV, Step); + } + // The access function in the current subscript is computed as the canonical + // induction variable IV (potentially scaled up by the step) and offset by + // Rem, the offset of delinearization in the sub-array. + const SCEV *Index = SE.getAddExpr(IV, Rem); + + // Record the access function and the size of the current subscript. + Subscripts.push_back(Index); + Sizes.push_back(GCD); +#ifndef NDEBUG + int Size = Sizes.size(); + DEBUG(dbgs() << "succeeded to delinearize " << *this << "\n"); + DEBUG(dbgs() << "ArrayDecl[UnknownSize]"); + for (int i = 0; i < Size - 1; i++) + DEBUG(dbgs() << "[" << *Sizes[i] << "]"); + DEBUG(dbgs() << " with elements of " << *Sizes[Size - 1] << " bytes.\n"); + + DEBUG(dbgs() << "ArrayRef"); + for (int i = 0; i < Size; i++) + DEBUG(dbgs() << "[" << *Subscripts[i] << "]"); + DEBUG(dbgs() << "\n)\n"); +#endif + + return Remainder; +} //===----------------------------------------------------------------------===// // SCEVCallbackVH Class Implementation @@ -6537,11 +7347,8 @@ void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) { // so that future queries will recompute the expressions using the new // value. Value *Old = getValPtr(); - SmallVector Worklist; + SmallVector Worklist(Old->user_begin(), Old->user_end()); SmallPtrSet Visited; - for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end(); - UI != UE; ++UI) - Worklist.push_back(*UI); while (!Worklist.empty()) { User *U = Worklist.pop_back_val(); // Deleting the Old value will cause this to dangle. Postpone @@ -6553,9 +7360,7 @@ void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) { if (PHINode *PN = dyn_cast(U)) SE->ConstantEvolutionLoopExitValue.erase(PN); SE->ValueExprMap.erase(U); - for (Value::use_iterator UI = U->use_begin(), UE = U->use_end(); - UI != UE; ++UI) - Worklist.push_back(*UI); + Worklist.insert(Worklist.end(), U->user_begin(), U->user_end()); } // Delete the Old value. if (PHINode *PN = dyn_cast(Old)) @@ -6572,16 +7377,18 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se) //===----------------------------------------------------------------------===// ScalarEvolution::ScalarEvolution() - : FunctionPass(ID), FirstUnknown(0) { + : FunctionPass(ID), ValuesAtScopes(64), LoopDispositions(64), + BlockDispositions(64), FirstUnknown(nullptr) { initializeScalarEvolutionPass(*PassRegistry::getPassRegistry()); } bool ScalarEvolution::runOnFunction(Function &F) { this->F = &F; LI = &getAnalysis(); - TD = getAnalysisIfAvailable(); + DataLayoutPass *DLP = getAnalysisIfAvailable(); + DL = DLP ? 
&DLP->getDataLayout() : nullptr; TLI = &getAnalysis(); - DT = &getAnalysis(); + DT = &getAnalysis().getDomTree(); return false; } @@ -6590,7 +7397,7 @@ void ScalarEvolution::releaseMemory() { // destructors, so that they release their references to their values. for (SCEVUnknown *U = FirstUnknown; U; U = U->Next) U->~SCEVUnknown(); - FirstUnknown = 0; + FirstUnknown = nullptr; ValueExprMap.clear(); @@ -6602,6 +7409,8 @@ void ScalarEvolution::releaseMemory() { I->second.clear(); } + assert(PendingLoopPredicates.empty() && "isImpliedCond garbage"); + BackedgeTakenCounts.clear(); ConstantEvolutionLoopExitValue.clear(); ValuesAtScopes.clear(); @@ -6616,7 +7425,7 @@ void ScalarEvolution::releaseMemory() { void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequiredTransitive(); - AU.addRequiredTransitive(); + AU.addRequiredTransitive(); AU.addRequired(); } @@ -6631,7 +7440,7 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, PrintLoopInfo(OS, SE, *I); OS << "Loop "; - WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false); + L->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": "; SmallVector ExitBlocks; @@ -6647,7 +7456,7 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, OS << "\n" "Loop "; - WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false); + L->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": "; if (!isa(SE->getMaxBackedgeTakenCount(L))) { @@ -6669,7 +7478,7 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const { ScalarEvolution &SE = *const_cast(this); OS << "Classifying expressions for: "; - WriteAsOperand(OS, F, /*PrintType=*/false); + F->printAsOperand(OS, /*PrintType=*/false); OS << "\n"; for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) if (isSCEVable(I->getType()) && !isa(*I)) { @@ -6700,7 +7509,7 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const { } OS << "Determining loop execution counts for: "; - WriteAsOperand(OS, F, /*PrintType=*/false); + F->printAsOperand(OS, /*PrintType=*/false); OS << "\n"; for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) PrintLoopInfo(OS, &SE, *I); @@ -6708,19 +7517,26 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const { ScalarEvolution::LoopDisposition ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) { - std::map &Values = LoopDispositions[S]; - std::pair::iterator, bool> Pair = - Values.insert(std::make_pair(L, LoopVariant)); - if (!Pair.second) - return Pair.first->second; - + SmallVector, 2> &Values = LoopDispositions[S]; + for (unsigned u = 0; u < Values.size(); u++) { + if (Values[u].first == L) + return Values[u].second; + } + Values.push_back(std::make_pair(L, LoopVariant)); LoopDisposition D = computeLoopDisposition(S, L); - return LoopDispositions[S][L] = D; + SmallVector, 2> &Values2 = LoopDispositions[S]; + for (unsigned u = Values2.size(); u > 0; u--) { + if (Values2[u - 1].first == L) { + Values2[u - 1].second = D; + break; + } + } + return D; } ScalarEvolution::LoopDisposition ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) { - switch (S->getSCEVType()) { + switch (static_cast(S->getSCEVType())) { case scConstant: return LoopInvariant; case scTruncate: @@ -6793,11 +7609,8 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) { return LoopInvariant; case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - return LoopVariant; - default: break; } 
 
 ScalarEvolution::LoopDisposition
 ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
-  switch (S->getSCEVType()) {
+  switch (static_cast<SCEVTypes>(S->getSCEVType())) {
   case scConstant:
     return LoopInvariant;
   case scTruncate:
@@ -6793,11 +7609,8 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
     return LoopInvariant;
   case scCouldNotCompute:
     llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
-    return LoopVariant;
-  default: break;
   }
   llvm_unreachable("Unknown SCEV kind!");
-  return LoopVariant;
 }
 
 bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) {
@@ -6810,19 +7623,26 @@ bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L)
 
 ScalarEvolution::BlockDisposition
 ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
-  std::map<const BasicBlock *, BlockDisposition> &Values = BlockDispositions[S];
-  std::pair<std::map<const BasicBlock *, BlockDisposition>::iterator, bool>
-    Pair = Values.insert(std::make_pair(BB, DoesNotDominateBlock));
-  if (!Pair.second)
-    return Pair.first->second;
-
+  SmallVector<std::pair<const BasicBlock *, BlockDisposition>, 2> &Values = BlockDispositions[S];
+  for (unsigned u = 0; u < Values.size(); u++) {
+    if (Values[u].first == BB)
+      return Values[u].second;
+  }
+  Values.push_back(std::make_pair(BB, DoesNotDominateBlock));
   BlockDisposition D = computeBlockDisposition(S, BB);
-  return BlockDispositions[S][BB] = D;
+  SmallVector<std::pair<const BasicBlock *, BlockDisposition>, 2> &Values2 = BlockDispositions[S];
+  for (unsigned u = Values2.size(); u > 0; u--) {
+    if (Values2[u - 1].first == BB) {
+      Values2[u - 1].second = D;
+      break;
+    }
+  }
+  return D;
 }
 
 ScalarEvolution::BlockDisposition
 ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
-  switch (S->getSCEVType()) {
+  switch (static_cast<SCEVTypes>(S->getSCEVType())) {
   case scConstant:
     return ProperlyDominatesBlock;
   case scTruncate:
@@ -6879,11 +7699,8 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
     return ProperlyDominatesBlock;
   case scCouldNotCompute:
     llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
-    return DoesNotDominateBlock;
-  default: break;
   }
   llvm_unreachable("Unknown SCEV kind!");
-  return DoesNotDominateBlock;
 }
 
 bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) {
@@ -6894,46 +7711,27 @@ bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) {
   return getBlockDisposition(S, BB) == ProperlyDominatesBlock;
 }
 
-bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
-  switch (S->getSCEVType()) {
-  case scConstant:
-    return false;
-  case scTruncate:
-  case scZeroExtend:
-  case scSignExtend: {
-    const SCEVCastExpr *Cast = cast<SCEVCastExpr>(S);
-    const SCEV *CastOp = Cast->getOperand();
-    return Op == CastOp || hasOperand(CastOp, Op);
-  }
-  case scAddRecExpr:
-  case scAddExpr:
-  case scMulExpr:
-  case scUMaxExpr:
-  case scSMaxExpr: {
-    const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
-    for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
-         I != E; ++I) {
-      const SCEV *NAryOp = *I;
-      if (NAryOp == Op || hasOperand(NAryOp, Op))
-        return true;
-    }
-    return false;
-  }
-  case scUDivExpr: {
-    const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
-    const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS();
-    return LHS == Op || hasOperand(LHS, Op) ||
-           RHS == Op || hasOperand(RHS, Op);
-  }
-  case scUnknown:
-    return false;
-  case scCouldNotCompute:
-    llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
-    return false;
-  default: break;
+namespace {
+// Search for a SCEV expression node within an expression tree.
+// Implements SCEVTraversal::Visitor.
+struct SCEVSearch {
+  const SCEV *Node;
+  bool IsFound;
+
+  SCEVSearch(const SCEV *N): Node(N), IsFound(false) {}
+
+  bool follow(const SCEV *S) {
+    IsFound |= (S == Node);
+    return !IsFound;
   }
-  llvm_unreachable("Unknown SCEV kind!");
-  return false;
+  bool isDone() const { return IsFound; }
+};
+}
+
+bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
+  SCEVSearch Search(Op);
+  visitAll(S, Search);
+  return Search.IsFound;
 }
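The new hasOperand delegates the recursion to the generic SCEV visitor: follow() prunes descent once the node is found, and isDone() lets visitAll cut the whole walk short. The same two-hook protocol in a self-contained sketch over a toy expression tree (Node, Search, and this visitAll are stand-ins for illustration, not the LLVM types):

#include <cstdio>
#include <vector>

struct Node {
  std::vector<const Node *> Ops;
};

// Mirrors SCEVSearch: follow() decides whether to descend into a node,
// isDone() lets the traversal driver bail out early.
struct Search {
  const Node *Target;
  bool Found;
  bool follow(const Node *N) {
    Found |= (N == Target);
    return !Found; // no need to look below the target
  }
  bool isDone() const { return Found; }
};

// Minimal stand-in for visitAll: a depth-first walk driven by the visitor.
static void visitAll(const Node *Root, Search &S) {
  if (S.isDone() || !S.follow(Root))
    return;
  for (const Node *Op : Root->Ops)
    if (!S.isDone())
      visitAll(Op, S);
}

int main() {
  Node Leaf, Mid, Root;
  Mid.Ops.push_back(&Leaf);
  Root.Ops.push_back(&Mid);
  Search S = {&Leaf, false};
  visitAll(&Root, S);
  printf("%s\n", S.Found ? "found" : "not found"); // prints "found"
  return 0;
}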
 
 void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
@@ -6942,4 +7740,99 @@
   BlockDispositions.erase(S);
   UnsignedRanges.erase(S);
   SignedRanges.erase(S);
+
+  for (DenseMap<const Loop *, BackedgeTakenInfo>::iterator I =
+         BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); I != E; ) {
+    BackedgeTakenInfo &BEInfo = I->second;
+    if (BEInfo.hasOperand(S, this)) {
+      BEInfo.clear();
+      BackedgeTakenCounts.erase(I++);
+    }
+    else
+      ++I;
+  }
+}
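The loop above erases map entries while walking the map; the post-increment inside erase(I++) hands erase the old position after I has already moved on, which is what keeps the iteration valid. The same idiom, standalone over std::map:

#include <cstdio>
#include <map>

int main() {
  std::map<int, int> Counts = {{1, 10}, {2, 20}, {3, 30}};
  // Erase entries matching a predicate during iteration: advance the
  // iterator before erase() invalidates the current position.
  for (std::map<int, int>::iterator I = Counts.begin(), E = Counts.end();
       I != E;) {
    if (I->second == 20)
      Counts.erase(I++); // I already points at the next entry
    else
      ++I;
  }
  for (std::map<int, int>::iterator I = Counts.begin(), E = Counts.end();
       I != E; ++I)
    printf("%d -> %d\n", I->first, I->second); // 1 -> 10, 3 -> 30
  return 0;
}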
+
+typedef DenseMap<const Loop *, std::string> VerifyMap;
+
+/// replaceSubString - Replaces all occurrences of From in Str with To.
+static void replaceSubString(std::string &Str, StringRef From, StringRef To) {
+  size_t Pos = 0;
+  while ((Pos = Str.find(From, Pos)) != std::string::npos) {
+    Str.replace(Pos, From.size(), To.data(), To.size());
+    Pos += To.size();
+  }
+}
+
+/// getLoopBackedgeTakenCounts - Helper method for verifyAnalysis.
+static void
+getLoopBackedgeTakenCounts(Loop *L, VerifyMap &Map, ScalarEvolution &SE) {
+  for (Loop::reverse_iterator I = L->rbegin(), E = L->rend(); I != E; ++I) {
+    getLoopBackedgeTakenCounts(*I, Map, SE); // recurse.
+
+    std::string &S = Map[L];
+    if (S.empty()) {
+      raw_string_ostream OS(S);
+      SE.getBackedgeTakenCount(L)->print(OS);
+
+      // false and 0 are semantically equivalent. This can happen in dead loops.
+      replaceSubString(OS.str(), "false", "0");
+      // Remove wrap flags, their use in SCEV is highly fragile.
+      // FIXME: Remove this when SCEV gets smarter about them.
+      replaceSubString(OS.str(), "<nw>", "");
+      replaceSubString(OS.str(), "<nsw>", "");
+      replaceSubString(OS.str(), "<nuw>", "");
+    }
+  }
+}
+
+void ScalarEvolution::verifyAnalysis() const {
+  if (!VerifySCEV)
+    return;
+
+  ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
+
+  // Gather stringified backedge taken counts for all loops using SCEV's caches.
+  // FIXME: It would be much better to store actual values instead of strings,
+  //        but SCEV pointers will change if we drop the caches.
+  VerifyMap BackedgeDumpsOld, BackedgeDumpsNew;
+  for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
+    getLoopBackedgeTakenCounts(*I, BackedgeDumpsOld, SE);
+
+  // Gather stringified backedge taken counts for all loops without using
+  // SCEV's caches.
+  SE.releaseMemory();
+  for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
+    getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE);
+
+  // Now compare whether they're the same with and without caches. This allows
+  // verifying that no pass changed the cache.
+  assert(BackedgeDumpsOld.size() == BackedgeDumpsNew.size() &&
+         "New loops suddenly appeared!");
+
+  for (VerifyMap::iterator OldI = BackedgeDumpsOld.begin(),
+                           OldE = BackedgeDumpsOld.end(),
+                           NewI = BackedgeDumpsNew.begin();
+       OldI != OldE; ++OldI, ++NewI) {
+    assert(OldI->first == NewI->first && "Loop order changed!");
+
+    // Compare the stringified SCEVs. We don't care if undef backedge-taken
+    // counts change.
+    // FIXME: We currently ignore SCEV changes from/to CouldNotCompute. This
+    //        means that a pass is buggy or SCEV has to learn a new pattern,
+    //        but it is usually not harmful.
+    if (OldI->second != NewI->second &&
+        OldI->second.find("undef") == std::string::npos &&
+        NewI->second.find("undef") == std::string::npos &&
+        OldI->second != "***COULDNOTCOMPUTE***" &&
+        NewI->second != "***COULDNOTCOMPUTE***") {
+      dbgs() << "SCEVValidator: SCEV for loop '"
+             << OldI->first->getHeader()->getName()
+             << "' changed from '" << OldI->second
+             << "' to '" << NewI->second << "'!\n";
+      std::abort();
+    }
+  }
+
+  // TODO: Verify more things.
 }
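The verifier compares textual dumps rather than SCEV pointers because releaseMemory() invalidates the latter, and replaceSubString normalizes the strings so equivalent counts compare equal. Its effect, shown with a standalone copy of the helper (std::string in place of StringRef, and a made-up expression string):

#include <cstdio>
#include <string>

// Standalone copy of replaceSubString for illustration only.
static void replaceSubString(std::string &Str, const std::string &From,
                             const std::string &To) {
  size_t Pos = 0;
  while ((Pos = Str.find(From, Pos)) != std::string::npos) {
    Str.replace(Pos, From.size(), To);
    Pos += To.size();
  }
}

int main() {
  // "false" and "0" are equivalent backedge-taken counts in dead loops,
  // and wrap flags are too fragile to compare, so both are normalized away.
  std::string S = "(false + (3 * %n)<nsw>)";
  replaceSubString(S, "false", "0");
  replaceSubString(S, "<nsw>", "");
  printf("%s\n", S.c_str()); // prints (0 + (3 * %n))
  return 0;
}

Since the whole check is gated on the VerifySCEV flag, it runs only when explicitly requested, which keeps the slow dump-and-compare pass out of ordinary compilations.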