X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FAnalysis%2FScalarEvolutionExpander.cpp;h=7a9efdaa4c242b00ada86b61a76ab0b3f56fe3d4;hb=b8c5cfb13078eb0c6fd3de4a79f642aaf6b6b957;hp=bcccb04a1e7ce4822bea1466cc1d5b660c7d0236;hpb=7896c9f436a4eda5ec15e882a7505ba482a2fcd0;p=oota-llvm.git diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index bcccb04a1e7..7a9efdaa4c2 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -14,16 +14,80 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ScalarEvolutionExpander.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/LLVMContext.h" -#include "llvm/Target/TargetData.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/Debug.h" + using namespace llvm; +/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP, +/// reusing an existing cast if a suitable one exists, moving an existing +/// cast if a suitable one exists but isn't in the right place, or +/// creating a new one. +Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, + Instruction::CastOps Op, + BasicBlock::iterator IP) { + // This function must be called with the builder having a valid insertion + // point. It doesn't need to be the actual IP where the uses of the returned + // cast will be added, but it must dominate such IP. + // We use this precondition to produce a cast that will dominate all its + // uses. In particular, this is crucial for the case where the builder's + // insertion point *is* the point where we were asked to put the cast. + // Since we don't know the builder's insertion point is actually + // where the uses will be added (only that it dominates it), we are + // not allowed to move it. + BasicBlock::iterator BIP = Builder.GetInsertPoint(); + + Instruction *Ret = NULL; + + // Check to see if there is already a cast! + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + User *U = *UI; + if (U->getType() == Ty) + if (CastInst *CI = dyn_cast(U)) + if (CI->getOpcode() == Op) { + // If the cast isn't where we want it, create a new cast at IP. + // Likewise, do not reuse a cast at BIP because it must dominate + // instructions that might be inserted before BIP. + if (BasicBlock::iterator(CI) != IP || BIP == IP) { + // Create a new cast, and leave the old cast in place in case + // it is being used as an insert point. Clear its operand + // so that it doesn't hold anything live. + Ret = CastInst::Create(Op, V, Ty, "", IP); + Ret->takeName(CI); + CI->replaceAllUsesWith(Ret); + CI->setOperand(0, UndefValue::get(V->getType())); + break; + } + Ret = CI; + break; + } + } + + // Create a new cast. + if (!Ret) + Ret = CastInst::Create(Op, V, Ty, V->getName(), IP); + + // We assert at the end of the function since IP might point to an + // instruction with different dominance properties than a cast + // (an invoke for example) and not dominate BIP (but the cast does). + assert(SE.DT->dominates(Ret, BIP)); + + rememberInstruction(Ret); + return Ret; +} + /// InsertNoopCastOfTo - Insert a cast of V to the specified type, /// which must be possible with a noop cast, doing what we can to share /// the casts. -Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) { +Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) { Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false); assert((Op == Instruction::BitCast || Op == Instruction::PtrToInt || @@ -33,9 +97,14 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) { "InsertNoopCastOfTo cannot change sizes!"); // Short-circuit unnecessary bitcasts. - if (Op == Instruction::BitCast && V->getType() == Ty) - return V; - + if (Op == Instruction::BitCast) { + if (V->getType() == Ty) + return V; + if (CastInst *CI = dyn_cast(V)) { + if (CI->getOperand(0)->getType() == Ty) + return CI->getOperand(0); + } + } // Short-circuit unnecessary inttoptr<->ptrtoint casts. if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) && SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) { @@ -53,69 +122,31 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) { return CE->getOperand(0); } + // Fold a cast of a constant. if (Constant *C = dyn_cast(V)) return ConstantExpr::getCast(Op, C, Ty); + // Cast the argument at the beginning of the entry block, after + // any bitcasts of other arguments. if (Argument *A = dyn_cast(V)) { - // Check to see if there is already a cast! - for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); - UI != E; ++UI) - if ((*UI)->getType() == Ty) - if (CastInst *CI = dyn_cast(cast(*UI))) - if (CI->getOpcode() == Op) { - // If the cast isn't the first instruction of the function, move it. - if (BasicBlock::iterator(CI) != - A->getParent()->getEntryBlock().begin()) { - // Recreate the cast at the beginning of the entry block. - // The old cast is left in place in case it is being used - // as an insert point. - Instruction *NewCI = - CastInst::Create(Op, V, Ty, "", - A->getParent()->getEntryBlock().begin()); - NewCI->takeName(CI); - CI->replaceAllUsesWith(NewCI); - return NewCI; - } - return CI; - } - - Instruction *I = CastInst::Create(Op, V, Ty, V->getName(), - A->getParent()->getEntryBlock().begin()); - InsertedValues.insert(I); - return I; + BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin(); + while ((isa(IP) && + isa(cast(IP)->getOperand(0)) && + cast(IP)->getOperand(0) != A) || + isa(IP) || + isa(IP)) + ++IP; + return ReuseOrCreateCast(A, Ty, Op, IP); } + // Cast the instruction immediately after the instruction. Instruction *I = cast(V); - - // Check to see if there is already a cast. If there is, use it. - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); - UI != E; ++UI) { - if ((*UI)->getType() == Ty) - if (CastInst *CI = dyn_cast(cast(*UI))) - if (CI->getOpcode() == Op) { - BasicBlock::iterator It = I; ++It; - if (isa(I)) - It = cast(I)->getNormalDest()->begin(); - while (isa(It)) ++It; - if (It != BasicBlock::iterator(CI)) { - // Recreate the cast at the beginning of the entry block. - // The old cast is left in place in case it is being used - // as an insert point. - Instruction *NewCI = CastInst::Create(Op, V, Ty, "", It); - NewCI->takeName(CI); - CI->replaceAllUsesWith(NewCI); - return NewCI; - } - return CI; - } - } BasicBlock::iterator IP = I; ++IP; if (InvokeInst *II = dyn_cast(I)) IP = II->getNormalDest()->begin(); - while (isa(IP)) ++IP; - Instruction *CI = CastInst::Create(Op, V, Ty, V->getName(), IP); - InsertedValues.insert(CI); - return CI; + while (isa(IP) || isa(IP)) + ++IP; + return ReuseOrCreateCast(I, Ty, Op, IP); } /// InsertBinop - Insert the specified binary operator, doing a small amount @@ -135,6 +166,10 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, if (IP != BlockBegin) { --IP; for (; ScanLimit; --IP, --ScanLimit) { + // Don't count dbg.value against the ScanLimit, to avoid perturbing the + // generated code. + if (isa(IP)) + ScanLimit++; if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS && IP->getOperand(1) == RHS) return IP; @@ -142,15 +177,31 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, } } + // Save the original insertion point so we can restore it when we're done. + DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc(); + BuilderType::InsertPointGuard Guard(Builder); + + // Move the insertion point out of as many loops as we can. + while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { + if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break; + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) break; + + // Ok, move up a level. + Builder.SetInsertPoint(Preheader, Preheader->getTerminator()); + } + // If we haven't found this binop, insert it. - Value *BO = Builder.CreateBinOp(Opcode, LHS, RHS, "tmp"); - InsertedValues.insert(BO); + Instruction *BO = cast(Builder.CreateBinOp(Opcode, LHS, RHS)); + BO->setDebugLoc(Loc); + rememberInstruction(BO); + return BO; } /// FactorOutConstant - Test if S is divisible by Factor, using signed /// division. If so, update S with Factor divided out and return true. -/// S need not be evenly divisble if a reasonable remainder can be +/// S need not be evenly divisible if a reasonable remainder can be /// computed. /// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made /// unnecessary; in its place, just signed-divide Ops[i] by the scale and @@ -159,14 +210,14 @@ static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder, const SCEV *Factor, ScalarEvolution &SE, - const TargetData *TD) { + const DataLayout *TD) { // Everything is divisible by one. if (Factor->isOne()) return true; // x/x == 1. if (S == Factor) { - S = SE.getIntegerSCEV(1, S->getType()); + S = SE.getConstant(S->getType(), 1); return true; } @@ -200,15 +251,13 @@ static bool FactorOutConstant(const SCEV *&S, // of the given factor. if (const SCEVMulExpr *M = dyn_cast(S)) { if (TD) { - // With TargetData, the size is known. Check if there is a constant + // With DataLayout, the size is known. Check if there is a constant // operand which is a multiple of the given factor. If so, we can // factor it. const SCEVConstant *FC = cast(Factor); if (const SCEVConstant *C = dyn_cast(M->getOperand(0))) if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) { - const SmallVectorImpl &MOperands = M->getOperands(); - SmallVector NewMulOps(MOperands.begin(), - MOperands.end()); + SmallVector NewMulOps(M->op_begin(), M->op_end()); NewMulOps[0] = SE.getConstant(C->getValue()->getValue().sdiv( FC->getValue()->getValue())); @@ -216,16 +265,14 @@ static bool FactorOutConstant(const SCEV *&S, return true; } } else { - // Without TargetData, check if Factor can be factored out of any of the + // Without DataLayout, check if Factor can be factored out of any of the // Mul's operands. If so, we can just remove it. for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { const SCEV *SOp = M->getOperand(i); - const SCEV *Remainder = SE.getIntegerSCEV(0, SOp->getType()); + const SCEV *Remainder = SE.getConstant(SOp->getType(), 0); if (FactorOutConstant(SOp, Remainder, Factor, SE, TD) && Remainder->isZero()) { - const SmallVectorImpl &MOperands = M->getOperands(); - SmallVector NewMulOps(MOperands.begin(), - MOperands.end()); + SmallVector NewMulOps(M->op_begin(), M->op_end()); NewMulOps[i] = SOp; S = SE.getMulExpr(NewMulOps); return true; @@ -237,7 +284,7 @@ static bool FactorOutConstant(const SCEV *&S, // In an AddRec, check if both start and step are divisible. if (const SCEVAddRecExpr *A = dyn_cast(S)) { const SCEV *Step = A->getStepRecurrence(SE); - const SCEV *StepRem = SE.getIntegerSCEV(0, Step->getType()); + const SCEV *StepRem = SE.getConstant(Step->getType(), 0); if (!FactorOutConstant(Step, StepRem, Factor, SE, TD)) return false; if (!StepRem->isZero()) @@ -245,7 +292,8 @@ static bool FactorOutConstant(const SCEV *&S, const SCEV *Start = A->getStart(); if (!FactorOutConstant(Start, Remainder, Factor, SE, TD)) return false; - S = SE.getAddRecExpr(Start, Step, A->getLoop()); + S = SE.getAddRecExpr(Start, Step, A->getLoop(), + A->getNoWrapFlags(SCEV::FlagNW)); return true; } @@ -257,7 +305,7 @@ static bool FactorOutConstant(const SCEV *&S, /// the list. /// static void SimplifyAddOperands(SmallVectorImpl &Ops, - const Type *Ty, + Type *Ty, ScalarEvolution &SE) { unsigned NumAddRecs = 0; for (unsigned i = Ops.size(); i > 0 && isa(Ops[i-1]); --i) @@ -267,19 +315,17 @@ static void SimplifyAddOperands(SmallVectorImpl &Ops, SmallVector AddRecs(Ops.end() - NumAddRecs, Ops.end()); // Let ScalarEvolution sort and simplify the non-addrecs list. const SCEV *Sum = NoAddRecs.empty() ? - SE.getIntegerSCEV(0, Ty) : + SE.getConstant(Ty, 0) : SE.getAddExpr(NoAddRecs); // If it returned an add, use the operands. Otherwise it simplified // the sum into a single value, so just use that. + Ops.clear(); if (const SCEVAddExpr *Add = dyn_cast(Sum)) - Ops = Add->getOperands(); - else { - Ops.clear(); - if (!Sum->isZero()) - Ops.push_back(Sum); - } + Ops.append(Add->op_begin(), Add->op_end()); + else if (!Sum->isZero()) + Ops.push_back(Sum); // Then append the addrecs. - Ops.insert(Ops.end(), AddRecs.begin(), AddRecs.end()); + Ops.append(AddRecs.begin(), AddRecs.end()); } /// SplitAddRecs - Flatten a list of add operands, moving addrec start values @@ -288,7 +334,7 @@ static void SimplifyAddOperands(SmallVectorImpl &Ops, /// into GEP indices. /// static void SplitAddRecs(SmallVectorImpl &Ops, - const Type *Ty, + Type *Ty, ScalarEvolution &SE) { // Find the addrecs. SmallVector AddRecs; @@ -296,13 +342,14 @@ static void SplitAddRecs(SmallVectorImpl &Ops, while (const SCEVAddRecExpr *A = dyn_cast(Ops[i])) { const SCEV *Start = A->getStart(); if (Start->isZero()) break; - const SCEV *Zero = SE.getIntegerSCEV(0, Ty); + const SCEV *Zero = SE.getConstant(Ty, 0); AddRecs.push_back(SE.getAddRecExpr(Zero, A->getStepRecurrence(SE), - A->getLoop())); + A->getLoop(), + A->getNoWrapFlags(SCEV::FlagNW))); if (const SCEVAddExpr *Add = dyn_cast(Start)) { Ops[i] = Zero; - Ops.insert(Ops.end(), Add->op_begin(), Add->op_end()); + Ops.append(Add->op_begin(), Add->op_end()); e += Add->getNumOperands(); } else { Ops[i] = Start; @@ -310,7 +357,7 @@ static void SplitAddRecs(SmallVectorImpl &Ops, } if (!AddRecs.empty()) { // Add the addrecs onto the end of the list. - Ops.insert(Ops.end(), AddRecs.begin(), AddRecs.end()); + Ops.append(AddRecs.begin(), AddRecs.end()); // Resort the operand list, moving any constants to the front. SimplifyAddOperands(Ops, Ty, SE); } @@ -323,7 +370,7 @@ static void SplitAddRecs(SmallVectorImpl &Ops, /// http://llvm.org/docs/LangRef.html#pointeraliasing /// for details. /// -/// Design note: The correctness of using getelmeentptr here depends on +/// Design note: The correctness of using getelementptr here depends on /// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as /// they may introduce pointer arithmetic which may not be safely converted /// into getelementptr. @@ -345,10 +392,10 @@ static void SplitAddRecs(SmallVectorImpl &Ops, /// Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, const SCEV *const *op_end, - const PointerType *PTy, - const Type *Ty, + PointerType *PTy, + Type *Ty, Value *V) { - const Type *ElTy = PTy->getElementType(); + Type *ElTy = PTy->getElementType(); SmallVector GepIndices; SmallVector Ops(op_begin, op_end); bool AnyNonZeroIndices = false; @@ -357,37 +404,43 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // without the other. SplitAddRecs(Ops, Ty, SE); - // Decend down the pointer's type and attempt to convert the other + Type *IntPtrTy = SE.TD + ? SE.TD->getIntPtrType(PTy) + : Type::getInt64Ty(PTy->getContext()); + + // Descend down the pointer's type and attempt to convert the other // operands into GEP indices, at each level. The first index in a GEP // indexes into the array implied by the pointer operand; the rest of // the indices index into the element or field type selected by the // preceding index. for (;;) { - const SCEV *ElSize = SE.getAllocSizeExpr(ElTy); // If the scale size is not 0, attempt to factor out a scale for // array indexing. SmallVector ScaledOps; - if (ElTy->isSized() && !ElSize->isZero()) { - SmallVector NewOps; - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - const SCEV *Op = Ops[i]; - const SCEV *Remainder = SE.getIntegerSCEV(0, Ty); - if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.TD)) { - // Op now has ElSize factored out. - ScaledOps.push_back(Op); - if (!Remainder->isZero()) - NewOps.push_back(Remainder); - AnyNonZeroIndices = true; - } else { - // The operand was not divisible, so add it to the list of operands - // we'll scan next iteration. - NewOps.push_back(Ops[i]); + if (ElTy->isSized()) { + const SCEV *ElSize = SE.getSizeOfExpr(IntPtrTy, ElTy); + if (!ElSize->isZero()) { + SmallVector NewOps; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + const SCEV *Op = Ops[i]; + const SCEV *Remainder = SE.getConstant(Ty, 0); + if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.TD)) { + // Op now has ElSize factored out. + ScaledOps.push_back(Op); + if (!Remainder->isZero()) + NewOps.push_back(Remainder); + AnyNonZeroIndices = true; + } else { + // The operand was not divisible, so add it to the list of operands + // we'll scan next iteration. + NewOps.push_back(Ops[i]); + } + } + // If we made any changes, update Ops. + if (!ScaledOps.empty()) { + Ops = NewOps; + SimplifyAddOperands(Ops, Ty, SE); } - } - // If we made any changes, update Ops. - if (!ScaledOps.empty()) { - Ops = NewOps; - SimplifyAddOperands(Ops, Ty, SE); } } @@ -401,12 +454,12 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, GepIndices.push_back(Scaled); // Collect struct field index operands. - while (const StructType *STy = dyn_cast(ElTy)) { + while (StructType *STy = dyn_cast(ElTy)) { bool FoundFieldNo = false; // An empty struct has no fields. if (STy->getNumElements() == 0) break; if (SE.TD) { - // With TargetData, field offsets are known. See if a constant offset + // With DataLayout, field offsets are known. See if a constant offset // falls within any of the struct fields. if (Ops.empty()) break; if (const SCEVConstant *C = dyn_cast(Ops[0])) @@ -425,22 +478,22 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, } } } else { - // Without TargetData, just check for a SCEVFieldOffsetExpr of the + // Without DataLayout, just check for an offsetof expression of the // appropriate struct type. for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (const SCEVFieldOffsetExpr *FO = - dyn_cast(Ops[i])) - if (FO->getStructType() == STy) { - unsigned FieldNo = FO->getFieldNo(); - GepIndices.push_back( - ConstantInt::get(Type::getInt32Ty(Ty->getContext()), - FieldNo)); - ElTy = STy->getTypeAtIndex(FieldNo); + if (const SCEVUnknown *U = dyn_cast(Ops[i])) { + Type *CTy; + Constant *FieldNo; + if (U->isOffsetOf(CTy, FieldNo) && CTy == STy) { + GepIndices.push_back(FieldNo); + ElTy = + STy->getTypeAtIndex(cast(FieldNo)->getZExtValue()); Ops[i] = SE.getConstant(Ty, 0); AnyNonZeroIndices = true; FoundFieldNo = true; break; } + } } // If no struct field offsets were found, tentatively assume that // field zero was selected (since the zero offset would obviously @@ -452,13 +505,13 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, } } - if (const ArrayType *ATy = dyn_cast(ElTy)) + if (ArrayType *ATy = dyn_cast(ElTy)) ElTy = ATy->getElementType(); else break; } - // If none of the operands were convertable to proper GEP indices, cast + // If none of the operands were convertible to proper GEP indices, cast // the base to i8* and do an ugly getelementptr with that. It's still // better than ptrtoint+arithmetic+inttoptr at least. if (!AnyNonZeroIndices) { @@ -466,13 +519,16 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, V = InsertNoopCastOfTo(V, Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace())); + assert(!isa(V) || + SE.DT->dominates(cast(V), Builder.GetInsertPoint())); + // Expand the operands for a plain byte offset. Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty); // Fold a GEP with constant operands. if (Constant *CLHS = dyn_cast(V)) if (Constant *CRHS = dyn_cast(Idx)) - return ConstantExpr::getGetElementPtr(CLHS, &CRHS, 1); + return ConstantExpr::getGetElementPtr(CLHS, CRHS); // Do a quick scan to see if we have this GEP nearby. If so, reuse it. unsigned ScanLimit = 6; @@ -482,6 +538,10 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, if (IP != BlockBegin) { --IP; for (; ScanLimit; --IP, --ScanLimit) { + // Don't count dbg.value against the ScanLimit, to avoid perturbing the + // generated code. + if (isa(IP)) + ScanLimit++; if (IP->getOpcode() == Instruction::GetElementPtr && IP->getOperand(0) == V && IP->getOperand(1) == Idx) return IP; @@ -489,86 +549,273 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, } } + // Save the original insertion point so we can restore it when we're done. + BuilderType::InsertPointGuard Guard(Builder); + + // Move the insertion point out of as many loops as we can. + while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { + if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break; + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) break; + + // Ok, move up a level. + Builder.SetInsertPoint(Preheader, Preheader->getTerminator()); + } + // Emit a GEP. Value *GEP = Builder.CreateGEP(V, Idx, "uglygep"); - InsertedValues.insert(GEP); + rememberInstruction(GEP); + return GEP; } + // Save the original insertion point so we can restore it when we're done. + BuilderType::InsertPoint SaveInsertPt = Builder.saveIP(); + + // Move the insertion point out of as many loops as we can. + while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { + if (!L->isLoopInvariant(V)) break; + + bool AnyIndexNotLoopInvariant = false; + for (SmallVectorImpl::const_iterator I = GepIndices.begin(), + E = GepIndices.end(); I != E; ++I) + if (!L->isLoopInvariant(*I)) { + AnyIndexNotLoopInvariant = true; + break; + } + if (AnyIndexNotLoopInvariant) + break; + + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) break; + + // Ok, move up a level. + Builder.SetInsertPoint(Preheader, Preheader->getTerminator()); + } + // Insert a pretty getelementptr. Note that this GEP is not marked inbounds, // because ScalarEvolution may have changed the address arithmetic to // compute a value which is beyond the end of the allocated object. - Value *GEP = Builder.CreateGEP(V, - GepIndices.begin(), - GepIndices.end(), + Value *Casted = V; + if (V->getType() != PTy) + Casted = InsertNoopCastOfTo(Casted, PTy); + Value *GEP = Builder.CreateGEP(Casted, + GepIndices, "scevgep"); Ops.push_back(SE.getUnknown(GEP)); - InsertedValues.insert(GEP); + rememberInstruction(GEP); + + // Restore the original insert point. + Builder.restoreIP(SaveInsertPt); + return expand(SE.getAddExpr(Ops)); } -Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { - int NumOperands = S->getNumOperands(); - const Type *Ty = SE.getEffectiveSCEVType(S->getType()); - - // Find the index of an operand to start with. Choose the operand with - // pointer type, if there is one, or the last operand otherwise. - int PIdx = 0; - for (; PIdx != NumOperands - 1; ++PIdx) - if (isa(S->getOperand(PIdx)->getType())) break; - - // Expand code for the operand that we chose. - Value *V = expand(S->getOperand(PIdx)); - - // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the - // comments on expandAddToGEP for details. - if (const PointerType *PTy = dyn_cast(V->getType())) { - // Take the operand at PIdx out of the list. - const SmallVectorImpl &Ops = S->getOperands(); - SmallVector NewOps; - NewOps.insert(NewOps.end(), Ops.begin(), Ops.begin() + PIdx); - NewOps.insert(NewOps.end(), Ops.begin() + PIdx + 1, Ops.end()); - // Make a GEP. - return expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, V); - } - - // Otherwise, we'll expand the rest of the SCEVAddExpr as plain integer - // arithmetic. - V = InsertNoopCastOfTo(V, Ty); - - // Emit a bunch of add instructions - for (int i = NumOperands-1; i >= 0; --i) { - if (i == PIdx) continue; - Value *W = expandCodeFor(S->getOperand(i), Ty); - V = InsertBinop(Instruction::Add, V, W); +/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for +/// SCEV expansion. If they are nested, this is the most nested. If they are +/// neighboring, pick the later. +static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B, + DominatorTree &DT) { + if (!A) return B; + if (!B) return A; + if (A->contains(B)) return B; + if (B->contains(A)) return A; + if (DT.dominates(A->getHeader(), B->getHeader())) return B; + if (DT.dominates(B->getHeader(), A->getHeader())) return A; + return A; // Arbitrarily break the tie. +} + +/// getRelevantLoop - Get the most relevant loop associated with the given +/// expression, according to PickMostRelevantLoop. +const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) { + // Test whether we've already computed the most relevant loop for this SCEV. + std::pair::iterator, bool> Pair = + RelevantLoops.insert(std::make_pair(S, static_cast(0))); + if (!Pair.second) + return Pair.first->second; + + if (isa(S)) + // A constant has no relevant loops. + return 0; + if (const SCEVUnknown *U = dyn_cast(S)) { + if (const Instruction *I = dyn_cast(U->getValue())) + return Pair.first->second = SE.LI->getLoopFor(I->getParent()); + // A non-instruction has no relevant loops. + return 0; } - return V; + if (const SCEVNAryExpr *N = dyn_cast(S)) { + const Loop *L = 0; + if (const SCEVAddRecExpr *AR = dyn_cast(S)) + L = AR->getLoop(); + for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end(); + I != E; ++I) + L = PickMostRelevantLoop(L, getRelevantLoop(*I), *SE.DT); + return RelevantLoops[N] = L; + } + if (const SCEVCastExpr *C = dyn_cast(S)) { + const Loop *Result = getRelevantLoop(C->getOperand()); + return RelevantLoops[C] = Result; + } + if (const SCEVUDivExpr *D = dyn_cast(S)) { + const Loop *Result = + PickMostRelevantLoop(getRelevantLoop(D->getLHS()), + getRelevantLoop(D->getRHS()), + *SE.DT); + return RelevantLoops[D] = Result; + } + llvm_unreachable("Unexpected SCEV type!"); } -Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { - const Type *Ty = SE.getEffectiveSCEVType(S->getType()); - int FirstOp = 0; // Set if we should emit a subtract. - if (const SCEVConstant *SC = dyn_cast(S->getOperand(0))) - if (SC->getValue()->isAllOnesValue()) - FirstOp = 1; +namespace { + +/// LoopCompare - Compare loops by PickMostRelevantLoop. +class LoopCompare { + DominatorTree &DT; +public: + explicit LoopCompare(DominatorTree &dt) : DT(dt) {} + + bool operator()(std::pair LHS, + std::pair RHS) const { + // Keep pointer operands sorted at the end. + if (LHS.second->getType()->isPointerTy() != + RHS.second->getType()->isPointerTy()) + return LHS.second->getType()->isPointerTy(); + + // Compare loops with PickMostRelevantLoop. + if (LHS.first != RHS.first) + return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first; + + // If one operand is a non-constant negative and the other is not, + // put the non-constant negative on the right so that a sub can + // be used instead of a negate and add. + if (LHS.second->isNonConstantNegative()) { + if (!RHS.second->isNonConstantNegative()) + return false; + } else if (RHS.second->isNonConstantNegative()) + return true; + + // Otherwise they are equivalent according to this comparison. + return false; + } +}; - int i = S->getNumOperands()-2; - Value *V = expandCodeFor(S->getOperand(i+1), Ty); +} - // Emit a bunch of multiply instructions - for (; i >= FirstOp; --i) { - Value *W = expandCodeFor(S->getOperand(i), Ty); - V = InsertBinop(Instruction::Mul, V, W); +Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { + Type *Ty = SE.getEffectiveSCEVType(S->getType()); + + // Collect all the add operands in a loop, along with their associated loops. + // Iterate in reverse so that constants are emitted last, all else equal, and + // so that pointer operands are inserted first, which the code below relies on + // to form more involved GEPs. + SmallVector, 8> OpsAndLoops; + for (std::reverse_iterator I(S->op_end()), + E(S->op_begin()); I != E; ++I) + OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I)); + + // Sort by loop. Use a stable sort so that constants follow non-constants and + // pointer operands precede non-pointer operands. + std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT)); + + // Emit instructions to add all the operands. Hoist as much as possible + // out of loops, and form meaningful getelementptrs where possible. + Value *Sum = 0; + for (SmallVectorImpl >::iterator + I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) { + const Loop *CurLoop = I->first; + const SCEV *Op = I->second; + if (!Sum) { + // This is the first operand. Just expand it. + Sum = expand(Op); + ++I; + } else if (PointerType *PTy = dyn_cast(Sum->getType())) { + // The running sum expression is a pointer. Try to form a getelementptr + // at this level with that as the base. + SmallVector NewOps; + for (; I != E && I->first == CurLoop; ++I) { + // If the operand is SCEVUnknown and not instructions, peek through + // it, to enable more of it to be folded into the GEP. + const SCEV *X = I->second; + if (const SCEVUnknown *U = dyn_cast(X)) + if (!isa(U->getValue())) + X = SE.getSCEV(U->getValue()); + NewOps.push_back(X); + } + Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum); + } else if (PointerType *PTy = dyn_cast(Op->getType())) { + // The running sum is an integer, and there's a pointer at this level. + // Try to form a getelementptr. If the running sum is instructions, + // use a SCEVUnknown to avoid re-analyzing them. + SmallVector NewOps; + NewOps.push_back(isa(Sum) ? SE.getUnknown(Sum) : + SE.getSCEV(Sum)); + for (++I; I != E && I->first == CurLoop; ++I) + NewOps.push_back(I->second); + Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op)); + } else if (Op->isNonConstantNegative()) { + // Instead of doing a negate and add, just do a subtract. + Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty); + Sum = InsertNoopCastOfTo(Sum, Ty); + Sum = InsertBinop(Instruction::Sub, Sum, W); + ++I; + } else { + // A simple add. + Value *W = expandCodeFor(Op, Ty); + Sum = InsertNoopCastOfTo(Sum, Ty); + // Canonicalize a constant to the RHS. + if (isa(Sum)) std::swap(Sum, W); + Sum = InsertBinop(Instruction::Add, Sum, W); + ++I; + } } - // -1 * ... ---> 0 - ... - if (FirstOp == 1) - V = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), V); - return V; + return Sum; +} + +Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { + Type *Ty = SE.getEffectiveSCEVType(S->getType()); + + // Collect all the mul operands in a loop, along with their associated loops. + // Iterate in reverse so that constants are emitted last, all else equal. + SmallVector, 8> OpsAndLoops; + for (std::reverse_iterator I(S->op_end()), + E(S->op_begin()); I != E; ++I) + OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I)); + + // Sort by loop. Use a stable sort so that constants follow non-constants. + std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT)); + + // Emit instructions to mul all the operands. Hoist as much as possible + // out of loops. + Value *Prod = 0; + for (SmallVectorImpl >::iterator + I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) { + const SCEV *Op = I->second; + if (!Prod) { + // This is the first operand. Just expand it. + Prod = expand(Op); + ++I; + } else if (Op->isAllOnesValue()) { + // Instead of doing a multiply by negative one, just do a negate. + Prod = InsertNoopCastOfTo(Prod, Ty); + Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod); + ++I; + } else { + // A simple mul. + Value *W = expandCodeFor(Op, Ty); + Prod = InsertNoopCastOfTo(Prod, Ty); + // Canonicalize a constant to the RHS. + if (isa(Prod)) std::swap(Prod, W); + Prod = InsertBinop(Instruction::Mul, Prod, W); + ++I; + } + } + + return Prod; } Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) { - const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Type *Ty = SE.getEffectiveSCEVType(S->getType()); Value *LHS = expandCodeFor(S->getLHS(), Ty); if (const SCEVConstant *SC = dyn_cast(S->getRHS())) { @@ -590,9 +837,10 @@ static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest, while (const SCEVAddRecExpr *A = dyn_cast(Base)) { Base = A->getStart(); Rest = SE.getAddExpr(Rest, - SE.getAddRecExpr(SE.getIntegerSCEV(0, A->getType()), + SE.getAddRecExpr(SE.getConstant(A->getType(), 0), A->getStepRecurrence(SE), - A->getLoop())); + A->getLoop(), + A->getNoWrapFlags(SCEV::FlagNW))); } if (const SCEVAddExpr *A = dyn_cast(Base)) { Base = A->getOperand(A->getNumOperands()-1); @@ -603,16 +851,422 @@ static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest, } } +/// Determine if this is a well-behaved chain of instructions leading back to +/// the PHI. If so, it may be reused by expanded expressions. +bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV, + const Loop *L) { + if (IncV->getNumOperands() == 0 || isa(IncV) || + (isa(IncV) && !isa(IncV))) + return false; + // If any of the operands don't dominate the insert position, bail. + // Addrec operands are always loop-invariant, so this can only happen + // if there are instructions which haven't been hoisted. + if (L == IVIncInsertLoop) { + for (User::op_iterator OI = IncV->op_begin()+1, + OE = IncV->op_end(); OI != OE; ++OI) + if (Instruction *OInst = dyn_cast(OI)) + if (!SE.DT->dominates(OInst, IVIncInsertPos)) + return false; + } + // Advance to the next instruction. + IncV = dyn_cast(IncV->getOperand(0)); + if (!IncV) + return false; + + if (IncV->mayHaveSideEffects()) + return false; + + if (IncV != PN) + return true; + + return isNormalAddRecExprPHI(PN, IncV, L); +} + +/// getIVIncOperand returns an induction variable increment's induction +/// variable operand. +/// +/// If allowScale is set, any type of GEP is allowed as long as the nonIV +/// operands dominate InsertPos. +/// +/// If allowScale is not set, ensure that a GEP increment conforms to one of the +/// simple patterns generated by getAddRecExprPHILiterally and +/// expandAddtoGEP. If the pattern isn't recognized, return NULL. +Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV, + Instruction *InsertPos, + bool allowScale) { + if (IncV == InsertPos) + return NULL; + + switch (IncV->getOpcode()) { + default: + return NULL; + // Check for a simple Add/Sub or GEP of a loop invariant step. + case Instruction::Add: + case Instruction::Sub: { + Instruction *OInst = dyn_cast(IncV->getOperand(1)); + if (!OInst || SE.DT->dominates(OInst, InsertPos)) + return dyn_cast(IncV->getOperand(0)); + return NULL; + } + case Instruction::BitCast: + return dyn_cast(IncV->getOperand(0)); + case Instruction::GetElementPtr: + for (Instruction::op_iterator I = IncV->op_begin()+1, E = IncV->op_end(); + I != E; ++I) { + if (isa(*I)) + continue; + if (Instruction *OInst = dyn_cast(*I)) { + if (!SE.DT->dominates(OInst, InsertPos)) + return NULL; + } + if (allowScale) { + // allow any kind of GEP as long as it can be hoisted. + continue; + } + // This must be a pointer addition of constants (pretty), which is already + // handled, or some number of address-size elements (ugly). Ugly geps + // have 2 operands. i1* is used by the expander to represent an + // address-size element. + if (IncV->getNumOperands() != 2) + return NULL; + unsigned AS = cast(IncV->getType())->getAddressSpace(); + if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS) + && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS)) + return NULL; + break; + } + return dyn_cast(IncV->getOperand(0)); + } +} + +/// hoistStep - Attempt to hoist a simple IV increment above InsertPos to make +/// it available to other uses in this loop. Recursively hoist any operands, +/// until we reach a value that dominates InsertPos. +bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) { + if (SE.DT->dominates(IncV, InsertPos)) + return true; + + // InsertPos must itself dominate IncV so that IncV's new position satisfies + // its existing users. + if (isa(InsertPos) + || !SE.DT->dominates(InsertPos->getParent(), IncV->getParent())) + return false; + + // Check that the chain of IV operands leading back to Phi can be hoisted. + SmallVector IVIncs; + for(;;) { + Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true); + if (!Oper) + return false; + // IncV is safe to hoist. + IVIncs.push_back(IncV); + IncV = Oper; + if (SE.DT->dominates(IncV, InsertPos)) + break; + } + for (SmallVectorImpl::reverse_iterator I = IVIncs.rbegin(), + E = IVIncs.rend(); I != E; ++I) { + (*I)->moveBefore(InsertPos); + } + return true; +} + +/// Determine if this cyclic phi is in a form that would have been generated by +/// LSR. We don't care if the phi was actually expanded in this pass, as long +/// as it is in a low-cost form, for example, no implied multiplication. This +/// should match any patterns generated by getAddRecExprPHILiterally and +/// expandAddtoGEP. +bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV, + const Loop *L) { + for(Instruction *IVOper = IncV; + (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(), + /*allowScale=*/false));) { + if (IVOper == PN) + return true; + } + return false; +} + +/// expandIVInc - Expand an IV increment at Builder's current InsertPos. +/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may +/// need to materialize IV increments elsewhere to handle difficult situations. +Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L, + Type *ExpandTy, Type *IntTy, + bool useSubtract) { + Value *IncV; + // If the PHI is a pointer, use a GEP, otherwise use an add or sub. + if (ExpandTy->isPointerTy()) { + PointerType *GEPPtrTy = cast(ExpandTy); + // If the step isn't constant, don't use an implicitly scaled GEP, because + // that would require a multiply inside the loop. + if (!isa(StepV)) + GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()), + GEPPtrTy->getAddressSpace()); + const SCEV *const StepArray[1] = { SE.getSCEV(StepV) }; + IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN); + if (IncV->getType() != PN->getType()) { + IncV = Builder.CreateBitCast(IncV, PN->getType()); + rememberInstruction(IncV); + } + } else { + IncV = useSubtract ? + Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") : + Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next"); + rememberInstruction(IncV); + } + return IncV; +} + +/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand +/// the base addrec, which is the addrec without any non-loop-dominating +/// values, and return the PHI. +PHINode * +SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, + const Loop *L, + Type *ExpandTy, + Type *IntTy) { + assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position"); + + // Reuse a previously-inserted PHI, if present. + BasicBlock *LatchBlock = L->getLoopLatch(); + if (LatchBlock) { + for (BasicBlock::iterator I = L->getHeader()->begin(); + PHINode *PN = dyn_cast(I); ++I) { + if (!SE.isSCEVable(PN->getType()) || + (SE.getEffectiveSCEVType(PN->getType()) != + SE.getEffectiveSCEVType(Normalized->getType())) || + SE.getSCEV(PN) != Normalized) + continue; + + Instruction *IncV = + cast(PN->getIncomingValueForBlock(LatchBlock)); + + if (LSRMode) { + if (!isExpandedAddRecExprPHI(PN, IncV, L)) + continue; + if (L == IVIncInsertLoop && !hoistIVInc(IncV, IVIncInsertPos)) + continue; + } + else { + if (!isNormalAddRecExprPHI(PN, IncV, L)) + continue; + if (L == IVIncInsertLoop) + do { + if (SE.DT->dominates(IncV, IVIncInsertPos)) + break; + // Make sure the increment is where we want it. But don't move it + // down past a potential existing post-inc user. + IncV->moveBefore(IVIncInsertPos); + IVIncInsertPos = IncV; + IncV = cast(IncV->getOperand(0)); + } while (IncV != PN); + } + // Ok, the add recurrence looks usable. + // Remember this PHI, even in post-inc mode. + InsertedValues.insert(PN); + // Remember the increment. + rememberInstruction(IncV); + return PN; + } + } + + // Save the original insertion point so we can restore it when we're done. + BuilderType::InsertPointGuard Guard(Builder); + + // Another AddRec may need to be recursively expanded below. For example, if + // this AddRec is quadratic, the StepV may itself be an AddRec in this + // loop. Remove this loop from the PostIncLoops set before expanding such + // AddRecs. Otherwise, we cannot find a valid position for the step + // (i.e. StepV can never dominate its loop header). Ideally, we could do + // SavedIncLoops.swap(PostIncLoops), but we generally have a single element, + // so it's not worth implementing SmallPtrSet::swap. + PostIncLoopSet SavedPostIncLoops = PostIncLoops; + PostIncLoops.clear(); + + // Expand code for the start value. + Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy, + L->getHeader()->begin()); + + // StartV must be hoisted into L's preheader to dominate the new phi. + assert(!isa(StartV) || + SE.DT->properlyDominates(cast(StartV)->getParent(), + L->getHeader())); + + // Expand code for the step value. Do this before creating the PHI so that PHI + // reuse code doesn't see an incomplete PHI. + const SCEV *Step = Normalized->getStepRecurrence(SE); + // If the stride is negative, insert a sub instead of an add for the increment + // (unless it's a constant, because subtracts of constants are canonicalized + // to adds). + bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative(); + if (useSubtract) + Step = SE.getNegativeSCEV(Step); + // Expand the step somewhere that dominates the loop header. + Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); + + // Create the PHI. + BasicBlock *Header = L->getHeader(); + Builder.SetInsertPoint(Header, Header->begin()); + pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header); + PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE), + Twine(IVName) + ".iv"); + rememberInstruction(PN); + + // Create the step instructions and populate the PHI. + for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) { + BasicBlock *Pred = *HPI; + + // Add a start value. + if (!L->contains(Pred)) { + PN->addIncoming(StartV, Pred); + continue; + } + + // Create a step value and add it to the PHI. + // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the + // instructions at IVIncInsertPos. + Instruction *InsertPos = L == IVIncInsertLoop ? + IVIncInsertPos : Pred->getTerminator(); + Builder.SetInsertPoint(InsertPos); + Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); + if (isa(IncV)) { + if (Normalized->getNoWrapFlags(SCEV::FlagNUW)) + cast(IncV)->setHasNoUnsignedWrap(); + if (Normalized->getNoWrapFlags(SCEV::FlagNSW)) + cast(IncV)->setHasNoSignedWrap(); + } + PN->addIncoming(IncV, Pred); + } + + // After expanding subexpressions, restore the PostIncLoops set so the caller + // can ensure that IVIncrement dominates the current uses. + PostIncLoops = SavedPostIncLoops; + + // Remember this PHI, even in post-inc mode. + InsertedValues.insert(PN); + + return PN; +} + +Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { + Type *STy = S->getType(); + Type *IntTy = SE.getEffectiveSCEVType(STy); + const Loop *L = S->getLoop(); + + // Determine a normalized form of this expression, which is the expression + // before any post-inc adjustment is made. + const SCEVAddRecExpr *Normalized = S; + if (PostIncLoops.count(L)) { + PostIncLoopSet Loops; + Loops.insert(L); + Normalized = + cast(TransformForPostIncUse(Normalize, S, 0, 0, + Loops, SE, *SE.DT)); + } + + // Strip off any non-loop-dominating component from the addrec start. + const SCEV *Start = Normalized->getStart(); + const SCEV *PostLoopOffset = 0; + if (!SE.properlyDominates(Start, L->getHeader())) { + PostLoopOffset = Start; + Start = SE.getConstant(Normalized->getType(), 0); + Normalized = cast( + SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE), + Normalized->getLoop(), + Normalized->getNoWrapFlags(SCEV::FlagNW))); + } + + // Strip off any non-loop-dominating component from the addrec step. + const SCEV *Step = Normalized->getStepRecurrence(SE); + const SCEV *PostLoopScale = 0; + if (!SE.dominates(Step, L->getHeader())) { + PostLoopScale = Step; + Step = SE.getConstant(Normalized->getType(), 1); + Normalized = + cast(SE.getAddRecExpr( + Start, Step, Normalized->getLoop(), + Normalized->getNoWrapFlags(SCEV::FlagNW))); + } + + // Expand the core addrec. If we need post-loop scaling, force it to + // expand to an integer type to avoid the need for additional casting. + Type *ExpandTy = PostLoopScale ? IntTy : STy; + PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy); + + // Accommodate post-inc mode, if necessary. + Value *Result; + if (!PostIncLoops.count(L)) + Result = PN; + else { + // In PostInc mode, use the post-incremented value. + BasicBlock *LatchBlock = L->getLoopLatch(); + assert(LatchBlock && "PostInc mode requires a unique loop latch!"); + Result = PN->getIncomingValueForBlock(LatchBlock); + + // For an expansion to use the postinc form, the client must call + // expandCodeFor with an InsertPoint that is either outside the PostIncLoop + // or dominated by IVIncInsertPos. + if (isa(Result) + && !SE.DT->dominates(cast(Result), + Builder.GetInsertPoint())) { + // The induction variable's postinc expansion does not dominate this use. + // IVUsers tries to prevent this case, so it is rare. However, it can + // happen when an IVUser outside the loop is not dominated by the latch + // block. Adjusting IVIncInsertPos before expansion begins cannot handle + // all cases. Consider a phi outide whose operand is replaced during + // expansion with the value of the postinc user. Without fundamentally + // changing the way postinc users are tracked, the only remedy is + // inserting an extra IV increment. StepV might fold into PostLoopOffset, + // but hopefully expandCodeFor handles that. + bool useSubtract = + !ExpandTy->isPointerTy() && Step->isNonConstantNegative(); + if (useSubtract) + Step = SE.getNegativeSCEV(Step); + Value *StepV; + { + // Expand the step somewhere that dominates the loop header. + BuilderType::InsertPointGuard Guard(Builder); + StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); + } + Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); + } + } + + // Re-apply any non-loop-dominating scale. + if (PostLoopScale) { + assert(S->isAffine() && "Can't linearly scale non-affine recurrences."); + Result = InsertNoopCastOfTo(Result, IntTy); + Result = Builder.CreateMul(Result, + expandCodeFor(PostLoopScale, IntTy)); + rememberInstruction(Result); + } + + // Re-apply any non-loop-dominating offset. + if (PostLoopOffset) { + if (PointerType *PTy = dyn_cast(ExpandTy)) { + const SCEV *const OffsetArray[1] = { PostLoopOffset }; + Result = expandAddToGEP(OffsetArray, OffsetArray+1, PTy, IntTy, Result); + } else { + Result = InsertNoopCastOfTo(Result, IntTy); + Result = Builder.CreateAdd(Result, + expandCodeFor(PostLoopOffset, IntTy)); + rememberInstruction(Result); + } + } + + return Result; +} + Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { - const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + if (!CanonicalMode) return expandAddRecExprLiterally(S); + + Type *Ty = SE.getEffectiveSCEVType(S->getType()); const Loop *L = S->getLoop(); // First check for an existing canonical IV in a suitable type. PHINode *CanonicalIV = 0; if (PHINode *PN = L->getCanonicalInductionVariable()) - if (SE.isSCEVable(PN->getType()) && - isa(SE.getEffectiveSCEVType(PN->getType())) && - SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty)) + if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty)) CanonicalIV = PN; // Rewrite an AddRec in terms of the canonical induction variable, if @@ -620,28 +1274,28 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { if (CanonicalIV && SE.getTypeSizeInBits(CanonicalIV->getType()) > SE.getTypeSizeInBits(Ty)) { - const SmallVectorImpl &Ops = S->getOperands(); - SmallVector NewOps(Ops.size()); - for (unsigned i = 0, e = Ops.size(); i != e; ++i) - NewOps[i] = SE.getAnyExtendExpr(Ops[i], CanonicalIV->getType()); - Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop())); - BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); - BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + SmallVector NewOps(S->getNumOperands()); + for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i) + NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType()); + Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(), + S->getNoWrapFlags(SCEV::FlagNW))); BasicBlock::iterator NewInsertPt = llvm::next(BasicBlock::iterator(cast(V))); - while (isa(NewInsertPt)) ++NewInsertPt; + BuilderType::InsertPointGuard Guard(Builder); + while (isa(NewInsertPt) || isa(NewInsertPt) || + isa(NewInsertPt)) + ++NewInsertPt; V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0, NewInsertPt); - Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); return V; } // {X,+,F} --> X + {0,+,F} if (!S->getStart()->isZero()) { - const SmallVectorImpl &SOperands = S->getOperands(); - SmallVector NewOps(SOperands.begin(), SOperands.end()); - NewOps[0] = SE.getIntegerSCEV(0, Ty); - const SCEV *Rest = SE.getAddRecExpr(NewOps, L); + SmallVector NewOps(S->op_begin(), S->op_end()); + NewOps[0] = SE.getConstant(Ty, 0); + const SCEV *Rest = SE.getAddRecExpr(NewOps, L, + S->getNoWrapFlags(SCEV::FlagNW)); // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the // comments on expandAddToGEP for details. @@ -650,7 +1304,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { // Dig into the expression to find the pointer base for a GEP. ExposePointerBase(Base, RestArray[0], SE); // If we found a pointer, expand the AddRec with a GEP. - if (const PointerType *PTy = dyn_cast(Base->getType())) { + if (PointerType *PTy = dyn_cast(Base->getType())) { // Make sure the Base isn't something exotic, such as a multiplied // or divided pointer value. In those cases, the result type isn't // actually a pointer type. @@ -666,62 +1320,66 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { SE.getUnknown(expand(Rest)))); } - // {0,+,1} --> Insert a canonical induction variable into the loop! - if (S->isAffine() && - S->getOperand(1) == SE.getIntegerSCEV(1, Ty)) { - // If there's a canonical IV, just use it. - if (CanonicalIV) { - assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) && - "IVs with types different from the canonical IV should " - "already have been handled!"); - return CanonicalIV; - } - + // If we don't yet have a canonical IV, create one. + if (!CanonicalIV) { // Create and insert the PHI node for the induction variable in the // specified loop. BasicBlock *Header = L->getHeader(); - PHINode *PN = PHINode::Create(Ty, "indvar", Header->begin()); - InsertedValues.insert(PN); + pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header); + CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar", + Header->begin()); + rememberInstruction(CanonicalIV); + SmallSet PredSeen; Constant *One = ConstantInt::get(Ty, 1); - for (pred_iterator HPI = pred_begin(Header), HPE = pred_end(Header); - HPI != HPE; ++HPI) - if (L->contains(*HPI)) { - // Insert a unit add instruction right before the terminator corresponding - // to the back-edge. - Instruction *Add = BinaryOperator::CreateAdd(PN, One, "indvar.next", - (*HPI)->getTerminator()); - InsertedValues.insert(Add); - PN->addIncoming(Add, *HPI); + for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) { + BasicBlock *HP = *HPI; + if (!PredSeen.insert(HP)) + continue; + + if (L->contains(HP)) { + // Insert a unit add instruction right before the terminator + // corresponding to the back-edge. + Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One, + "indvar.next", + HP->getTerminator()); + Add->setDebugLoc(HP->getTerminator()->getDebugLoc()); + rememberInstruction(Add); + CanonicalIV->addIncoming(Add, HP); } else { - PN->addIncoming(Constant::getNullValue(Ty), *HPI); + CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP); } + } + } + + // {0,+,1} --> Insert a canonical induction variable into the loop! + if (S->isAffine() && S->getOperand(1)->isOne()) { + assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) && + "IVs with types different from the canonical IV should " + "already have been handled!"); + return CanonicalIV; } // {0,+,F} --> {0,+,1} * F - // Get the canonical induction variable I for this loop. - Value *I = CanonicalIV ? - CanonicalIV : - getOrInsertCanonicalInductionVariable(L, Ty); // If this is a simple linear addrec, emit it now as a special case. if (S->isAffine()) // {0,+,F} --> i*F return expand(SE.getTruncateOrNoop( - SE.getMulExpr(SE.getUnknown(I), + SE.getMulExpr(SE.getUnknown(CanonicalIV), SE.getNoopOrAnyExtend(S->getOperand(1), - I->getType())), + CanonicalIV->getType())), Ty)); // If this is a chain of recurrences, turn it into a closed form, using the // folders, then expandCodeFor the closed form. This allows the folders to // simplify the expression without having to build a bunch of special code // into this folder. - const SCEV *IH = SE.getUnknown(I); // Get I as a "symbolic" SCEV. + const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV. // Promote S up to the canonical IV type, if the cast is foldable. const SCEV *NewS = S; - const SCEV *Ext = SE.getNoopOrAnyExtend(S, I->getType()); + const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType()); if (isa(Ext)) NewS = Ext; @@ -734,35 +1392,35 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { } Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) { - const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Type *Ty = SE.getEffectiveSCEVType(S->getType()); Value *V = expandCodeFor(S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType())); - Value *I = Builder.CreateTrunc(V, Ty, "tmp"); - InsertedValues.insert(I); + Value *I = Builder.CreateTrunc(V, Ty); + rememberInstruction(I); return I; } Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) { - const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Type *Ty = SE.getEffectiveSCEVType(S->getType()); Value *V = expandCodeFor(S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType())); - Value *I = Builder.CreateZExt(V, Ty, "tmp"); - InsertedValues.insert(I); + Value *I = Builder.CreateZExt(V, Ty); + rememberInstruction(I); return I; } Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) { - const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Type *Ty = SE.getEffectiveSCEVType(S->getType()); Value *V = expandCodeFor(S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType())); - Value *I = Builder.CreateSExt(V, Ty, "tmp"); - InsertedValues.insert(I); + Value *I = Builder.CreateSExt(V, Ty); + rememberInstruction(I); return I; } Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); - const Type *Ty = LHS->getType(); + Type *Ty = LHS->getType(); for (int i = S->getNumOperands()-2; i >= 0; --i) { // In the case of mixed integer and pointer types, do the // rest of the comparisons as integer. @@ -771,10 +1429,10 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { LHS = InsertNoopCastOfTo(LHS, Ty); } Value *RHS = expandCodeFor(S->getOperand(i), Ty); - Value *ICmp = Builder.CreateICmpSGT(LHS, RHS, "tmp"); - InsertedValues.insert(ICmp); + Value *ICmp = Builder.CreateICmpSGT(LHS, RHS); + rememberInstruction(ICmp); Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax"); - InsertedValues.insert(Sel); + rememberInstruction(Sel); LHS = Sel; } // In the case of mixed integer and pointer types, cast the @@ -786,7 +1444,7 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); - const Type *Ty = LHS->getType(); + Type *Ty = LHS->getType(); for (int i = S->getNumOperands()-2; i >= 0; --i) { // In the case of mixed integer and pointer types, do the // rest of the comparisons as integer. @@ -795,10 +1453,10 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { LHS = InsertNoopCastOfTo(LHS, Ty); } Value *RHS = expandCodeFor(S->getOperand(i), Ty); - Value *ICmp = Builder.CreateICmpUGT(LHS, RHS, "tmp"); - InsertedValues.insert(ICmp); + Value *ICmp = Builder.CreateICmpUGT(LHS, RHS); + rememberInstruction(ICmp); Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax"); - InsertedValues.insert(Sel); + rememberInstruction(Sel); LHS = Sel; } // In the case of mixed integer and pointer types, cast the @@ -808,15 +1466,13 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { return LHS; } -Value *SCEVExpander::visitFieldOffsetExpr(const SCEVFieldOffsetExpr *S) { - return ConstantExpr::getOffsetOf(S->getStructType(), S->getFieldNo()); -} - -Value *SCEVExpander::visitAllocSizeExpr(const SCEVAllocSizeExpr *S) { - return ConstantExpr::getSizeOf(S->getAllocType()); +Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty, + Instruction *IP) { + Builder.SetInsertPoint(IP->getParent(), IP); + return expandCodeFor(SH, Ty); } -Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty) { +Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) { // Expand the code for this SCEV. Value *V = expand(SH); if (Ty) { @@ -833,56 +1489,261 @@ Value *SCEVExpander::expand(const SCEV *S) { Instruction *InsertPt = Builder.GetInsertPoint(); for (Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock()); ; L = L->getParentLoop()) - if (S->isLoopInvariant(L)) { + if (SE.isLoopInvariant(S, L)) { if (!L) break; if (BasicBlock *Preheader = L->getLoopPreheader()) InsertPt = Preheader->getTerminator(); + else { + // LSR sets the insertion point for AddRec start/step values to the + // block start to simplify value reuse, even though it's an invalid + // position. SCEVExpander must correct for this in all cases. + InsertPt = L->getHeader()->getFirstInsertionPt(); + } } else { // If the SCEV is computable at this level, insert it into the header // after the PHIs (and after any other instructions that we've inserted // there) so that it is guaranteed to dominate any user inside the loop. - if (L && S->hasComputableLoopEvolution(L)) - InsertPt = L->getHeader()->getFirstNonPHI(); - while (isInsertedInstruction(InsertPt)) + if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L)) + InsertPt = L->getHeader()->getFirstInsertionPt(); + while (InsertPt != Builder.GetInsertPoint() + && (isInsertedInstruction(InsertPt) + || isa(InsertPt))) { InsertPt = llvm::next(BasicBlock::iterator(InsertPt)); + } break; } // Check to see if we already expanded this here. - std::map, - AssertingVH >::iterator I = - InsertedExpressions.find(std::make_pair(S, InsertPt)); + std::map, TrackingVH >::iterator + I = InsertedExpressions.find(std::make_pair(S, InsertPt)); if (I != InsertedExpressions.end()) return I->second; - BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); - BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + BuilderType::InsertPointGuard Guard(Builder); Builder.SetInsertPoint(InsertPt->getParent(), InsertPt); // Expand the expression into instructions. Value *V = visit(S); // Remember the expanded value for this SCEV at this location. + // + // This is independent of PostIncLoops. The mapped value simply materializes + // the expression at this insertion point. If the mapped value happened to be + // a postinc expansion, it could be reused by a non-postinc user, but only if + // its insertion point was already at the head of the loop. InsertedExpressions[std::make_pair(S, InsertPt)] = V; - - Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); return V; } +void SCEVExpander::rememberInstruction(Value *I) { + if (!PostIncLoops.empty()) + InsertedPostIncValues.insert(I); + else + InsertedValues.insert(I); +} + /// getOrInsertCanonicalInductionVariable - This method returns the /// canonical induction variable of the specified type for the specified /// loop (inserting one if there is none). A canonical induction variable /// starts at zero and steps by one on each iteration. -Value * +PHINode * SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, - const Type *Ty) { - assert(Ty->isInteger() && "Can only insert integer induction variables!"); - const SCEV *H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty), - SE.getIntegerSCEV(1, Ty), L); - BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); - BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); - Value *V = expandCodeFor(H, 0, L->getHeader()->begin()); - if (SaveInsertBB) - Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); + Type *Ty) { + assert(Ty->isIntegerTy() && "Can only insert integer induction variables!"); + + // Build a SCEV for {0,+,1}. + // Conservatively use FlagAnyWrap for now. + const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0), + SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap); + + // Emit code for it. + BuilderType::InsertPointGuard Guard(Builder); + PHINode *V = cast(expandCodeFor(H, 0, L->getHeader()->begin())); + return V; } + +/// Sort values by integer width for replaceCongruentIVs. +static bool width_descending(Value *lhs, Value *rhs) { + // Put pointers at the back and make sure pointer < pointer = false. + if (!lhs->getType()->isIntegerTy() || !rhs->getType()->isIntegerTy()) + return rhs->getType()->isIntegerTy() && !lhs->getType()->isIntegerTy(); + return rhs->getType()->getPrimitiveSizeInBits() + < lhs->getType()->getPrimitiveSizeInBits(); +} + +/// replaceCongruentIVs - Check for congruent phis in this loop header and +/// replace them with their most canonical representative. Return the number of +/// phis eliminated. +/// +/// This does not depend on any SCEVExpander state but should be used in +/// the same context that SCEVExpander is used. +unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, + SmallVectorImpl &DeadInsts, + const TargetTransformInfo *TTI) { + // Find integer phis in order of increasing width. + SmallVector Phis; + for (BasicBlock::iterator I = L->getHeader()->begin(); + PHINode *Phi = dyn_cast(I); ++I) { + Phis.push_back(Phi); + } + if (TTI) + std::sort(Phis.begin(), Phis.end(), width_descending); + + unsigned NumElim = 0; + DenseMap ExprToIVMap; + // Process phis from wide to narrow. Mapping wide phis to the their truncation + // so narrow phis can reuse them. + for (SmallVectorImpl::const_iterator PIter = Phis.begin(), + PEnd = Phis.end(); PIter != PEnd; ++PIter) { + PHINode *Phi = *PIter; + + // Fold constant phis. They may be congruent to other constant phis and + // would confuse the logic below that expects proper IVs. + if (Value *V = Phi->hasConstantValue()) { + Phi->replaceAllUsesWith(V); + DeadInsts.push_back(Phi); + ++NumElim; + DEBUG_WITH_TYPE(DebugType, dbgs() + << "INDVARS: Eliminated constant iv: " << *Phi << '\n'); + continue; + } + + if (!SE.isSCEVable(Phi->getType())) + continue; + + PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)]; + if (!OrigPhiRef) { + OrigPhiRef = Phi; + if (Phi->getType()->isIntegerTy() && TTI + && TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) { + // This phi can be freely truncated to the narrowest phi type. Map the + // truncated expression to it so it will be reused for narrow types. + const SCEV *TruncExpr = + SE.getTruncateExpr(SE.getSCEV(Phi), Phis.back()->getType()); + ExprToIVMap[TruncExpr] = Phi; + } + continue; + } + + // Replacing a pointer phi with an integer phi or vice-versa doesn't make + // sense. + if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy()) + continue; + + if (BasicBlock *LatchBlock = L->getLoopLatch()) { + Instruction *OrigInc = + cast(OrigPhiRef->getIncomingValueForBlock(LatchBlock)); + Instruction *IsomorphicInc = + cast(Phi->getIncomingValueForBlock(LatchBlock)); + + // If this phi has the same width but is more canonical, replace the + // original with it. As part of the "more canonical" determination, + // respect a prior decision to use an IV chain. + if (OrigPhiRef->getType() == Phi->getType() + && !(ChainedPhis.count(Phi) + || isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L)) + && (ChainedPhis.count(Phi) + || isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) { + std::swap(OrigPhiRef, Phi); + std::swap(OrigInc, IsomorphicInc); + } + // Replacing the congruent phi is sufficient because acyclic redundancy + // elimination, CSE/GVN, should handle the rest. However, once SCEV proves + // that a phi is congruent, it's often the head of an IV user cycle that + // is isomorphic with the original phi. It's worth eagerly cleaning up the + // common case of a single IV increment so that DeleteDeadPHIs can remove + // cycles that had postinc uses. + const SCEV *TruncExpr = SE.getTruncateOrNoop(SE.getSCEV(OrigInc), + IsomorphicInc->getType()); + if (OrigInc != IsomorphicInc + && TruncExpr == SE.getSCEV(IsomorphicInc) + && ((isa(OrigInc) && isa(IsomorphicInc)) + || hoistIVInc(OrigInc, IsomorphicInc))) { + DEBUG_WITH_TYPE(DebugType, dbgs() + << "INDVARS: Eliminated congruent iv.inc: " + << *IsomorphicInc << '\n'); + Value *NewInc = OrigInc; + if (OrigInc->getType() != IsomorphicInc->getType()) { + Instruction *IP = isa(OrigInc) + ? (Instruction*)L->getHeader()->getFirstInsertionPt() + : OrigInc->getNextNode(); + IRBuilder<> Builder(IP); + Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc()); + NewInc = Builder. + CreateTruncOrBitCast(OrigInc, IsomorphicInc->getType(), IVName); + } + IsomorphicInc->replaceAllUsesWith(NewInc); + DeadInsts.push_back(IsomorphicInc); + } + } + DEBUG_WITH_TYPE(DebugType, dbgs() + << "INDVARS: Eliminated congruent iv: " << *Phi << '\n'); + ++NumElim; + Value *NewIV = OrigPhiRef; + if (OrigPhiRef->getType() != Phi->getType()) { + IRBuilder<> Builder(L->getHeader()->getFirstInsertionPt()); + Builder.SetCurrentDebugLocation(Phi->getDebugLoc()); + NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName); + } + Phi->replaceAllUsesWith(NewIV); + DeadInsts.push_back(Phi); + } + return NumElim; +} + +namespace { +// Search for a SCEV subexpression that is not safe to expand. Any expression +// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely +// UDiv expressions. We don't know if the UDiv is derived from an IR divide +// instruction, but the important thing is that we prove the denominator is +// nonzero before expansion. +// +// IVUsers already checks that IV-derived expressions are safe. So this check is +// only needed when the expression includes some subexpression that is not IV +// derived. +// +// Currently, we only allow division by a nonzero constant here. If this is +// inadequate, we could easily allow division by SCEVUnknown by using +// ValueTracking to check isKnownNonZero(). +// +// We cannot generally expand recurrences unless the step dominates the loop +// header. The expander handles the special case of affine recurrences by +// scaling the recurrence outside the loop, but this technique isn't generally +// applicable. Expanding a nested recurrence outside a loop requires computing +// binomial coefficients. This could be done, but the recurrence has to be in a +// perfectly reduced form, which can't be guaranteed. +struct SCEVFindUnsafe { + ScalarEvolution &SE; + bool IsUnsafe; + + SCEVFindUnsafe(ScalarEvolution &se): SE(se), IsUnsafe(false) {} + + bool follow(const SCEV *S) { + if (const SCEVUDivExpr *D = dyn_cast(S)) { + const SCEVConstant *SC = dyn_cast(D->getRHS()); + if (!SC || SC->getValue()->isZero()) { + IsUnsafe = true; + return false; + } + } + if (const SCEVAddRecExpr *AR = dyn_cast(S)) { + const SCEV *Step = AR->getStepRecurrence(SE); + if (!AR->isAffine() && !SE.dominates(Step, AR->getLoop()->getHeader())) { + IsUnsafe = true; + return false; + } + } + return true; + } + bool isDone() const { return IsUnsafe; } +}; +} + +namespace llvm { +bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) { + SCEVFindUnsafe Search(SE); + visitAll(S, Search); + return !Search.IsUnsafe; +} +}