X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FAnalysis%2FScalarEvolutionExpander.cpp;h=14ba33003dba9d82b33a6a4ed92780b4e8c24a8b;hb=630d17566793c7f25a05cd407ab9b79a1756966a;hp=e76b6b10cf9ec13679e6706633cd3ce294d204a6;hpb=48ba0e45ed68689ce7b384578e6272410e4e23fe;p=oota-llvm.git diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index e76b6b10cf9..14ba33003db 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -14,12 +14,14 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/TargetData.h" -#include "llvm/ADT/STLExtras.h" using namespace llvm; @@ -30,6 +32,19 @@ using namespace llvm; Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, Instruction::CastOps Op, BasicBlock::iterator IP) { + // This function must be called with the builder having a valid insertion + // point. It doesn't need to be the actual IP where the uses of the returned + // cast will be added, but it must dominate such IP. + // We use this precondition to produce a cast that will dominate all its + // uses. In particular, this is crucial for the case where the builder's + // insertion point *is* the point where we were asked to put the cast. + // Since we don't know the builder's insertion point is actually + // where the uses will be added (only that it dominates it), we are + // not allowed to move it. 
+ BasicBlock::iterator BIP = Builder.GetInsertPoint(); + + Instruction *Ret = NULL; + // Check to see if there is already a cast! for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) { @@ -37,27 +52,35 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, if (U->getType() == Ty) if (CastInst *CI = dyn_cast(U)) if (CI->getOpcode() == Op) { - // If the cast isn't where we want it, fix it. - if (BasicBlock::iterator(CI) != IP) { + // If the cast isn't where we want it, create a new cast at IP. + // Likewise, do not reuse a cast at BIP because it must dominate + // instructions that might be inserted before BIP. + if (BasicBlock::iterator(CI) != IP || BIP == IP) { // Create a new cast, and leave the old cast in place in case // it is being used as an insert point. Clear its operand // so that it doesn't hold anything live. - Instruction *NewCI = CastInst::Create(Op, V, Ty, "", IP); - NewCI->takeName(CI); - CI->replaceAllUsesWith(NewCI); + Ret = CastInst::Create(Op, V, Ty, "", IP); + Ret->takeName(CI); + CI->replaceAllUsesWith(Ret); CI->setOperand(0, UndefValue::get(V->getType())); - rememberInstruction(NewCI); - return NewCI; + break; } - rememberInstruction(CI); - return CI; + Ret = CI; + break; } } // Create a new cast. - Instruction *I = CastInst::Create(Op, V, Ty, V->getName(), IP); - rememberInstruction(I); - return I; + if (!Ret) + Ret = CastInst::Create(Op, V, Ty, V->getName(), IP); + + // We assert at the end of the function since IP might point to an + // instruction with different dominance properties than a cast + // (an invoke for example) and not dominate BIP (but the cast does). + assert(SE.DT->dominates(Ret, BIP)); + + rememberInstruction(Ret); + return Ret; } /// InsertNoopCastOfTo - Insert a cast of V to the specified type, @@ -73,9 +96,14 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) { "InsertNoopCastOfTo cannot change sizes!"); // Short-circuit unnecessary bitcasts. 
- if (Op == Instruction::BitCast && V->getType() == Ty) - return V; - + if (Op == Instruction::BitCast) { + if (V->getType() == Ty) + return V; + if (CastInst *CI = dyn_cast(V)) { + if (CI->getOperand(0)->getType() == Ty) + return CI->getOperand(0); + } + } // Short-circuit unnecessary inttoptr<->ptrtoint casts. if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) && SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) { @@ -115,8 +143,7 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) { BasicBlock::iterator IP = I; ++IP; if (InvokeInst *II = dyn_cast(I)) IP = II->getNormalDest()->begin(); - while (isa(IP) || isa(IP) || - isa(IP)) + while (isa(IP) || isa(IP)) ++IP; return ReuseOrCreateCast(I, Ty, Op, IP); } @@ -186,7 +213,7 @@ static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder, const SCEV *Factor, ScalarEvolution &SE, - const TargetData *TD) { + const DataLayout *TD) { // Everything is divisible by one. if (Factor->isOne()) return true; @@ -227,7 +254,7 @@ static bool FactorOutConstant(const SCEV *&S, // of the given factor. if (const SCEVMulExpr *M = dyn_cast(S)) { if (TD) { - // With TargetData, the size is known. Check if there is a constant + // With DataLayout, the size is known. Check if there is a constant // operand which is a multiple of the given factor. If so, we can // factor it. const SCEVConstant *FC = cast(Factor); @@ -241,7 +268,7 @@ static bool FactorOutConstant(const SCEV *&S, return true; } } else { - // Without TargetData, check if Factor can be factored out of any of the + // Without DataLayout, check if Factor can be factored out of any of the // Mul's operands. If so, we can just remove it. 
for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { const SCEV *SOp = M->getOperand(i); @@ -268,8 +295,8 @@ static bool FactorOutConstant(const SCEV *&S, const SCEV *Start = A->getStart(); if (!FactorOutConstant(Start, Remainder, Factor, SE, TD)) return false; - // FIXME: can use A->getNoWrapFlags(FlagNW) - S = SE.getAddRecExpr(Start, Step, A->getLoop(), SCEV::FlagAnyWrap); + S = SE.getAddRecExpr(Start, Step, A->getLoop(), + A->getNoWrapFlags(SCEV::FlagNW)); return true; } @@ -322,8 +349,7 @@ static void SplitAddRecs(SmallVectorImpl &Ops, AddRecs.push_back(SE.getAddRecExpr(Zero, A->getStepRecurrence(SE), A->getLoop(), - // FIXME: A->getNoWrapFlags(FlagNW) - SCEV::FlagAnyWrap)); + A->getNoWrapFlags(SCEV::FlagNW))); if (const SCEVAddExpr *Add = dyn_cast(Start)) { Ops[i] = Zero; Ops.append(Add->op_begin(), Add->op_end()); @@ -432,7 +458,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // An empty struct has no fields. if (STy->getNumElements() == 0) break; if (SE.TD) { - // With TargetData, field offsets are known. See if a constant offset + // With DataLayout, field offsets are known. See if a constant offset // falls within any of the struct fields. if (Ops.empty()) break; if (const SCEVConstant *C = dyn_cast(Ops[0])) @@ -451,7 +477,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, } } } else { - // Without TargetData, just check for an offsetof expression of the + // Without DataLayout, just check for an offsetof expression of the // appropriate struct type. for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (const SCEVUnknown *U = dyn_cast(Ops[i])) { @@ -492,6 +518,9 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, V = InsertNoopCastOfTo(V, Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace())); + assert(!isa(V) || + SE.DT->dominates(cast(V), Builder.GetInsertPoint())); + // Expand the operands for a plain byte offset. 
Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty); @@ -588,20 +617,6 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, return expand(SE.getAddExpr(Ops)); } -/// isNonConstantNegative - Return true if the specified scev is negated, but -/// not a constant. -static bool isNonConstantNegative(const SCEV *F) { - const SCEVMulExpr *Mul = dyn_cast(F); - if (!Mul) return false; - - // If there is a constant factor, it will be first. - const SCEVConstant *SC = dyn_cast(Mul->getOperand(0)); - if (!SC) return false; - - // Return true if the value is negative, this matches things like (-42 * V). - return SC->getValue()->getValue().isNegative(); -} - /// PickMostRelevantLoop - Given two loops pick the one that's most relevant for /// SCEV expansion. If they are nested, this is the most nested. If they are /// neighboring, pick the later. @@ -655,7 +670,6 @@ const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) { return RelevantLoops[D] = Result; } llvm_unreachable("Unexpected SCEV type!"); - return 0; } namespace { @@ -680,10 +694,10 @@ public: // If one operand is a non-constant negative and the other is not, // put the non-constant negative on the right so that a sub can // be used instead of a negate and add. - if (isNonConstantNegative(LHS.second)) { - if (!isNonConstantNegative(RHS.second)) + if (LHS.second->isNonConstantNegative()) { + if (!RHS.second->isNonConstantNegative()) return false; - } else if (isNonConstantNegative(RHS.second)) + } else if (RHS.second->isNonConstantNegative()) return true; // Otherwise they are equivalent according to this comparison. @@ -744,7 +758,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { for (++I; I != E && I->first == CurLoop; ++I) NewOps.push_back(I->second); Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op)); - } else if (isNonConstantNegative(Op)) { + } else if (Op->isNonConstantNegative()) { // Instead of doing a negate and add, just do a subtract. 
Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty); Sum = InsertNoopCastOfTo(Sum, Ty); @@ -832,8 +846,7 @@ static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest, SE.getAddRecExpr(SE.getConstant(A->getType(), 0), A->getStepRecurrence(SE), A->getLoop(), - // FIXME: A->getNoWrapFlags(FlagNW) - SCEV::FlagAnyWrap)); + A->getNoWrapFlags(SCEV::FlagNW))); } if (const SCEVAddExpr *A = dyn_cast(Base)) { Base = A->getOperand(A->getNumOperands()-1); @@ -875,58 +888,139 @@ bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV, return isNormalAddRecExprPHI(PN, IncV, L); } -/// Determine if this cyclic phi is in a form that would have been generated by -/// LSR. We don't care if the phi was actually expanded in this pass, as long -/// as it is in a low-cost form, for example, no implied multiplication. This -/// should match any patterns generated by getAddRecExprPHILiterally and -/// expandAddtoGEP. -bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV, - const Loop *L, Type *ExpandTy) { +/// getIVIncOperand returns an induction variable increment's induction +/// variable operand. +/// +/// If allowScale is set, any type of GEP is allowed as long as the nonIV +/// operands dominate InsertPos. +/// +/// If allowScale is not set, ensure that a GEP increment conforms to one of the +/// simple patterns generated by getAddRecExprPHILiterally and +/// expandAddtoGEP. If the pattern isn't recognized, return NULL. +Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV, + Instruction *InsertPos, + bool allowScale) { + if (IncV == InsertPos) + return NULL; + switch (IncV->getOpcode()) { + default: + return NULL; // Check for a simple Add/Sub or GEP of a loop invariant step. 
case Instruction::Add: - case Instruction::Sub: - return IncV->getOperand(0) == PN - && L->isLoopInvariant(IncV->getOperand(1)); + case Instruction::Sub: { + Instruction *OInst = dyn_cast(IncV->getOperand(1)); + if (!OInst || SE.DT->dominates(OInst, InsertPos)) + return dyn_cast(IncV->getOperand(0)); + return NULL; + } case Instruction::BitCast: - IncV = dyn_cast(IncV->getOperand(0)); - if (!IncV) - return false; - // fall-thru to GEP handling - case Instruction::GetElementPtr: { - // This must be a pointer addition of constants (pretty) or some number of - // address-size elements (ugly). + return dyn_cast(IncV->getOperand(0)); + case Instruction::GetElementPtr: for (Instruction::op_iterator I = IncV->op_begin()+1, E = IncV->op_end(); I != E; ++I) { if (isa(*I)) continue; - // ugly geps have 2 operands. - // i1* is used by the expander to represent an address-size element. + if (Instruction *OInst = dyn_cast(*I)) { + if (!SE.DT->dominates(OInst, InsertPos)) + return NULL; + } + if (allowScale) { + // allow any kind of GEP as long as it can be hoisted. + continue; + } + // This must be a pointer addition of constants (pretty), which is already + // handled, or some number of address-size elements (ugly). Ugly geps + // have 2 operands. i1* is used by the expander to represent an + // address-size element. if (IncV->getNumOperands() != 2) - return false; - unsigned AS = cast(ExpandTy)->getAddressSpace(); + return NULL; + unsigned AS = cast(IncV->getType())->getAddressSpace(); if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS) && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS)) - return false; - // Ensure the operands dominate the insertion point. I don't know of a - // case when this would not be true, so this is somewhat untested. 
- if (L == IVIncInsertLoop) { - for (User::op_iterator OI = IncV->op_begin()+1, - OE = IncV->op_end(); OI != OE; ++OI) - if (Instruction *OInst = dyn_cast(OI)) - if (!SE.DT->dominates(OInst, IVIncInsertPos)) - return false; - } + return NULL; break; } - IncV = dyn_cast(IncV->getOperand(0)); - if (IncV && IncV->getOpcode() == Instruction::BitCast) - IncV = dyn_cast(IncV->getOperand(0)); - return IncV == PN; + return dyn_cast(IncV->getOperand(0)); } - default: +} + +/// hoistStep - Attempt to hoist a simple IV increment above InsertPos to make +/// it available to other uses in this loop. Recursively hoist any operands, +/// until we reach a value that dominates InsertPos. +bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) { + if (SE.DT->dominates(IncV, InsertPos)) + return true; + + // InsertPos must itself dominate IncV so that IncV's new position satisfies + // its existing users. + if (isa(InsertPos) + || !SE.DT->dominates(InsertPos->getParent(), IncV->getParent())) return false; + + // Check that the chain of IV operands leading back to Phi can be hoisted. + SmallVector IVIncs; + for(;;) { + Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true); + if (!Oper) + return false; + // IncV is safe to hoist. + IVIncs.push_back(IncV); + IncV = Oper; + if (SE.DT->dominates(IncV, InsertPos)) + break; + } + for (SmallVectorImpl::reverse_iterator I = IVIncs.rbegin(), + E = IVIncs.rend(); I != E; ++I) { + (*I)->moveBefore(InsertPos); } + return true; +} + +/// Determine if this cyclic phi is in a form that would have been generated by +/// LSR. We don't care if the phi was actually expanded in this pass, as long +/// as it is in a low-cost form, for example, no implied multiplication. This +/// should match any patterns generated by getAddRecExprPHILiterally and +/// expandAddtoGEP. 
+bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV, + const Loop *L) { + for(Instruction *IVOper = IncV; + (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(), + /*allowScale=*/false));) { + if (IVOper == PN) + return true; + } + return false; +} + +/// expandIVInc - Expand an IV increment at Builder's current InsertPos. +/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may +/// need to materialize IV increments elsewhere to handle difficult situations. +Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L, + Type *ExpandTy, Type *IntTy, + bool useSubtract) { + Value *IncV; + // If the PHI is a pointer, use a GEP, otherwise use an add or sub. + if (ExpandTy->isPointerTy()) { + PointerType *GEPPtrTy = cast(ExpandTy); + // If the step isn't constant, don't use an implicitly scaled GEP, because + // that would require a multiply inside the loop. + if (!isa(StepV)) + GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()), + GEPPtrTy->getAddressSpace()); + const SCEV *const StepArray[1] = { SE.getSCEV(StepV) }; + IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN); + if (IncV->getType() != PN->getType()) { + IncV = Builder.CreateBitCast(IncV, PN->getType()); + rememberInstruction(IncV); + } + } else { + IncV = useSubtract ? + Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") : + Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next"); + rememberInstruction(IncV); + } + return IncV; } /// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. 
Expand @@ -954,28 +1048,30 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, cast(PN->getIncomingValueForBlock(LatchBlock)); if (LSRMode) { - if (!isExpandedAddRecExprPHI(PN, IncV, L, ExpandTy)) + if (!isExpandedAddRecExprPHI(PN, IncV, L)) + continue; + if (L == IVIncInsertLoop && !hoistIVInc(IncV, IVIncInsertPos)) continue; } else { if (!isNormalAddRecExprPHI(PN, IncV, L)) continue; + if (L == IVIncInsertLoop) + do { + if (SE.DT->dominates(IncV, IVIncInsertPos)) + break; + // Make sure the increment is where we want it. But don't move it + // down past a potential existing post-inc user. + IncV->moveBefore(IVIncInsertPos); + IVIncInsertPos = IncV; + IncV = cast(IncV->getOperand(0)); + } while (IncV != PN); } // Ok, the add recurrence looks usable. // Remember this PHI, even in post-inc mode. InsertedValues.insert(PN); // Remember the increment. rememberInstruction(IncV); - if (L == IVIncInsertLoop) - do { - if (SE.DT->dominates(IncV, IVIncInsertPos)) - break; - // Make sure the increment is where we want it. But don't move it - // down past a potential existing post-inc user. - IncV->moveBefore(IVIncInsertPos); - IVIncInsertPos = IncV; - IncV = cast(IncV->getOperand(0)); - } while (IncV != PN); return PN; } } @@ -984,6 +1080,16 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + // Another AddRec may need to be recursively expanded below. For example, if + // this AddRec is quadratic, the StepV may itself be an AddRec in this + // loop. Remove this loop from the PostIncLoops set before expanding such + // AddRecs. Otherwise, we cannot find a valid position for the step + // (i.e. StepV can never dominate its loop header). Ideally, we could do + // SavedIncLoops.swap(PostIncLoops), but we generally have a single element, + // so it's not worth implementing SmallPtrSet::swap. 
+ PostIncLoopSet SavedPostIncLoops = PostIncLoops; + PostIncLoops.clear(); + // Expand code for the start value. Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy, L->getHeader()->begin()); @@ -993,16 +1099,16 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, SE.DT->properlyDominates(cast(StartV)->getParent(), L->getHeader())); - // Expand code for the step value. Insert instructions right before the - // terminator corresponding to the back-edge. Do this before creating the PHI - // so that PHI reuse code doesn't see an incomplete PHI. If the stride is - // negative, insert a sub instead of an add for the increment (unless it's a - // constant, because subtracts of constants are canonicalized to adds). + // Expand code for the step value. Do this before creating the PHI so that PHI + // reuse code doesn't see an incomplete PHI. const SCEV *Step = Normalized->getStepRecurrence(SE); - bool isPointer = ExpandTy->isPointerTy(); - bool isNegative = !isPointer && isNonConstantNegative(Step); - if (isNegative) + // If the stride is negative, insert a sub instead of an add for the increment + // (unless it's a constant, because subtracts of constants are canonicalized + // to adds). + bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative(); + if (useSubtract) Step = SE.getNegativeSCEV(Step); + // Expand the step somewhere that dominates the loop header. Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); // Create the PHI. @@ -1023,32 +1129,18 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, continue; } - // Create a step value and add it to the PHI. If IVIncInsertLoop is - // non-null and equal to the addrec's loop, insert the instructions - // at IVIncInsertPos. + // Create a step value and add it to the PHI. + // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the + // instructions at IVIncInsertPos. 
Instruction *InsertPos = L == IVIncInsertLoop ? IVIncInsertPos : Pred->getTerminator(); Builder.SetInsertPoint(InsertPos); - Value *IncV; - // If the PHI is a pointer, use a GEP, otherwise use an add or sub. - if (isPointer) { - PointerType *GEPPtrTy = cast(ExpandTy); - // If the step isn't constant, don't use an implicitly scaled GEP, because - // that would require a multiply inside the loop. - if (!isa(StepV)) - GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()), - GEPPtrTy->getAddressSpace()); - const SCEV *const StepArray[1] = { SE.getSCEV(StepV) }; - IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN); - if (IncV->getType() != PN->getType()) { - IncV = Builder.CreateBitCast(IncV, PN->getType()); - rememberInstruction(IncV); - } - } else { - IncV = isNegative ? - Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") : - Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next"); - rememberInstruction(IncV); + Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); + if (isa(IncV)) { + if (Normalized->getNoWrapFlags(SCEV::FlagNUW)) + cast(IncV)->setHasNoUnsignedWrap(); + if (Normalized->getNoWrapFlags(SCEV::FlagNSW)) + cast(IncV)->setHasNoSignedWrap(); } PN->addIncoming(IncV, Pred); } @@ -1057,6 +1149,10 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, if (SaveInsertBB) restoreInsertPoint(SaveInsertBB, SaveInsertPt); + // After expanding subexpressions, restore the PostIncLoops set so the caller + // can ensure that IVIncrement dominates the current uses. + PostIncLoops = SavedPostIncLoops; + // Remember this PHI, even in post-inc mode. 
InsertedValues.insert(PN); @@ -1088,8 +1184,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { Normalized = cast( SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE), Normalized->getLoop(), - // FIXME: Normalized->getNoWrapFlags(FlagNW) - SCEV::FlagAnyWrap)); + Normalized->getNoWrapFlags(SCEV::FlagNW))); } // Strip off any non-loop-dominating component from the addrec step. @@ -1099,11 +1194,9 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { PostLoopScale = Step; Step = SE.getConstant(Normalized->getType(), 1); Normalized = - cast(SE.getAddRecExpr(Start, Step, - Normalized->getLoop(), - // FIXME: Normalized - // ->getNoWrapFlags(FlagNW) - SCEV::FlagAnyWrap)); + cast(SE.getAddRecExpr( + Start, Step, Normalized->getLoop(), + Normalized->getNoWrapFlags(SCEV::FlagNW))); } // Expand the core addrec. If we need post-loop scaling, force it to @@ -1124,10 +1217,31 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // For an expansion to use the postinc form, the client must call // expandCodeFor with an InsertPoint that is either outside the PostIncLoop // or dominated by IVIncInsertPos. - assert((!isa(Result) || - SE.DT->dominates(cast(Result), - Builder.GetInsertPoint())) && - "postinc expansion does not dominate use"); + if (isa(Result) + && !SE.DT->dominates(cast(Result), + Builder.GetInsertPoint())) { + // The induction variable's postinc expansion does not dominate this use. + // IVUsers tries to prevent this case, so it is rare. However, it can + // happen when an IVUser outside the loop is not dominated by the latch + // block. Adjusting IVIncInsertPos before expansion begins cannot handle + // all cases. Consider a phi outide whose operand is replaced during + // expansion with the value of the postinc user. Without fundamentally + // changing the way postinc users are tracked, the only remedy is + // inserting an extra IV increment. 
StepV might fold into PostLoopOffset, + // but hopefully expandCodeFor handles that. + bool useSubtract = + !ExpandTy->isPointerTy() && Step->isNonConstantNegative(); + if (useSubtract) + Step = SE.getNegativeSCEV(Step); + // Expand the step somewhere that dominates the loop header. + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); + // Restore the insertion point to the place where the caller has + // determined dominates all uses. + restoreInsertPoint(SaveInsertBB, SaveInsertPt); + Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); + } } // Re-apply any non-loop-dominating scale. @@ -1175,8 +1289,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i) NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType()); Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(), - // FIXME: S->getNoWrapFlags(FlagNW) - SCEV::FlagAnyWrap)); + S->getNoWrapFlags(SCEV::FlagNW))); BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); BasicBlock::iterator NewInsertPt = @@ -1194,8 +1307,8 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { if (!S->getStart()->isZero()) { SmallVector NewOps(S->op_begin(), S->op_end()); NewOps[0] = SE.getConstant(Ty, 0); - // FIXME: can use S->getNoWrapFlags() - const SCEV *Rest = SE.getAddRecExpr(NewOps, L, SCEV::FlagAnyWrap); + const SCEV *Rest = SE.getAddRecExpr(NewOps, L, + S->getNoWrapFlags(SCEV::FlagNW)); // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the // comments on expandAddToGEP for details. 
@@ -1230,9 +1343,13 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { Header->begin()); rememberInstruction(CanonicalIV); + SmallSet PredSeen; Constant *One = ConstantInt::get(Ty, 1); for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) { BasicBlock *HP = *HPI; + if (!PredSeen.insert(HP)) + continue; + if (L->contains(HP)) { // Insert a unit add instruction right before the terminator // corresponding to the back-edge. @@ -1363,10 +1480,7 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { } Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty, - Instruction *I) { - BasicBlock::iterator IP = I; - while (isInsertedInstruction(IP) || isa(IP)) - ++IP; + Instruction *IP) { Builder.SetInsertPoint(IP->getParent(), IP); return expandCodeFor(SH, Ty); } @@ -1392,21 +1506,29 @@ Value *SCEVExpander::expand(const SCEV *S) { if (!L) break; if (BasicBlock *Preheader = L->getLoopPreheader()) InsertPt = Preheader->getTerminator(); + else { + // LSR sets the insertion point for AddRec start/step values to the + // block start to simplify value reuse, even though it's an invalid + // position. SCEVExpander must correct for this in all cases. + InsertPt = L->getHeader()->getFirstInsertionPt(); + } } else { // If the SCEV is computable at this level, insert it into the header // after the PHIs (and after any other instructions that we've inserted // there) so that it is guaranteed to dominate any user inside the loop. if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L)) InsertPt = L->getHeader()->getFirstInsertionPt(); - while (isInsertedInstruction(InsertPt) || isa(InsertPt)) + while (InsertPt != Builder.GetInsertPoint() + && (isInsertedInstruction(InsertPt) + || isa(InsertPt))) { InsertPt = llvm::next(BasicBlock::iterator(InsertPt)); + } break; } // Check to see if we already expanded this here. 
- std::map, - AssertingVH >::iterator I = - InsertedExpressions.find(std::make_pair(S, InsertPt)); + std::map, TrackingVH >::iterator + I = InsertedExpressions.find(std::make_pair(S, InsertPt)); if (I != InsertedExpressions.end()) return I->second; @@ -1434,23 +1556,9 @@ void SCEVExpander::rememberInstruction(Value *I) { InsertedPostIncValues.insert(I); else InsertedValues.insert(I); - - // If we just claimed an existing instruction and that instruction had - // been the insert point, adjust the insert point forward so that - // subsequently inserted code will be dominated. - if (Builder.GetInsertPoint() == I) { - BasicBlock::iterator It = cast(I); - do { ++It; } while (isInsertedInstruction(It) || - isa(It)); - Builder.SetInsertPoint(Builder.GetInsertBlock(), It); - } } void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) { - // If we acquired more instructions since the old insert point was saved, - // advance past them. - while (isInsertedInstruction(I) || isa(I)) ++I; - Builder.SetInsertPoint(BB, I); } @@ -1478,40 +1586,13 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, return V; } -/// hoistStep - Attempt to hoist an IV increment above a potential use. -/// -/// To successfully hoist, two criteria must be met: -/// - IncV operands dominate InsertPos and -/// - InsertPos dominates IncV -/// -/// Meeting the second condition means that we don't need to check all of IncV's -/// existing uses (it's moving up in the domtree). -/// -/// This does not yet recursively hoist the operands, although that would -/// not be difficult. -/// -/// This does not require a SCEVExpander instance and could be replaced by a -/// general code-insertion helper. 
-bool SCEVExpander::hoistStep(Instruction *IncV, Instruction *InsertPos, - const DominatorTree *DT) { - if (DT->dominates(IncV, InsertPos)) - return true; - - if (!DT->dominates(InsertPos->getParent(), IncV->getParent())) - return false; - - if (IncV->mayHaveSideEffects()) - return false; - - // Attempt to hoist IncV - for (User::op_iterator OI = IncV->op_begin(), OE = IncV->op_end(); - OI != OE; ++OI) { - Instruction *OInst = dyn_cast(OI); - if (OInst && !DT->dominates(OInst, InsertPos)) - return false; - } - IncV->moveBefore(InsertPos); - return true; +/// Sort values by integer width for replaceCongruentIVs. +static bool width_descending(Value *lhs, Value *rhs) { + // Put pointers at the front and make sure pointer < pointer = false. + if (!lhs->getType()->isIntegerTy() || !rhs->getType()->isIntegerTy()) + return rhs->getType()->isIntegerTy() && !lhs->getType()->isIntegerTy(); + return rhs->getType()->getPrimitiveSizeInBits() + < lhs->getType()->getPrimitiveSizeInBits(); } /// replaceCongruentIVs - Check for congruent phis in this loop header and @@ -1521,23 +1602,56 @@ bool SCEVExpander::hoistStep(Instruction *IncV, Instruction *InsertPos, /// This does not depend on any SCEVExpander state but should be used in /// the same context that SCEVExpander is used. unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, - SmallVectorImpl &DeadInsts) { + SmallVectorImpl &DeadInsts, + const TargetTransformInfo *TTI) { + // Collect the header phis; with TTI, sort them by decreasing integer width. + SmallVector Phis; + for (BasicBlock::iterator I = L->getHeader()->begin(); + PHINode *Phi = dyn_cast(I); ++I) { + Phis.push_back(Phi); + } + if (TTI) + std::sort(Phis.begin(), Phis.end(), width_descending); + unsigned NumElim = 0; DenseMap ExprToIVMap; - for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ++I) { - PHINode *Phi = cast(I); + // Process phis from wide to narrow. Map wide phis to their truncation + // so narrow phis can reuse them. 
+ for (SmallVectorImpl::const_iterator PIter = Phis.begin(), + PEnd = Phis.end(); PIter != PEnd; ++PIter) { + PHINode *Phi = *PIter; + + // Fold constant phis. They may be congruent to other constant phis and + // would confuse the logic below that expects proper IVs. + if (Value *V = Phi->hasConstantValue()) { + Phi->replaceAllUsesWith(V); + DeadInsts.push_back(Phi); + ++NumElim; + DEBUG_WITH_TYPE(DebugType, dbgs() + << "INDVARS: Eliminated constant iv: " << *Phi << '\n'); + continue; + } + if (!SE.isSCEVable(Phi->getType())) continue; PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)]; if (!OrigPhiRef) { OrigPhiRef = Phi; + if (Phi->getType()->isIntegerTy() && TTI + && TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) { + // This phi can be freely truncated to the narrowest phi type. Map the + // truncated expression to it so it will be reused for narrow types. + const SCEV *TruncExpr = + SE.getTruncateExpr(SE.getSCEV(Phi), Phis.back()->getType()); + ExprToIVMap[TruncExpr] = Phi; + } continue; } - // If one phi derives from the other via GEPs, types may differ. - // We could consider adding a bitcast here to handle it. - if (OrigPhiRef->getType() != Phi->getType()) + // Replacing a pointer phi with an integer phi or vice-versa doesn't make + // sense. + if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy()) continue; if (BasicBlock *LatchBlock = L->getLoopLatch()) { @@ -1546,34 +1660,98 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, Instruction *IsomorphicInc = cast(Phi->getIncomingValueForBlock(LatchBlock)); - // If this phi is more canonical, swap it with the original. - if (!isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L, - OrigPhiRef->getType()) - && isExpandedAddRecExprPHI(Phi, IsomorphicInc, L, Phi->getType())) { + // If this phi has the same width but is more canonical, replace the + // original with it. 
As part of the "more canonical" determination, + // respect a prior decision to use an IV chain. + if (OrigPhiRef->getType() == Phi->getType() + && !(ChainedPhis.count(Phi) + || isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L)) + && (ChainedPhis.count(Phi) + || isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) { std::swap(OrigPhiRef, Phi); std::swap(OrigInc, IsomorphicInc); } // Replacing the congruent phi is sufficient because acyclic redundancy // elimination, CSE/GVN, should handle the rest. However, once SCEV proves // that a phi is congruent, it's often the head of an IV user cycle that - // is isomorphic with the original phi. So it's worth eagerly cleaning up - // the common case of a single IV increment. - if (OrigInc != IsomorphicInc && - OrigInc->getType() == IsomorphicInc->getType() && - SE.getSCEV(OrigInc) == SE.getSCEV(IsomorphicInc) && - hoistStep(OrigInc, IsomorphicInc, DT)) { + // is isomorphic with the original phi. It's worth eagerly cleaning up the + // common case of a single IV increment so that DeleteDeadPHIs can remove + // cycles that had postinc uses. + const SCEV *TruncExpr = SE.getTruncateOrNoop(SE.getSCEV(OrigInc), + IsomorphicInc->getType()); + if (OrigInc != IsomorphicInc + && TruncExpr == SE.getSCEV(IsomorphicInc) + && ((isa(OrigInc) && isa(IsomorphicInc)) + || hoistIVInc(OrigInc, IsomorphicInc))) { DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Eliminated congruent iv.inc: " << *IsomorphicInc << '\n'); - IsomorphicInc->replaceAllUsesWith(OrigInc); + Value *NewInc = OrigInc; + if (OrigInc->getType() != IsomorphicInc->getType()) { + Instruction *IP = isa(OrigInc) + ? (Instruction*)L->getHeader()->getFirstInsertionPt() + : OrigInc->getNextNode(); + IRBuilder<> Builder(IP); + Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc()); + NewInc = Builder. 
+ CreateTruncOrBitCast(OrigInc, IsomorphicInc->getType(), IVName); + } + IsomorphicInc->replaceAllUsesWith(NewInc); DeadInsts.push_back(IsomorphicInc); } } DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi << '\n'); ++NumElim; - Phi->replaceAllUsesWith(OrigPhiRef); + Value *NewIV = OrigPhiRef; + if (OrigPhiRef->getType() != Phi->getType()) { + IRBuilder<> Builder(L->getHeader()->getFirstInsertionPt()); + Builder.SetCurrentDebugLocation(Phi->getDebugLoc()); + NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName); + } + Phi->replaceAllUsesWith(NewIV); DeadInsts.push_back(Phi); } return NumElim; } + +namespace { +// Search for a SCEV subexpression that is not safe to expand. Any expression +// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely +// UDiv expressions. We don't know if the UDiv is derived from an IR divide +// instruction, but the important thing is that we prove the denominator is +// nonzero before expansion. +// +// IVUsers already checks that IV-derived expressions are safe. So this check is +// only needed when the expression includes some subexpression that is not IV +// derived. +// +// Currently, we only allow division by a nonzero constant here. If this is +// inadequate, we could easily allow division by SCEVUnknown by using +// ValueTracking to check isKnownNonZero(). +struct SCEVFindUnsafe { + bool IsUnsafe; + + SCEVFindUnsafe(): IsUnsafe(false) {} + + bool follow(const SCEV *S) { + const SCEVUDivExpr *D = dyn_cast(S); + if (!D) + return true; + const SCEVConstant *SC = dyn_cast(D->getRHS()); + if (SC && !SC->getValue()->isZero()) + return true; + IsUnsafe = true; + return false; + } + bool isDone() const { return IsUnsafe; } +}; +} + +namespace llvm { +bool isSafeToExpand(const SCEV *S) { + SCEVFindUnsafe Search; + visitAll(S, Search); + return !Search.IsUnsafe; +} +}