X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FScalar%2FLoopStrengthReduce.cpp;h=db8ab485e394c428ae1b60e83ab27864e3f17893;hb=1d31290634eccc3b360c427282d59780d76b9169;hp=b7c1ac58eae48d26482ad47d8c7f6922c270a7ad;hpb=035c6a23564a0ff4384a67d6e20d258a6a2dac95;p=oota-llvm.git diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index b7c1ac58eae..db8ab485e39 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -23,6 +23,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Support/CFG.h" #include "llvm/Support/GetElementPtrTypeIterator.h" @@ -31,22 +32,25 @@ #include "llvm/Target/TargetData.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" #include "llvm/Target/TargetLowering.h" #include -#include #include using namespace llvm; +STATISTIC(NumReduced , "Number of GEPs strength reduced"); +STATISTIC(NumInserted, "Number of PHIs inserted"); +STATISTIC(NumVariable, "Number of PHIs with variable strides"); + namespace { - Statistic<> NumReduced ("loop-reduce", "Number of GEPs strength reduced"); - Statistic<> NumInserted("loop-reduce", "Number of PHIs inserted"); - Statistic<> NumVariable("loop-reduce","Number of PHIs with variable strides"); + + struct BasedUser; /// IVStrideUse - Keep track of one use of a strided induction variable, where /// the stride is stored externally. The Offset member keeps track of the /// offset from the IV, User is the actual user of the operand, and 'Operand' /// is the operand # of the User that is the use. - struct IVStrideUse { + struct VISIBILITY_HIDDEN IVStrideUse { SCEVHandle Offset; Instruction *User; Value *OperandValToReplace; @@ -66,7 +70,7 @@ namespace { /// have an operand that is based on the trip count multiplied by some stride. /// The stride for all of these users is common and kept external to this /// structure. - struct IVUsersOfOneStride { + struct VISIBILITY_HIDDEN IVUsersOfOneStride { /// Users - Keep track of all of the users of this stride as well as the /// initial value and the operand that uses the IV. std::vector Users; @@ -79,15 +83,15 @@ namespace { /// IVInfo - This structure keeps track of one IV expression inserted during /// StrengthReduceStridedIVUsers. It contains the stride, the common base, as /// well as the PHI node and increment value created for rewrite. - struct IVExpr { + struct VISIBILITY_HIDDEN IVExpr { SCEVHandle Stride; SCEVHandle Base; PHINode *PHI; Value *IncV; IVExpr() - : Stride(SCEVUnknown::getIntegerSCEV(0, Type::UIntTy)), - Base (SCEVUnknown::getIntegerSCEV(0, Type::UIntTy)) {} + : Stride(SCEVUnknown::getIntegerSCEV(0, Type::Int32Ty)), + Base (SCEVUnknown::getIntegerSCEV(0, Type::Int32Ty)) {} IVExpr(const SCEVHandle &stride, const SCEVHandle &base, PHINode *phi, Value *incv) : Stride(stride), Base(base), PHI(phi), IncV(incv) {} @@ -95,7 +99,7 @@ namespace { /// IVsOfOneStride - This structure keeps track of all IV expression inserted /// during StrengthReduceStridedIVUsers for a particular stride of the IV. - struct IVsOfOneStride { + struct VISIBILITY_HIDDEN IVsOfOneStride { std::vector IVs; void addIV(const SCEVHandle &Stride, const SCEVHandle &Base, PHINode *PHI, @@ -104,7 +108,7 @@ namespace { } }; - class LoopStrengthReduce : public FunctionPass { + class VISIBILITY_HIDDEN LoopStrengthReduce : public LoopPass { LoopInfo *LI; ETForest *EF; ScalarEvolution *SE; @@ -143,19 +147,7 @@ namespace { : TLI(tli) { } - virtual bool runOnFunction(Function &) { - LI = &getAnalysis(); - EF = &getAnalysis(); - SE = &getAnalysis(); - TD = &getAnalysis(); - UIntPtrTy = TD->getIntPtrType(); - Changed = false; - - for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) - runOnLoop(*I); - - return Changed; - } + bool runOnLoop(Loop *L, LPPassManager &LPM); virtual void getAnalysisUsage(AnalysisUsage &AU) const { // We split critical edges, so we change the CFG. However, we do update @@ -177,41 +169,44 @@ namespace { /// getCastedVersionOf - Return the specified value casted to uintptr_t. /// - Value *getCastedVersionOf(Value *V); + Value *getCastedVersionOf(Instruction::CastOps opcode, Value *V); private: - void runOnLoop(Loop *L); bool AddUsersIfInteresting(Instruction *I, Loop *L, std::set &Processed); SCEVHandle GetExpressionSCEV(Instruction *E, Loop *L); void OptimizeIndvars(Loop *L); - unsigned CheckForIVReuse(const SCEVHandle &Stride, IVExpr &IV); + unsigned CheckForIVReuse(const SCEVHandle&, IVExpr&, const Type*, + const std::vector& UsersToProcess); + + bool ValidStride(int64_t, const std::vector& UsersToProcess); void StrengthReduceStridedIVUsers(const SCEVHandle &Stride, IVUsersOfOneStride &Uses, Loop *L, bool isOnlyStride); void DeleteTriviallyDeadInstructions(std::set &Insts); }; - RegisterOpt X("loop-reduce", - "Loop Strength Reduction"); + RegisterPass X("loop-reduce", "Loop Strength Reduction"); } -FunctionPass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { +LoopPass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { return new LoopStrengthReduce(TLI); } -/// getCastedVersionOf - Return the specified value casted to uintptr_t. +/// getCastedVersionOf - Return the specified value casted to uintptr_t. This +/// assumes that the Value* V is of integer or pointer type only. /// -Value *LoopStrengthReduce::getCastedVersionOf(Value *V) { +Value *LoopStrengthReduce::getCastedVersionOf(Instruction::CastOps opcode, + Value *V) { if (V->getType() == UIntPtrTy) return V; if (Constant *CB = dyn_cast(V)) - return ConstantExpr::getCast(CB, UIntPtrTy); + return ConstantExpr::getCast(opcode, CB, UIntPtrTy); Value *&New = CastedPointers[V]; if (New) return New; - New = SCEVExpander::InsertCastOfTo(V, UIntPtrTy); + New = SCEVExpander::InsertCastOfTo(opcode, V, UIntPtrTy); DeadInsts.insert(cast(New)); return New; } @@ -240,6 +235,16 @@ DeleteTriviallyDeadInstructions(std::set &Insts) { /// GetExpressionSCEV - Compute and return the SCEV for the specified /// instruction. SCEVHandle LoopStrengthReduce::GetExpressionSCEV(Instruction *Exp, Loop *L) { + // Pointer to pointer bitcast instructions return the same value as their + // operand. + if (BitCastInst *BCI = dyn_cast(Exp)) { + if (SE->hasSCEV(BCI) || !isa(BCI->getOperand(0))) + return SE->getSCEV(BCI); + SCEVHandle R = GetExpressionSCEV(cast(BCI->getOperand(0)), L); + SE->setSCEV(BCI, R); + return R; + } + // Scalar Evolutions doesn't know how to compute SCEV's for GEP instructions. // If this is a GEP that SE doesn't know about, compute it now and insert it. // If this is not a GEP, or if we have already done this computation, just let @@ -254,7 +259,8 @@ SCEVHandle LoopStrengthReduce::GetExpressionSCEV(Instruction *Exp, Loop *L) { // Build up the base expression. Insert an LLVM cast of the pointer to // uintptr_t first. - SCEVHandle GEPVal = SCEVUnknown::get(getCastedVersionOf(GEP->getOperand(0))); + SCEVHandle GEPVal = SCEVUnknown::get( + getCastedVersionOf(Instruction::PtrToInt, GEP->getOperand(0))); gep_type_iterator GTI = gep_type_begin(GEP); @@ -264,18 +270,24 @@ SCEVHandle LoopStrengthReduce::GetExpressionSCEV(Instruction *Exp, Loop *L) { // operand. if (const StructType *STy = dyn_cast(*GTI)) { const StructLayout *SL = TD->getStructLayout(STy); - unsigned Idx = cast(GEP->getOperand(i))->getValue(); - uint64_t Offset = SL->MemberOffsets[Idx]; + unsigned Idx = cast(GEP->getOperand(i))->getZExtValue(); + uint64_t Offset = SL->getElementOffset(Idx); GEPVal = SCEVAddExpr::get(GEPVal, SCEVUnknown::getIntegerSCEV(Offset, UIntPtrTy)); } else { - Value *OpVal = getCastedVersionOf(GEP->getOperand(i)); + unsigned GEPOpiBits = + GEP->getOperand(i)->getType()->getPrimitiveSizeInBits(); + unsigned IntPtrBits = UIntPtrTy->getPrimitiveSizeInBits(); + Instruction::CastOps opcode = (GEPOpiBits < IntPtrBits ? + Instruction::SExt : (GEPOpiBits > IntPtrBits ? Instruction::Trunc : + Instruction::BitCast)); + Value *OpVal = getCastedVersionOf(opcode, GEP->getOperand(i)); SCEVHandle Idx = SE->getSCEV(OpVal); uint64_t TypeSize = TD->getTypeSize(GTI.getIndexedType()); if (TypeSize != 1) Idx = SCEVMulExpr::get(Idx, - SCEVConstant::get(ConstantUInt::get(UIntPtrTy, + SCEVConstant::get(ConstantInt::get(UIntPtrTy, TypeSize))); GEPVal = SCEVAddExpr::get(GEPVal, Idx); } @@ -307,7 +319,7 @@ static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, Start = SCEVAddExpr::get(Start, AE->getOperand(i)); } - } else if (SCEVAddRecExpr *AddRec = dyn_cast(SH)) { + } else if (isa(SH)) { TheAddRec = SH; } else { return false; // not analyzable. @@ -322,16 +334,10 @@ static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, Start = SCEVAddExpr::get(Start, AddRec->getOperand(0)); if (!isa(AddRec->getOperand(1))) - DEBUG(std::cerr << "[" << L->getHeader()->getName() - << "] Variable stride: " << *AddRec << "\n"); + DOUT << "[" << L->getHeader()->getName() + << "] Variable stride: " << *AddRec << "\n"; Stride = AddRec->getOperand(1); - // Check that all constant strides are the unsigned type, we don't want to - // have two IV's one of signed stride 4 and one of unsigned stride 4 to not be - // merged. - assert((!isa(Stride) || Stride->getType()->isUnsigned()) && - "Constants should be canonicalized to unsigned!"); - return true; } @@ -378,7 +384,11 @@ static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, // post-incremented value. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == IV) { - SplitCriticalEdge(PN->getIncomingBlock(i), PN->getParent(), P); + SplitCriticalEdge(PN->getIncomingBlock(i), PN->getParent(), P, + true); + // Splitting the critical edge can reduce the number of entries in this + // PHI. + e = PN->getNumIncomingValues(); if (--NumUses == 0) break; } @@ -406,10 +416,14 @@ bool LoopStrengthReduce::AddUsersIfInteresting(Instruction *I, Loop *L, SCEVHandle Stride = Start; if (!getSCEVStartAndStride(ISE, L, Start, Stride)) return false; // Non-reducible symbolic expression, bail out. - - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;++UI){ + + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;) { Instruction *User = cast(*UI); + // Increment iterator now because IVUseShouldUsePostIncValue may remove + // User from the list of I users. + ++UI; + // Do not infinitely recurse on PHI nodes. if (isa(User) && Processed.count(User)) continue; @@ -418,12 +432,12 @@ bool LoopStrengthReduce::AddUsersIfInteresting(Instruction *I, Loop *L, // don't recurse into it. bool AddUserToIVUsers = false; if (LI->getLoopFor(User->getParent()) != L) { - DEBUG(std::cerr << "FOUND USER in other loop: " << *User - << " OF SCEV: " << *ISE << "\n"); + DOUT << "FOUND USER in other loop: " << *User + << " OF SCEV: " << *ISE << "\n"; AddUserToIVUsers = true; } else if (!AddUsersIfInteresting(User, L, Processed)) { - DEBUG(std::cerr << "FOUND USER: " << *User - << " OF SCEV: " << *ISE << "\n"); + DOUT << "FOUND USER: " << *User + << " OF SCEV: " << *ISE << "\n"; AddUserToIVUsers = true; } @@ -441,7 +455,7 @@ bool LoopStrengthReduce::AddUsersIfInteresting(Instruction *I, Loop *L, SCEVHandle NewStart = SCEV::getMinusSCEV(Start, Stride); StrideUses.addUser(NewStart, User, I); StrideUses.Users.back().isUseOfPostIncrementedValue = true; - DEBUG(std::cerr << " USING POSTINC SCEV, START=" << *NewStart<< "\n"); + DOUT << " USING POSTINC SCEV, START=" << *NewStart<< "\n"; } else { StrideUses.addUser(Start, User, I); } @@ -504,12 +518,12 @@ namespace { } void BasedUser::dump() const { - std::cerr << " Base=" << *Base; - std::cerr << " Imm=" << *Imm; + cerr << " Base=" << *Base; + cerr << " Imm=" << *Imm; if (EmittedBase) - std::cerr << " EB=" << *EmittedBase; + cerr << " EB=" << *EmittedBase; - std::cerr << " Inst: " << *Inst; + cerr << " Inst: " << *Inst; } Value *BasedUser::InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, @@ -534,7 +548,7 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, // If there is no immediate value, skip the next part. if (SCEVConstant *SC = dyn_cast(Imm)) - if (SC->getValue()->isNullValue()) + if (SC->getValue()->isZero()) return Rewriter.expandCodeFor(NewBase, BaseInsertPt, OperandValToReplace->getType()); @@ -557,7 +571,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase, Value *NewVal = InsertCodeForBaseAtPosition(NewBase, Rewriter, Inst, L); // Replace the use of the operand Value with the new Phi we just created. Inst->replaceUsesOfWith(OperandValToReplace, NewVal); - DEBUG(std::cerr << " CHANGED: IMM =" << *Imm << " Inst = " << *Inst); + DOUT << " CHANGED: IMM =" << *Imm << " Inst = " << *Inst; return; } @@ -579,7 +593,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase, (PN->getParent() != L->getHeader() || !L->contains(PHIPred))) { // First step, split the critical edge. - SplitCriticalEdge(PHIPred, PN->getParent(), P); + SplitCriticalEdge(PHIPred, PN->getParent(), P, true); // Next step: move the basic block. In particular, if the PHI node // is outside of the loop, and PredTI is in the loop, we want to @@ -589,6 +603,9 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase, BasicBlock *NewBB = PN->getIncomingBlock(i); NewBB->moveBefore(PN->getParent()); } + + // Splitting the edge can reduce the number of PHI entries we have. + e = PN->getNumIncomingValues(); } Value *&Code = InsertedCode[PN->getIncomingBlock(i)]; @@ -603,28 +620,28 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase, Rewriter.clear(); } } - DEBUG(std::cerr << " CHANGED: IMM =" << *Imm << " Inst = " << *Inst); + DOUT << " CHANGED: IMM =" << *Imm << " Inst = " << *Inst; } /// isTargetConstant - Return true if the following can be referenced by the /// immediate field of a target instruction. -static bool isTargetConstant(const SCEVHandle &V, const TargetLowering *TLI) { +static bool isTargetConstant(const SCEVHandle &V, const Type *UseTy, + const TargetLowering *TLI) { if (SCEVConstant *SC = dyn_cast(V)) { - int64_t V = SC->getValue()->getSExtValue(); + int64_t VC = SC->getValue()->getSExtValue(); if (TLI) - return TLI->isLegalAddressImmediate(V); + return TLI->isLegalAddressImmediate(VC, UseTy); else // Defaults to PPC. PPC allows a sign-extended 16-bit immediate field. - return (V > -(1 << 16) && V < (1 << 16)-1); + return (VC > -(1 << 16) && VC < (1 << 16)-1); } if (SCEVUnknown *SU = dyn_cast(V)) if (ConstantExpr *CE = dyn_cast(SU->getValue())) - if (CE->getOpcode() == Instruction::Cast) { + if (CE->getOpcode() == Instruction::PtrToInt) { Constant *Op0 = CE->getOperand(0); - if (isa(Op0) && - TLI && + if (isa(Op0) && TLI && TLI->isLegalAddressImmediate(cast(Op0))) return true; } @@ -674,15 +691,20 @@ static void MoveLoopVariantsToImediateField(SCEVHandle &Val, SCEVHandle &Imm, /// that can fit into the immediate field of instructions in the target. /// Accumulate these immediate values into the Imm value. static void MoveImmediateValues(const TargetLowering *TLI, + Instruction *User, SCEVHandle &Val, SCEVHandle &Imm, bool isAddress, Loop *L) { + const Type *UseTy = User->getType(); + if (StoreInst *SI = dyn_cast(User)) + UseTy = SI->getOperand(0)->getType(); + if (SCEVAddExpr *SAE = dyn_cast(Val)) { std::vector NewOps; NewOps.reserve(SAE->getNumOperands()); for (unsigned i = 0; i != SAE->getNumOperands(); ++i) { SCEVHandle NewOp = SAE->getOperand(i); - MoveImmediateValues(TLI, NewOp, Imm, isAddress, L); + MoveImmediateValues(TLI, User, NewOp, Imm, isAddress, L); if (!NewOp->isLoopInvariant(L)) { // If this is a loop-variant expression, it must stay in the immediate @@ -701,7 +723,7 @@ static void MoveImmediateValues(const TargetLowering *TLI, } else if (SCEVAddRecExpr *SARE = dyn_cast(Val)) { // Try to pull immediates out of the start value of nested addrec's. SCEVHandle Start = SARE->getStart(); - MoveImmediateValues(TLI, Start, Imm, isAddress, L); + MoveImmediateValues(TLI, User, Start, Imm, isAddress, L); if (Start != SARE->getStart()) { std::vector Ops(SARE->op_begin(), SARE->op_end()); @@ -711,12 +733,12 @@ static void MoveImmediateValues(const TargetLowering *TLI, return; } else if (SCEVMulExpr *SME = dyn_cast(Val)) { // Transform "8 * (4 + v)" -> "32 + 8*V" if "32" fits in the immed field. - if (isAddress && isTargetConstant(SME->getOperand(0), TLI) && + if (isAddress && isTargetConstant(SME->getOperand(0), UseTy, TLI) && SME->getNumOperands() == 2 && SME->isLoopInvariant(L)) { SCEVHandle SubImm = SCEVUnknown::getIntegerSCEV(0, Val->getType()); SCEVHandle NewOp = SME->getOperand(1); - MoveImmediateValues(TLI, NewOp, SubImm, isAddress, L); + MoveImmediateValues(TLI, User, NewOp, SubImm, isAddress, L); // If we extracted something out of the subexpressions, see if we can // simplify this! @@ -724,7 +746,7 @@ static void MoveImmediateValues(const TargetLowering *TLI, // Scale SubImm up by "8". If the result is a target constant, we are // good. SubImm = SCEVMulExpr::get(SubImm, SME->getOperand(0)); - if (isTargetConstant(SubImm, TLI)) { + if (isTargetConstant(SubImm, UseTy, TLI)) { // Accumulate the immediate. Imm = SCEVAddExpr::get(Imm, SubImm); @@ -738,7 +760,7 @@ static void MoveImmediateValues(const TargetLowering *TLI, // Loop-variant expressions must stay in the immediate field of the // expression. - if ((isAddress && isTargetConstant(Val, TLI)) || + if ((isAddress && isTargetConstant(Val, UseTy, TLI)) || !Val->isLoopInvariant(L)) { Imm = SCEVAddExpr::get(Imm, Val); Val = SCEVUnknown::getIntegerSCEV(0, Val->getType()); @@ -749,9 +771,9 @@ static void MoveImmediateValues(const TargetLowering *TLI, } -/// IncrementAddExprUses - Decompose the specified expression into its added -/// subexpressions, and increment SubExpressionUseCounts for each of these -/// decomposed parts. +/// SeparateSubExprs - Decompose Expr into all of the subexpressions that are +/// added together. This is used to reassociate common addition subexprs +/// together for maximal sharing when rewriting bases. static void SeparateSubExprs(std::vector &SubExprs, SCEVHandle Expr) { if (SCEVAddExpr *AE = dyn_cast(Expr)) { @@ -771,7 +793,7 @@ static void SeparateSubExprs(std::vector &SubExprs, SeparateSubExprs(SubExprs, SARE->getOperand(0)); } } else if (!isa(Expr) || - !cast(Expr)->getValue()->isNullValue()) { + !cast(Expr)->getValue()->isZero()) { // Do not add zero. SubExprs.push_back(Expr); } @@ -861,46 +883,79 @@ RemoveCommonExpressionsFromUseBases(std::vector &Uses) { /// static bool isZero(SCEVHandle &V) { if (SCEVConstant *SC = dyn_cast(V)) - return SC->getValue()->getRawValue() == 0; + return SC->getValue()->isZero(); return false; } +/// ValidStride - Check whether the given Scale is valid for all loads and +/// stores in UsersToProcess. Pulled into a function to avoid disturbing the +/// sensibilities of those who dislike goto's. +/// +bool LoopStrengthReduce::ValidStride(int64_t Scale, + const std::vector& UsersToProcess) { + int64_t Imm; + for (unsigned i=0, e = UsersToProcess.size(); i!=e; ++i) { + if (SCEVConstant *SC = dyn_cast(UsersToProcess[i].Imm)) + Imm = SC->getValue()->getSExtValue(); + else + Imm = 0; + + // If this is a load or other access, pass the type of the access in. + const Type *AccessTy = Type::VoidTy; + if (StoreInst *SI = dyn_cast(UsersToProcess[i].Inst)) + AccessTy = SI->getOperand(0)->getType(); + else if (LoadInst *LI = dyn_cast(UsersToProcess[i].Inst)) + AccessTy = LI->getType(); + + if (!TLI->isLegalAddressScaleAndImm(Scale, Imm, AccessTy)) + return false; + } + return true; +} /// CheckForIVReuse - Returns the multiple if the stride is the multiple /// of a previous stride and it is a legal value for the target addressing /// mode scale component. This allows the users of this stride to be rewritten /// as prev iv * factor. It returns 0 if no reuse is possible. -unsigned LoopStrengthReduce::CheckForIVReuse(const SCEVHandle &Stride, - IVExpr &IV) { +unsigned LoopStrengthReduce::CheckForIVReuse(const SCEVHandle &Stride, + IVExpr &IV, const Type *Ty, + const std::vector& UsersToProcess) { if (!TLI) return 0; if (SCEVConstant *SC = dyn_cast(Stride)) { int64_t SInt = SC->getValue()->getSExtValue(); if (SInt == 1) return 0; - for (TargetLowering::legal_am_scale_iterator - I = TLI->legal_am_scale_begin(), E = TLI->legal_am_scale_end(); - I != E; ++I) { - unsigned Scale = *I; - if (abs(SInt) < Scale || (SInt % Scale) != 0) + for (std::map::iterator SI= IVsByStride.begin(), + SE = IVsByStride.end(); SI != SE; ++SI) { + int64_t SSInt = cast(SI->first)->getValue()->getSExtValue(); + if (SInt != -SSInt && + (unsigned(abs(SInt)) < SSInt || (SInt % SSInt) != 0)) continue; - std::map::iterator SI = - IVsByStride.find(SCEVUnknown::getIntegerSCEV(SInt/Scale, Type::UIntTy)); - if (SI == IVsByStride.end()) - continue; - for (std::vector::iterator II = SI->second.IVs.begin(), - IE = SI->second.IVs.end(); II != IE; ++II) - // FIXME: Only handle base == 0 for now. - if (isZero(II->Base)) { - IV = *II; - return Scale; - } + int64_t Scale = SInt / SSInt; + // Check that this stride is valid for all the types used for loads and + // stores; if it can be used for some and not others, we might as well use + // the original stride everywhere, since we have to create the IV for it + // anyway. + if (ValidStride(Scale, UsersToProcess)) + for (std::vector::iterator II = SI->second.IVs.begin(), + IE = SI->second.IVs.end(); II != IE; ++II) + // FIXME: Only handle base == 0 for now. + // Only reuse previous IV if it would not require a type conversion. + if (isZero(II->Base) && II->Base->getType() == Ty) { + IV = *II; + return Scale; + } } } - return 0; } +/// PartitionByIsUseOfPostIncrementedValue - Simple boolean predicate that +/// returns true if Val's isUseOfPostIncrementedValue is true. +static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) { + return Val.isUseOfPostIncrementedValue; +} /// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single /// stride of IV. All of the users may have different starting values, and this @@ -928,20 +983,6 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, "Base value is not loop invariant!"); } - // Check if it is possible to reuse a IV with stride that is factor of this - // stride. And the multiple is a number that can be encoded in the scale - // field of the target addressing mode. - PHINode *NewPHI = NULL; - Value *IncV = NULL; - IVExpr ReuseIV; - unsigned RewriteFactor = CheckForIVReuse(Stride, ReuseIV); - if (RewriteFactor != 0) { - DEBUG(std::cerr << "BASED ON IV of STRIDE " << *ReuseIV.Stride - << " and BASE " << *ReuseIV.Base << " :\n"); - NewPHI = ReuseIV.PHI; - IncV = ReuseIV.IncV; - } - // We now have a whole bunch of uses of like-strided induction variables, but // they might all have different bases. We want to emit one PHI node for this // stride which we fold as many common expressions (between the IVs) into as @@ -974,15 +1015,34 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, if (SI->getOperand(1) == UsersToProcess[i].OperandValToReplace) isAddress = true; - MoveImmediateValues(TLI, UsersToProcess[i].Base, UsersToProcess[i].Imm, - isAddress, L); + MoveImmediateValues(TLI, UsersToProcess[i].Inst, UsersToProcess[i].Base, + UsersToProcess[i].Imm, isAddress, L); } } + // Check if it is possible to reuse a IV with stride that is factor of this + // stride. And the multiple is a number that can be encoded in the scale + // field of the target addressing mode. And we will have a valid + // instruction after this substition, including the immediate field, if any. + PHINode *NewPHI = NULL; + Value *IncV = NULL; + IVExpr ReuseIV; + unsigned RewriteFactor = CheckForIVReuse(Stride, ReuseIV, + CommonExprs->getType(), + UsersToProcess); + if (RewriteFactor != 0) { + DOUT << "BASED ON IV of STRIDE " << *ReuseIV.Stride + << " and BASE " << *ReuseIV.Base << " :\n"; + NewPHI = ReuseIV.PHI; + IncV = ReuseIV.IncV; + } + + const Type *ReplacedTy = CommonExprs->getType(); + // Now that we know what we need to do, insert the PHI node itself. // - DEBUG(std::cerr << "INSERTING IV of STRIDE " << *Stride << " and BASE " - << *CommonExprs << " :\n"); + DOUT << "INSERTING IV of TYPE " << *ReplacedTy << " of STRIDE " + << *Stride << " and BASE " << *CommonExprs << " :\n"; SCEVExpander Rewriter(*SE, *LI); SCEVExpander PreheaderRewriter(*SE, *LI); @@ -993,7 +1053,6 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, BasicBlock *LatchBlock = L->getLoopLatch(); - const Type *ReplacedTy = CommonExprs->getType(); // Emit the initial base value into the loop preheader. Value *CommonBaseV @@ -1029,18 +1088,49 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, Constant *C = dyn_cast(CommonBaseV); if (!C || (!C->isNullValue() && - !isTargetConstant(SCEVUnknown::get(CommonBaseV), TLI))) - // We want the common base emitted into the preheader! - CommonBaseV = new CastInst(CommonBaseV, CommonBaseV->getType(), - "commonbase", PreInsertPt); + !isTargetConstant(SCEVUnknown::get(CommonBaseV), ReplacedTy, TLI))) + // We want the common base emitted into the preheader! This is just + // using cast as a copy so BitCast (no-op cast) is appropriate + CommonBaseV = new BitCastInst(CommonBaseV, CommonBaseV->getType(), + "commonbase", PreInsertPt); } - // Sort by the base value, so that all IVs with identical bases are next to - // each other. + // We want to emit code for users inside the loop first. To do this, we + // rearrange BasedUser so that the entries at the end have + // isUseOfPostIncrementedValue = false, because we pop off the end of the + // vector (so we handle them first). + std::partition(UsersToProcess.begin(), UsersToProcess.end(), + PartitionByIsUseOfPostIncrementedValue); + + // Sort this by base, so that things with the same base are handled + // together. By partitioning first and stable-sorting later, we are + // guaranteed that within each base we will pop off users from within the + // loop before users outside of the loop with a particular base. + // + // We would like to use stable_sort here, but we can't. The problem is that + // SCEVHandle's don't have a deterministic ordering w.r.t to each other, so + // we don't have anything to do a '<' comparison on. Because we think the + // number of uses is small, do a horrible bubble sort which just relies on + // ==. + for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) { + // Get a base value. + SCEVHandle Base = UsersToProcess[i].Base; + + // Compact everything with this base to be consequetive with this one. + for (unsigned j = i+1; j != e; ++j) { + if (UsersToProcess[j].Base == Base) { + std::swap(UsersToProcess[i+1], UsersToProcess[j]); + ++i; + } + } + } + + // Process all the users now. This outer loop handles all bases, the inner + // loop handles all users of a particular base. while (!UsersToProcess.empty()) { SCEVHandle Base = UsersToProcess.back().Base; - DEBUG(std::cerr << " INSERTING code for BASE = " << *Base << ":\n"); + DOUT << " INSERTING code for BASE = " << *Base << ":\n"; // Emit the code for Base into the preheader. Value *BaseV = PreheaderRewriter.expandCodeFor(Base, PreInsertPt, @@ -1048,19 +1138,21 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, // If BaseV is a constant other than 0, make sure that it gets inserted into // the preheader, instead of being forward substituted into the uses. We do - // this by forcing a noop cast to be inserted into the preheader in this - // case. - if (Constant *C = dyn_cast(BaseV)) - if (!C->isNullValue() && !isTargetConstant(Base, TLI)) { - // We want this constant emitted into the preheader! - BaseV = new CastInst(BaseV, BaseV->getType(), "preheaderinsert", + // this by forcing a BitCast (noop cast) to be inserted into the preheader + // in this case. + if (Constant *C = dyn_cast(BaseV)) { + if (!C->isNullValue() && !isTargetConstant(Base, ReplacedTy, TLI)) { + // We want this constant emitted into the preheader! This is just + // using cast as a copy so BitCast (no-op cast) is appropriate + BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert", PreInsertPt); } - + } + // Emit the code to add the immediate offset to the Phi value, just before // the instructions that we identified as using this stride and base. - unsigned ScanPos = 0; do { + // FIXME: Use emitted users to emit other users. BasedUser &User = UsersToProcess.back(); // If this instruction wants to use the post-incremented value, move it @@ -1074,6 +1166,14 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, if (L->contains(User.Inst->getParent())) User.Inst->moveBefore(LatchBlock->getTerminator()); } + if (RewriteOp->getType() != ReplacedTy) { + Instruction::CastOps opcode = Instruction::Trunc; + if (ReplacedTy->getPrimitiveSizeInBits() == + RewriteOp->getType()->getPrimitiveSizeInBits()) + opcode = Instruction::BitCast; + RewriteOp = SCEVExpander::InsertCastOfTo(opcode, RewriteOp, ReplacedTy); + } + SCEVHandle RewriteExpr = SCEVUnknown::get(RewriteOp); // Clear the SCEVExpander's expression map so that we are guaranteed @@ -1092,14 +1192,14 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, // are reusing an IV, it has not been used to initialize the PHI node. // Add it to the expression used to rewrite the uses. if (!isa(CommonBaseV) || - !cast(CommonBaseV)->isNullValue()) + !cast(CommonBaseV)->isZero()) RewriteExpr = SCEVAddExpr::get(RewriteExpr, SCEVUnknown::get(CommonBaseV)); } // Now that we know what we need to do, insert code before User for the // immediate and any loop-variant expressions. - if (!isa(BaseV) || !cast(BaseV)->isNullValue()) + if (!isa(BaseV) || !cast(BaseV)->isZero()) // Add BaseV to the PHI value if needed. RewriteExpr = SCEVAddExpr::get(RewriteExpr, SCEVUnknown::get(BaseV)); @@ -1112,15 +1212,8 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, UsersToProcess.pop_back(); ++NumReduced; - // If there are any more users to process with the same base, move one of - // them to the end of the list so that we will process it. - if (!UsersToProcess.empty()) { - for (unsigned e = UsersToProcess.size(); ScanPos != e; ++ScanPos) - if (UsersToProcess[ScanPos].Base == Base) { - std::swap(UsersToProcess[ScanPos], UsersToProcess.back()); - break; - } - } + // If there are any more users to process with the same base, process them + // now. We sorted by base above, so we just have to check the last elt. } while (!UsersToProcess.empty() && UsersToProcess.back().Base == Base); // TODO: Next, find out which base index is the most common, pull it out. } @@ -1135,22 +1228,19 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, void LoopStrengthReduce::OptimizeIndvars(Loop *L) { // TODO: implement optzns here. - - - // Finally, get the terminating condition for the loop if possible. If we // can, we want to change it to use a post-incremented version of its - // induction variable, to allow coallescing the live ranges for the IV into + // induction variable, to allow coalescing the live ranges for the IV into // one register value. PHINode *SomePHI = cast(L->getHeader()->begin()); BasicBlock *Preheader = L->getLoopPreheader(); BasicBlock *LatchBlock = SomePHI->getIncomingBlock(SomePHI->getIncomingBlock(0) == Preheader); BranchInst *TermBr = dyn_cast(LatchBlock->getTerminator()); - if (!TermBr || TermBr->isUnconditional() || - !isa(TermBr->getCondition())) + if (!TermBr || TermBr->isUnconditional() || + !isa(TermBr->getCondition())) return; - SetCondInst *Cond = cast(TermBr->getCondition()); + ICmpInst *Cond = cast(TermBr->getCondition()); // Search IVUsesByStride to find Cond's IVUse if there is one. IVStrideUse *CondUse = 0; @@ -1175,10 +1265,6 @@ void LoopStrengthReduce::OptimizeIndvars(Loop *L) { } if (!CondUse) return; // setcc doesn't use the IV. - // setcc stride is complex, don't mess with users. - // FIXME: Evaluate whether this is a good idea or not. - if (!isa(*CondStride)) return; - // It's possible for the setcc instruction to be anywhere in the loop, and // possible for it to have multiple users. If it is not immediately before // the latch block branch, move it. @@ -1187,7 +1273,7 @@ void LoopStrengthReduce::OptimizeIndvars(Loop *L) { Cond->moveBefore(TermBr); } else { // Otherwise, clone the terminating condition and insert into the loopend. - Cond = cast(Cond->clone()); + Cond = cast(Cond->clone()); Cond->setName(L->getHeader()->getName() + ".termcond"); LatchBlock->getInstList().insert(TermBr, Cond); @@ -1199,7 +1285,7 @@ void LoopStrengthReduce::OptimizeIndvars(Loop *L) { } // If we get to here, we know that we can transform the setcc instruction to - // use the post-incremented version of the IV, allowing us to coallesce the + // use the post-incremented version of the IV, allowing us to coalesce the // live ranges for the IV correctly. CondUse->Offset = SCEV::getMinusSCEV(CondUse->Offset, *CondStride); CondUse->isUseOfPostIncrementedValue = true; @@ -1229,12 +1315,15 @@ namespace { }; } -void LoopStrengthReduce::runOnLoop(Loop *L) { - // First step, transform all loops nesting inside of this loop. - for (LoopInfo::iterator I = L->begin(), E = L->end(); I != E; ++I) - runOnLoop(*I); +bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { - // Next, find all uses of induction variables in this loop, and catagorize + LI = &getAnalysis(); + EF = &getAnalysis(); + SE = &getAnalysis(); + TD = &getAnalysis(); + UIntPtrTy = TD->getIntPtrType(); + + // Find all uses of induction variables in this loop, and catagorize // them by stride. Start by finding all of the PHI nodes in the header for // this loop. If they are induction variables, inspect their uses. std::set Processed; // Don't reprocess instructions. @@ -1242,7 +1331,7 @@ void LoopStrengthReduce::runOnLoop(Loop *L) { AddUsersIfInteresting(I, L, Processed); // If we have nothing to do, return. - if (IVUsesByStride.empty()) return; + if (IVUsesByStride.empty()) return false; // Optimize induction variables. Some indvar uses can be transformed to use // strides that will be needed for other purposes. A common example of this @@ -1264,7 +1353,7 @@ void LoopStrengthReduce::runOnLoop(Loop *L) { bool HasOneStride = IVUsesByStride.size() == 1; #ifndef NDEBUG - DEBUG(std::cerr << "\nLSR on "); + DOUT << "\nLSR on "; DEBUG(L->dump()); #endif @@ -1308,9 +1397,9 @@ void LoopStrengthReduce::runOnLoop(Loop *L) { // FIXME: this needs to eliminate an induction variable even if it's being // compared against some value to decide loop termination. if (PN->hasOneUse()) { - BinaryOperator *BO = dyn_cast(*(PN->use_begin())); - if (BO && BO->hasOneUse()) { - if (PN == *(BO->use_begin())) { + Instruction *BO = dyn_cast(*PN->use_begin()); + if (BO && (isa(BO) || isa(BO))) { + if (BO->hasOneUse() && PN == *(BO->use_begin())) { DeadInsts.insert(BO); // Break the cycle, then delete the PHI. PN->replaceAllUsesWith(UndefValue::get(PN->getType())); @@ -1326,5 +1415,5 @@ void LoopStrengthReduce::runOnLoop(Loop *L) { CastedPointers.clear(); IVUsesByStride.clear(); StrideOrder.clear(); - return; + return false; }