X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FScalar%2FLoopStrengthReduce.cpp;h=db8ab485e394c428ae1b60e83ab27864e3f17893;hb=1d31290634eccc3b360c427282d59780d76b9169;hp=ad88fc10b25ae88232c9e86332b07b72965a31fa;hpb=7b445c521bc191d0d25799b289e17b45f202a1af;p=oota-llvm.git diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index ad88fc10b25..db8ab485e39 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -23,6 +23,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Support/CFG.h" #include "llvm/Support/GetElementPtrTypeIterator.h" @@ -31,20 +32,25 @@ #include "llvm/Target/TargetData.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Target/TargetLowering.h" #include #include using namespace llvm; +STATISTIC(NumReduced , "Number of GEPs strength reduced"); +STATISTIC(NumInserted, "Number of PHIs inserted"); +STATISTIC(NumVariable, "Number of PHIs with variable strides"); + namespace { - Statistic<> NumReduced ("loop-reduce", "Number of GEPs strength reduced"); - Statistic<> NumInserted("loop-reduce", "Number of PHIs inserted"); - Statistic<> NumVariable("loop-reduce","Number of PHIs with variable strides"); + + struct BasedUser; /// IVStrideUse - Keep track of one use of a strided induction variable, where /// the stride is stored externally. The Offset member keeps track of the /// offset from the IV, User is the actual user of the operand, and 'Operand' /// is the operand # of the User that is the use. - struct IVStrideUse { + struct VISIBILITY_HIDDEN IVStrideUse { SCEVHandle Offset; Instruction *User; Value *OperandValToReplace; @@ -64,7 +70,7 @@ namespace { /// have an operand that is based on the trip count multiplied by some stride. /// The stride for all of these users is common and kept external to this /// structure. - struct IVUsersOfOneStride { + struct VISIBILITY_HIDDEN IVUsersOfOneStride { /// Users - Keep track of all of the users of this stride as well as the /// initial value and the operand that uses the IV. std::vector Users; @@ -74,23 +80,50 @@ namespace { } }; + /// IVInfo - This structure keeps track of one IV expression inserted during + /// StrengthReduceStridedIVUsers. It contains the stride, the common base, as + /// well as the PHI node and increment value created for rewrite. + struct VISIBILITY_HIDDEN IVExpr { + SCEVHandle Stride; + SCEVHandle Base; + PHINode *PHI; + Value *IncV; + + IVExpr() + : Stride(SCEVUnknown::getIntegerSCEV(0, Type::Int32Ty)), + Base (SCEVUnknown::getIntegerSCEV(0, Type::Int32Ty)) {} + IVExpr(const SCEVHandle &stride, const SCEVHandle &base, PHINode *phi, + Value *incv) + : Stride(stride), Base(base), PHI(phi), IncV(incv) {} + }; - class LoopStrengthReduce : public FunctionPass { + /// IVsOfOneStride - This structure keeps track of all IV expression inserted + /// during StrengthReduceStridedIVUsers for a particular stride of the IV. + struct VISIBILITY_HIDDEN IVsOfOneStride { + std::vector IVs; + + void addIV(const SCEVHandle &Stride, const SCEVHandle &Base, PHINode *PHI, + Value *IncV) { + IVs.push_back(IVExpr(Stride, Base, PHI, IncV)); + } + }; + + class VISIBILITY_HIDDEN LoopStrengthReduce : public LoopPass { LoopInfo *LI; - DominatorSet *DS; + ETForest *EF; ScalarEvolution *SE; const TargetData *TD; const Type *UIntPtrTy; bool Changed; - /// MaxTargetAMSize - This is the maximum power-of-two scale value that the - /// target can handle for free with its addressing modes. - unsigned MaxTargetAMSize; - /// IVUsesByStride - Keep track of all uses of induction variables that we /// are interested in. The key of the map is the stride of the access. std::map IVUsesByStride; + /// IVsByStride - Keep track of all IVs that have been inserted for a + /// particular stride. + std::map IVsByStride; + /// StrideOrder - An ordering of the keys in IVUsesByStride that is stable: /// We use this to iterate over the IVUsesByStride collection without being /// dependent on random ordering of pointers in the process. @@ -104,94 +137,76 @@ namespace { /// DeadInsts - Keep track of instructions we may have made dead, so that /// we can remove them after we are done working. std::set DeadInsts; - public: - LoopStrengthReduce(unsigned MTAMS = 1) - : MaxTargetAMSize(MTAMS) { - } - virtual bool runOnFunction(Function &) { - LI = &getAnalysis(); - DS = &getAnalysis(); - SE = &getAnalysis(); - TD = &getAnalysis(); - UIntPtrTy = TD->getIntPtrType(); - Changed = false; + /// TLI - Keep a pointer of a TargetLowering to consult for determining + /// transformation profitability. + const TargetLowering *TLI; - for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) - runOnLoop(*I); - - return Changed; + public: + LoopStrengthReduce(const TargetLowering *tli = NULL) + : TLI(tli) { } + bool runOnLoop(Loop *L, LPPassManager &LPM); + virtual void getAnalysisUsage(AnalysisUsage &AU) const { // We split critical edges, so we change the CFG. However, we do update // many analyses if they are around. AU.addPreservedID(LoopSimplifyID); AU.addPreserved(); AU.addPreserved(); + AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); AU.addRequiredID(LoopSimplifyID); AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); } /// getCastedVersionOf - Return the specified value casted to uintptr_t. /// - Value *getCastedVersionOf(Value *V); + Value *getCastedVersionOf(Instruction::CastOps opcode, Value *V); private: - void runOnLoop(Loop *L); bool AddUsersIfInteresting(Instruction *I, Loop *L, std::set &Processed); SCEVHandle GetExpressionSCEV(Instruction *E, Loop *L); void OptimizeIndvars(Loop *L); + unsigned CheckForIVReuse(const SCEVHandle&, IVExpr&, const Type*, + const std::vector& UsersToProcess); + + bool ValidStride(int64_t, const std::vector& UsersToProcess); + void StrengthReduceStridedIVUsers(const SCEVHandle &Stride, IVUsersOfOneStride &Uses, Loop *L, bool isOnlyStride); void DeleteTriviallyDeadInstructions(std::set &Insts); }; - RegisterOpt X("loop-reduce", - "Loop Strength Reduction"); + RegisterPass X("loop-reduce", "Loop Strength Reduction"); } -FunctionPass *llvm::createLoopStrengthReducePass(unsigned MaxTargetAMSize) { - return new LoopStrengthReduce(MaxTargetAMSize); +LoopPass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { + return new LoopStrengthReduce(TLI); } -/// getCastedVersionOf - Return the specified value casted to uintptr_t. +/// getCastedVersionOf - Return the specified value casted to uintptr_t. This +/// assumes that the Value* V is of integer or pointer type only. /// -Value *LoopStrengthReduce::getCastedVersionOf(Value *V) { +Value *LoopStrengthReduce::getCastedVersionOf(Instruction::CastOps opcode, + Value *V) { if (V->getType() == UIntPtrTy) return V; if (Constant *CB = dyn_cast(V)) - return ConstantExpr::getCast(CB, UIntPtrTy); + return ConstantExpr::getCast(opcode, CB, UIntPtrTy); Value *&New = CastedPointers[V]; if (New) return New; - BasicBlock::iterator InsertPt; - if (Argument *Arg = dyn_cast(V)) { - // Insert into the entry of the function, after any allocas. - InsertPt = Arg->getParent()->begin()->begin(); - while (isa(InsertPt)) ++InsertPt; - } else { - if (InvokeInst *II = dyn_cast(V)) { - InsertPt = II->getNormalDest()->begin(); - } else { - InsertPt = cast(V); - ++InsertPt; - } - - // Do not insert casts into the middle of PHI node blocks. - while (isa(InsertPt)) ++InsertPt; - } - - New = new CastInst(V, UIntPtrTy, V->getName(), InsertPt); + New = SCEVExpander::InsertCastOfTo(opcode, V, UIntPtrTy); DeadInsts.insert(cast(New)); return New; } @@ -220,6 +235,16 @@ DeleteTriviallyDeadInstructions(std::set &Insts) { /// GetExpressionSCEV - Compute and return the SCEV for the specified /// instruction. SCEVHandle LoopStrengthReduce::GetExpressionSCEV(Instruction *Exp, Loop *L) { + // Pointer to pointer bitcast instructions return the same value as their + // operand. + if (BitCastInst *BCI = dyn_cast(Exp)) { + if (SE->hasSCEV(BCI) || !isa(BCI->getOperand(0))) + return SE->getSCEV(BCI); + SCEVHandle R = GetExpressionSCEV(cast(BCI->getOperand(0)), L); + SE->setSCEV(BCI, R); + return R; + } + // Scalar Evolutions doesn't know how to compute SCEV's for GEP instructions. // If this is a GEP that SE doesn't know about, compute it now and insert it. // If this is not a GEP, or if we have already done this computation, just let @@ -234,7 +259,8 @@ SCEVHandle LoopStrengthReduce::GetExpressionSCEV(Instruction *Exp, Loop *L) { // Build up the base expression. Insert an LLVM cast of the pointer to // uintptr_t first. - SCEVHandle GEPVal = SCEVUnknown::get(getCastedVersionOf(GEP->getOperand(0))); + SCEVHandle GEPVal = SCEVUnknown::get( + getCastedVersionOf(Instruction::PtrToInt, GEP->getOperand(0))); gep_type_iterator GTI = gep_type_begin(GEP); @@ -244,18 +270,24 @@ SCEVHandle LoopStrengthReduce::GetExpressionSCEV(Instruction *Exp, Loop *L) { // operand. if (const StructType *STy = dyn_cast(*GTI)) { const StructLayout *SL = TD->getStructLayout(STy); - unsigned Idx = cast(GEP->getOperand(i))->getValue(); - uint64_t Offset = SL->MemberOffsets[Idx]; + unsigned Idx = cast(GEP->getOperand(i))->getZExtValue(); + uint64_t Offset = SL->getElementOffset(Idx); GEPVal = SCEVAddExpr::get(GEPVal, SCEVUnknown::getIntegerSCEV(Offset, UIntPtrTy)); } else { - Value *OpVal = getCastedVersionOf(GEP->getOperand(i)); + unsigned GEPOpiBits = + GEP->getOperand(i)->getType()->getPrimitiveSizeInBits(); + unsigned IntPtrBits = UIntPtrTy->getPrimitiveSizeInBits(); + Instruction::CastOps opcode = (GEPOpiBits < IntPtrBits ? + Instruction::SExt : (GEPOpiBits > IntPtrBits ? Instruction::Trunc : + Instruction::BitCast)); + Value *OpVal = getCastedVersionOf(opcode, GEP->getOperand(i)); SCEVHandle Idx = SE->getSCEV(OpVal); uint64_t TypeSize = TD->getTypeSize(GTI.getIndexedType()); if (TypeSize != 1) Idx = SCEVMulExpr::get(Idx, - SCEVConstant::get(ConstantUInt::get(UIntPtrTy, + SCEVConstant::get(ConstantInt::get(UIntPtrTy, TypeSize))); GEPVal = SCEVAddExpr::get(GEPVal, Idx); } @@ -287,7 +319,7 @@ static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, Start = SCEVAddExpr::get(Start, AE->getOperand(i)); } - } else if (SCEVAddRecExpr *AddRec = dyn_cast(SH)) { + } else if (isa(SH)) { TheAddRec = SH; } else { return false; // not analyzable. @@ -302,16 +334,10 @@ static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, Start = SCEVAddExpr::get(Start, AddRec->getOperand(0)); if (!isa(AddRec->getOperand(1))) - DEBUG(std::cerr << "[" << L->getHeader()->getName() - << "] Variable stride: " << *AddRec << "\n"); + DOUT << "[" << L->getHeader()->getName() + << "] Variable stride: " << *AddRec << "\n"; Stride = AddRec->getOperand(1); - // Check that all constant strides are the unsigned type, we don't want to - // have two IV's one of signed stride 4 and one of unsigned stride 4 to not be - // merged. - assert((!isa(Stride) || Stride->getType()->isUnsigned()) && - "Constants should be canonicalized to unsigned!"); - return true; } @@ -324,7 +350,7 @@ static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, /// the loop, resulting in reg-reg copies (if we use the pre-inc value when we /// should use the post-inc value). static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, - Loop *L, DominatorSet *DS, Pass *P) { + Loop *L, ETForest *EF, Pass *P) { // If the user is in the loop, use the preinc value. if (L->contains(User->getParent())) return false; @@ -332,7 +358,7 @@ static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, // Ok, the user is outside of the loop. If it is dominated by the latch // block, use the post-inc value. - if (DS->dominates(LatchBlock, User->getParent())) + if (EF->dominates(LatchBlock, User->getParent())) return true; // There is one case we have to be careful of: PHI nodes. These little guys @@ -349,7 +375,7 @@ static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == IV) { ++NumUses; - if (!DS->dominates(LatchBlock, PN->getIncomingBlock(i))) + if (!EF->dominates(LatchBlock, PN->getIncomingBlock(i))) return false; } @@ -358,7 +384,11 @@ static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, // post-incremented value. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == IV) { - SplitCriticalEdge(PN->getIncomingBlock(i), PN->getParent(), P); + SplitCriticalEdge(PN->getIncomingBlock(i), PN->getParent(), P, + true); + // Splitting the critical edge can reduce the number of entries in this + // PHI. + e = PN->getNumIncomingValues(); if (--NumUses == 0) break; } @@ -372,7 +402,8 @@ static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, /// return true. Otherwise, return false. bool LoopStrengthReduce::AddUsersIfInteresting(Instruction *I, Loop *L, std::set &Processed) { - if (I->getType() == Type::VoidTy) return false; + if (!I->getType()->isInteger() && !isa(I->getType())) + return false; // Void and FP expressions cannot be reduced. if (!Processed.insert(I).second) return true; // Instruction already handled. @@ -385,10 +416,14 @@ bool LoopStrengthReduce::AddUsersIfInteresting(Instruction *I, Loop *L, SCEVHandle Stride = Start; if (!getSCEVStartAndStride(ISE, L, Start, Stride)) return false; // Non-reducible symbolic expression, bail out. - - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;++UI){ + + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;) { Instruction *User = cast(*UI); + // Increment iterator now because IVUseShouldUsePostIncValue may remove + // User from the list of I users. + ++UI; + // Do not infinitely recurse on PHI nodes. if (isa(User) && Processed.count(User)) continue; @@ -397,12 +432,12 @@ bool LoopStrengthReduce::AddUsersIfInteresting(Instruction *I, Loop *L, // don't recurse into it. bool AddUserToIVUsers = false; if (LI->getLoopFor(User->getParent()) != L) { - DEBUG(std::cerr << "FOUND USER in other loop: " << *User - << " OF SCEV: " << *ISE << "\n"); + DOUT << "FOUND USER in other loop: " << *User + << " OF SCEV: " << *ISE << "\n"; AddUserToIVUsers = true; } else if (!AddUsersIfInteresting(User, L, Processed)) { - DEBUG(std::cerr << "FOUND USER: " << *User - << " OF SCEV: " << *ISE << "\n"); + DOUT << "FOUND USER: " << *User + << " OF SCEV: " << *ISE << "\n"; AddUserToIVUsers = true; } @@ -414,13 +449,13 @@ bool LoopStrengthReduce::AddUsersIfInteresting(Instruction *I, Loop *L, // Okay, we found a user that we cannot reduce. Analyze the instruction // and decide what to do with it. If we are a use inside of the loop, use // the value before incrementation, otherwise use it after incrementation. - if (IVUseShouldUsePostIncValue(User, I, L, DS, this)) { + if (IVUseShouldUsePostIncValue(User, I, L, EF, this)) { // The value used will be incremented by the stride more than we are // expecting, so subtract this off. SCEVHandle NewStart = SCEV::getMinusSCEV(Start, Stride); StrideUses.addUser(NewStart, User, I); StrideUses.Users.back().isUseOfPostIncrementedValue = true; - DEBUG(std::cerr << " USING POSTINC SCEV, START=" << *NewStart<< "\n"); + DOUT << " USING POSTINC SCEV, START=" << *NewStart<< "\n"; } else { StrideUses.addUser(Start, User, I); } @@ -474,19 +509,58 @@ namespace { void RewriteInstructionToUseNewBase(const SCEVHandle &NewBase, SCEVExpander &Rewriter, Loop *L, Pass *P); + + Value *InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, + SCEVExpander &Rewriter, + Instruction *IP, Loop *L); void dump() const; }; } void BasedUser::dump() const { - std::cerr << " Base=" << *Base; - std::cerr << " Imm=" << *Imm; + cerr << " Base=" << *Base; + cerr << " Imm=" << *Imm; if (EmittedBase) - std::cerr << " EB=" << *EmittedBase; + cerr << " EB=" << *EmittedBase; - std::cerr << " Inst: " << *Inst; + cerr << " Inst: " << *Inst; } +Value *BasedUser::InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, + SCEVExpander &Rewriter, + Instruction *IP, Loop *L) { + // Figure out where we *really* want to insert this code. In particular, if + // the user is inside of a loop that is nested inside of L, we really don't + // want to insert this expression before the user, we'd rather pull it out as + // many loops as possible. + LoopInfo &LI = Rewriter.getLoopInfo(); + Instruction *BaseInsertPt = IP; + + // Figure out the most-nested loop that IP is in. + Loop *InsertLoop = LI.getLoopFor(IP->getParent()); + + // If InsertLoop is not L, and InsertLoop is nested inside of L, figure out + // the preheader of the outer-most loop where NewBase is not loop invariant. + while (InsertLoop && NewBase->isLoopInvariant(InsertLoop)) { + BaseInsertPt = InsertLoop->getLoopPreheader()->getTerminator(); + InsertLoop = InsertLoop->getParentLoop(); + } + + // If there is no immediate value, skip the next part. + if (SCEVConstant *SC = dyn_cast(Imm)) + if (SC->getValue()->isZero()) + return Rewriter.expandCodeFor(NewBase, BaseInsertPt, + OperandValToReplace->getType()); + + Value *Base = Rewriter.expandCodeFor(NewBase, BaseInsertPt); + + // Always emit the immediate (if non-zero) into the same block as the user. + SCEVHandle NewValSCEV = SCEVAddExpr::get(SCEVUnknown::get(Base), Imm); + return Rewriter.expandCodeFor(NewValSCEV, IP, + OperandValToReplace->getType()); +} + + // Once we rewrite the code to insert the new IVs we want, update the // operands of Inst to use the new expression 'NewBase', with 'Imm' added // to it. @@ -494,12 +568,10 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase, SCEVExpander &Rewriter, Loop *L, Pass *P) { if (!isa(Inst)) { - SCEVHandle NewValSCEV = SCEVAddExpr::get(NewBase, Imm); - Value *NewVal = Rewriter.expandCodeFor(NewValSCEV, Inst, - OperandValToReplace->getType()); + Value *NewVal = InsertCodeForBaseAtPosition(NewBase, Rewriter, Inst, L); // Replace the use of the operand Value with the new Phi we just created. Inst->replaceUsesOfWith(OperandValToReplace, NewVal); - DEBUG(std::cerr << " CHANGED: IMM =" << *Imm << " Inst = " << *Inst); + DOUT << " CHANGED: IMM =" << *Imm << " Inst = " << *Inst; return; } @@ -521,7 +593,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase, (PN->getParent() != L->getHeader() || !L->contains(PHIPred))) { // First step, split the critical edge. - SplitCriticalEdge(PHIPred, PN->getParent(), P); + SplitCriticalEdge(PHIPred, PN->getParent(), P, true); // Next step: move the basic block. In particular, if the PHI node // is outside of the loop, and PredTI is in the loop, we want to @@ -531,16 +603,16 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase, BasicBlock *NewBB = PN->getIncomingBlock(i); NewBB->moveBefore(PN->getParent()); } + + // Splitting the edge can reduce the number of PHI entries we have. + e = PN->getNumIncomingValues(); } Value *&Code = InsertedCode[PN->getIncomingBlock(i)]; if (!Code) { // Insert the code into the end of the predecessor block. - BasicBlock::iterator InsertPt =PN->getIncomingBlock(i)->getTerminator(); - - SCEVHandle NewValSCEV = SCEVAddExpr::get(NewBase, Imm); - Code = Rewriter.expandCodeFor(NewValSCEV, InsertPt, - OperandValToReplace->getType()); + Instruction *InsertPt = PN->getIncomingBlock(i)->getTerminator(); + Code = InsertCodeForBaseAtPosition(NewBase, Rewriter, InsertPt, L); } // Replace the use of the operand Value with the new Phi we just created. @@ -548,31 +620,31 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase, Rewriter.clear(); } } - DEBUG(std::cerr << " CHANGED: IMM =" << *Imm << " Inst = " << *Inst); + DOUT << " CHANGED: IMM =" << *Imm << " Inst = " << *Inst; } /// isTargetConstant - Return true if the following can be referenced by the /// immediate field of a target instruction. -static bool isTargetConstant(const SCEVHandle &V) { - - // FIXME: Look at the target to decide if &GV is a legal constant immediate. +static bool isTargetConstant(const SCEVHandle &V, const Type *UseTy, + const TargetLowering *TLI) { if (SCEVConstant *SC = dyn_cast(V)) { - // PPC allows a sign-extended 16-bit immediate field. - if ((int64_t)SC->getValue()->getRawValue() > -(1 << 16) && - (int64_t)SC->getValue()->getRawValue() < (1 << 16)-1) - return true; - return false; + int64_t VC = SC->getValue()->getSExtValue(); + if (TLI) + return TLI->isLegalAddressImmediate(VC, UseTy); + else + // Defaults to PPC. PPC allows a sign-extended 16-bit immediate field. + return (VC > -(1 << 16) && VC < (1 << 16)-1); } - return false; // ENABLE this for x86 - if (SCEVUnknown *SU = dyn_cast(V)) if (ConstantExpr *CE = dyn_cast(SU->getValue())) - if (CE->getOpcode() == Instruction::Cast) - if (isa(CE->getOperand(0))) - // FIXME: should check to see that the dest is uintptr_t! + if (CE->getOpcode() == Instruction::PtrToInt) { + Constant *Op0 = CE->getOperand(0); + if (isa(Op0) && TLI && + TLI->isLegalAddressImmediate(cast(Op0))) return true; + } return false; } @@ -618,22 +690,30 @@ static void MoveLoopVariantsToImediateField(SCEVHandle &Val, SCEVHandle &Imm, /// MoveImmediateValues - Look at Val, and pull out any additions of constants /// that can fit into the immediate field of instructions in the target. /// Accumulate these immediate values into the Imm value. -static void MoveImmediateValues(SCEVHandle &Val, SCEVHandle &Imm, +static void MoveImmediateValues(const TargetLowering *TLI, + Instruction *User, + SCEVHandle &Val, SCEVHandle &Imm, bool isAddress, Loop *L) { + const Type *UseTy = User->getType(); + if (StoreInst *SI = dyn_cast(User)) + UseTy = SI->getOperand(0)->getType(); + if (SCEVAddExpr *SAE = dyn_cast(Val)) { std::vector NewOps; NewOps.reserve(SAE->getNumOperands()); - for (unsigned i = 0; i != SAE->getNumOperands(); ++i) - if (isAddress && isTargetConstant(SAE->getOperand(i))) { - Imm = SCEVAddExpr::get(Imm, SAE->getOperand(i)); - } else if (!SAE->getOperand(i)->isLoopInvariant(L)) { + for (unsigned i = 0; i != SAE->getNumOperands(); ++i) { + SCEVHandle NewOp = SAE->getOperand(i); + MoveImmediateValues(TLI, User, NewOp, Imm, isAddress, L); + + if (!NewOp->isLoopInvariant(L)) { // If this is a loop-variant expression, it must stay in the immediate // field of the expression. - Imm = SCEVAddExpr::get(Imm, SAE->getOperand(i)); + Imm = SCEVAddExpr::get(Imm, NewOp); } else { - NewOps.push_back(SAE->getOperand(i)); + NewOps.push_back(NewOp); } + } if (NewOps.empty()) Val = SCEVUnknown::getIntegerSCEV(0, Val->getType()); @@ -643,7 +723,7 @@ static void MoveImmediateValues(SCEVHandle &Val, SCEVHandle &Imm, } else if (SCEVAddRecExpr *SARE = dyn_cast(Val)) { // Try to pull immediates out of the start value of nested addrec's. SCEVHandle Start = SARE->getStart(); - MoveImmediateValues(Start, Imm, isAddress, L); + MoveImmediateValues(TLI, User, Start, Imm, isAddress, L); if (Start != SARE->getStart()) { std::vector Ops(SARE->op_begin(), SARE->op_end()); @@ -651,11 +731,36 @@ static void MoveImmediateValues(SCEVHandle &Val, SCEVHandle &Imm, Val = SCEVAddRecExpr::get(Ops, SARE->getLoop()); } return; + } else if (SCEVMulExpr *SME = dyn_cast(Val)) { + // Transform "8 * (4 + v)" -> "32 + 8*V" if "32" fits in the immed field. + if (isAddress && isTargetConstant(SME->getOperand(0), UseTy, TLI) && + SME->getNumOperands() == 2 && SME->isLoopInvariant(L)) { + + SCEVHandle SubImm = SCEVUnknown::getIntegerSCEV(0, Val->getType()); + SCEVHandle NewOp = SME->getOperand(1); + MoveImmediateValues(TLI, User, NewOp, SubImm, isAddress, L); + + // If we extracted something out of the subexpressions, see if we can + // simplify this! + if (NewOp != SME->getOperand(1)) { + // Scale SubImm up by "8". If the result is a target constant, we are + // good. + SubImm = SCEVMulExpr::get(SubImm, SME->getOperand(0)); + if (isTargetConstant(SubImm, UseTy, TLI)) { + // Accumulate the immediate. + Imm = SCEVAddExpr::get(Imm, SubImm); + + // Update what is left of 'Val'. + Val = SCEVMulExpr::get(SME->getOperand(0), NewOp); + return; + } + } + } } // Loop-variant expressions must stay in the immediate field of the // expression. - if ((isAddress && isTargetConstant(Val)) || + if ((isAddress && isTargetConstant(Val, UseTy, TLI)) || !Val->isLoopInvariant(L)) { Imm = SCEVAddExpr::get(Imm, Val); Val = SCEVUnknown::getIntegerSCEV(0, Val->getType()); @@ -666,9 +771,9 @@ static void MoveImmediateValues(SCEVHandle &Val, SCEVHandle &Imm, } -/// IncrementAddExprUses - Decompose the specified expression into its added -/// subexpressions, and increment SubExpressionUseCounts for each of these -/// decomposed parts. +/// SeparateSubExprs - Decompose Expr into all of the subexpressions that are +/// added together. This is used to reassociate common addition subexprs +/// together for maximal sharing when rewriting bases. static void SeparateSubExprs(std::vector &SubExprs, SCEVHandle Expr) { if (SCEVAddExpr *AE = dyn_cast(Expr)) { @@ -688,7 +793,7 @@ static void SeparateSubExprs(std::vector &SubExprs, SeparateSubExprs(SubExprs, SARE->getOperand(0)); } } else if (!isa(Expr) || - !cast(Expr)->getValue()->isNullValue()) { + !cast(Expr)->getValue()->isZero()) { // Do not add zero. SubExprs.push_back(Expr); } @@ -715,6 +820,10 @@ RemoveCommonExpressionsFromUseBases(std::vector &Uses) { // If any subexpressions are used Uses.size() times, they are common. std::map SubExpressionUseCounts; + // UniqueSubExprs - Keep track of all of the subexpressions we see in the + // order we see them. + std::vector UniqueSubExprs; + std::vector SubExprs; for (unsigned i = 0; i != NumUses; ++i) { // If the base is zero (which is common), return zero now, there are no @@ -725,22 +834,24 @@ RemoveCommonExpressionsFromUseBases(std::vector &Uses) { SeparateSubExprs(SubExprs, Uses[i].Base); // Add one to SubExpressionUseCounts for each subexpr present. for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) - SubExpressionUseCounts[SubExprs[j]]++; + if (++SubExpressionUseCounts[SubExprs[j]] == 1) + UniqueSubExprs.push_back(SubExprs[j]); SubExprs.clear(); } - - // Now that we know how many times each is used, build Result. - for (std::map::iterator I = - SubExpressionUseCounts.begin(), E = SubExpressionUseCounts.end(); - I != E; ) + // Now that we know how many times each is used, build Result. Iterate over + // UniqueSubexprs so that we have a stable ordering. + for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) { + std::map::iterator I = + SubExpressionUseCounts.find(UniqueSubExprs[i]); + assert(I != SubExpressionUseCounts.end() && "Entry not found?"); if (I->second == NumUses) { // Found CSE! Result = SCEVAddExpr::get(Result, I->first); - ++I; } else { // Remove non-cse's from SubExpressionUseCounts. - SubExpressionUseCounts.erase(I++); + SubExpressionUseCounts.erase(I); } + } // If we found no CSE's, return now. if (Result == Zero) return Result; @@ -768,6 +879,83 @@ RemoveCommonExpressionsFromUseBases(std::vector &Uses) { return Result; } +/// isZero - returns true if the scalar evolution expression is zero. +/// +static bool isZero(SCEVHandle &V) { + if (SCEVConstant *SC = dyn_cast(V)) + return SC->getValue()->isZero(); + return false; +} + +/// ValidStride - Check whether the given Scale is valid for all loads and +/// stores in UsersToProcess. Pulled into a function to avoid disturbing the +/// sensibilities of those who dislike goto's. +/// +bool LoopStrengthReduce::ValidStride(int64_t Scale, + const std::vector& UsersToProcess) { + int64_t Imm; + for (unsigned i=0, e = UsersToProcess.size(); i!=e; ++i) { + if (SCEVConstant *SC = dyn_cast(UsersToProcess[i].Imm)) + Imm = SC->getValue()->getSExtValue(); + else + Imm = 0; + + // If this is a load or other access, pass the type of the access in. + const Type *AccessTy = Type::VoidTy; + if (StoreInst *SI = dyn_cast(UsersToProcess[i].Inst)) + AccessTy = SI->getOperand(0)->getType(); + else if (LoadInst *LI = dyn_cast(UsersToProcess[i].Inst)) + AccessTy = LI->getType(); + + if (!TLI->isLegalAddressScaleAndImm(Scale, Imm, AccessTy)) + return false; + } + return true; +} + +/// CheckForIVReuse - Returns the multiple if the stride is the multiple +/// of a previous stride and it is a legal value for the target addressing +/// mode scale component. This allows the users of this stride to be rewritten +/// as prev iv * factor. It returns 0 if no reuse is possible. +unsigned LoopStrengthReduce::CheckForIVReuse(const SCEVHandle &Stride, + IVExpr &IV, const Type *Ty, + const std::vector& UsersToProcess) { + if (!TLI) return 0; + + if (SCEVConstant *SC = dyn_cast(Stride)) { + int64_t SInt = SC->getValue()->getSExtValue(); + if (SInt == 1) return 0; + + for (std::map::iterator SI= IVsByStride.begin(), + SE = IVsByStride.end(); SI != SE; ++SI) { + int64_t SSInt = cast(SI->first)->getValue()->getSExtValue(); + if (SInt != -SSInt && + (unsigned(abs(SInt)) < SSInt || (SInt % SSInt) != 0)) + continue; + int64_t Scale = SInt / SSInt; + // Check that this stride is valid for all the types used for loads and + // stores; if it can be used for some and not others, we might as well use + // the original stride everywhere, since we have to create the IV for it + // anyway. + if (ValidStride(Scale, UsersToProcess)) + for (std::vector::iterator II = SI->second.IVs.begin(), + IE = SI->second.IVs.end(); II != IE; ++II) + // FIXME: Only handle base == 0 for now. + // Only reuse previous IV if it would not require a type conversion. + if (isZero(II->Base) && II->Base->getType() == Ty) { + IV = *II; + return Scale; + } + } + } + return 0; +} + +/// PartitionByIsUseOfPostIncrementedValue - Simple boolean predicate that +/// returns true if Val's isUseOfPostIncrementedValue is true. +static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) { + return Val.isUseOfPostIncrementedValue; +} /// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single /// stride of IV. All of the users may have different starting values, and this @@ -794,7 +982,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, assert(UsersToProcess.back().Base->isLoopInvariant(L) && "Base value is not loop invariant!"); } - + // We now have a whole bunch of uses of like-strided induction variables, but // they might all have different bases. We want to emit one PHI node for this // stride which we fold as many common expressions (between the IVs) into as @@ -802,7 +990,8 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, // for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find // "A+B"), emit it to the preheader, then remove the expression from the // UsersToProcess base values. - SCEVHandle CommonExprs = RemoveCommonExpressionsFromUseBases(UsersToProcess); + SCEVHandle CommonExprs = + RemoveCommonExpressionsFromUseBases(UsersToProcess); // Next, figure out what we can represent in the immediate fields of // instructions. If we can represent anything there, move it to the imm @@ -826,16 +1015,35 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, if (SI->getOperand(1) == UsersToProcess[i].OperandValToReplace) isAddress = true; - MoveImmediateValues(UsersToProcess[i].Base, UsersToProcess[i].Imm, - isAddress, L); + MoveImmediateValues(TLI, UsersToProcess[i].Inst, UsersToProcess[i].Base, + UsersToProcess[i].Imm, isAddress, L); } } - + + // Check if it is possible to reuse a IV with stride that is factor of this + // stride. And the multiple is a number that can be encoded in the scale + // field of the target addressing mode. And we will have a valid + // instruction after this substition, including the immediate field, if any. + PHINode *NewPHI = NULL; + Value *IncV = NULL; + IVExpr ReuseIV; + unsigned RewriteFactor = CheckForIVReuse(Stride, ReuseIV, + CommonExprs->getType(), + UsersToProcess); + if (RewriteFactor != 0) { + DOUT << "BASED ON IV of STRIDE " << *ReuseIV.Stride + << " and BASE " << *ReuseIV.Base << " :\n"; + NewPHI = ReuseIV.PHI; + IncV = ReuseIV.IncV; + } + + const Type *ReplacedTy = CommonExprs->getType(); + // Now that we know what we need to do, insert the PHI node itself. // - DEBUG(std::cerr << "INSERTING IV of STRIDE " << *Stride << " and BASE " - << *CommonExprs << " :\n"); - + DOUT << "INSERTING IV of TYPE " << *ReplacedTy << " of STRIDE " + << *Stride << " and BASE " << *CommonExprs << " :\n"; + SCEVExpander Rewriter(*SE, *LI); SCEVExpander PreheaderRewriter(*SE, *LI); @@ -844,40 +1052,85 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, Instruction *PhiInsertBefore = L->getHeader()->begin(); BasicBlock *LatchBlock = L->getLoopLatch(); + + + // Emit the initial base value into the loop preheader. + Value *CommonBaseV + = PreheaderRewriter.expandCodeFor(CommonExprs, PreInsertPt, + ReplacedTy); + + if (RewriteFactor == 0) { + // Create a new Phi for this base, and stick it in the loop header. + NewPHI = new PHINode(ReplacedTy, "iv.", PhiInsertBefore); + ++NumInserted; - // Create a new Phi for this base, and stick it in the loop header. - const Type *ReplacedTy = CommonExprs->getType(); - PHINode *NewPHI = new PHINode(ReplacedTy, "iv.", PhiInsertBefore); - ++NumInserted; + // Add common base to the new Phi node. + NewPHI->addIncoming(CommonBaseV, Preheader); + + // Insert the stride into the preheader. + Value *StrideV = PreheaderRewriter.expandCodeFor(Stride, PreInsertPt, + ReplacedTy); + if (!isa(StrideV)) ++NumVariable; + + // Emit the increment of the base value before the terminator of the loop + // latch block, and add it to the Phi node. + SCEVHandle IncExp = SCEVAddExpr::get(SCEVUnknown::get(NewPHI), + SCEVUnknown::get(StrideV)); - // Insert the stride into the preheader. - Value *StrideV = PreheaderRewriter.expandCodeFor(Stride, PreInsertPt, - ReplacedTy); - if (!isa(StrideV)) ++NumVariable; + IncV = Rewriter.expandCodeFor(IncExp, LatchBlock->getTerminator(), + ReplacedTy); + IncV->setName(NewPHI->getName()+".inc"); + NewPHI->addIncoming(IncV, LatchBlock); + // Remember this in case a later stride is multiple of this. + IVsByStride[Stride].addIV(Stride, CommonExprs, NewPHI, IncV); + } else { + Constant *C = dyn_cast(CommonBaseV); + if (!C || + (!C->isNullValue() && + !isTargetConstant(SCEVUnknown::get(CommonBaseV), ReplacedTy, TLI))) + // We want the common base emitted into the preheader! This is just + // using cast as a copy so BitCast (no-op cast) is appropriate + CommonBaseV = new BitCastInst(CommonBaseV, CommonBaseV->getType(), + "commonbase", PreInsertPt); + } - // Emit the initial base value into the loop preheader, and add it to the - // Phi node. - Value *PHIBaseV = PreheaderRewriter.expandCodeFor(CommonExprs, PreInsertPt, - ReplacedTy); - NewPHI->addIncoming(PHIBaseV, Preheader); - - // Emit the increment of the base value before the terminator of the loop - // latch block, and add it to the Phi node. - SCEVHandle IncExp = SCEVAddExpr::get(SCEVUnknown::get(NewPHI), - SCEVUnknown::get(StrideV)); + // We want to emit code for users inside the loop first. To do this, we + // rearrange BasedUser so that the entries at the end have + // isUseOfPostIncrementedValue = false, because we pop off the end of the + // vector (so we handle them first). + std::partition(UsersToProcess.begin(), UsersToProcess.end(), + PartitionByIsUseOfPostIncrementedValue); - Value *IncV = Rewriter.expandCodeFor(IncExp, LatchBlock->getTerminator(), - ReplacedTy); - IncV->setName(NewPHI->getName()+".inc"); - NewPHI->addIncoming(IncV, LatchBlock); + // Sort this by base, so that things with the same base are handled + // together. By partitioning first and stable-sorting later, we are + // guaranteed that within each base we will pop off users from within the + // loop before users outside of the loop with a particular base. + // + // We would like to use stable_sort here, but we can't. The problem is that + // SCEVHandle's don't have a deterministic ordering w.r.t to each other, so + // we don't have anything to do a '<' comparison on. Because we think the + // number of uses is small, do a horrible bubble sort which just relies on + // ==. + for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) { + // Get a base value. + SCEVHandle Base = UsersToProcess[i].Base; + + // Compact everything with this base to be consequetive with this one. + for (unsigned j = i+1; j != e; ++j) { + if (UsersToProcess[j].Base == Base) { + std::swap(UsersToProcess[i+1], UsersToProcess[j]); + ++i; + } + } + } - // Sort by the base value, so that all IVs with identical bases are next to - // each other. + // Process all the users now. This outer loop handles all bases, the inner + // loop handles all users of a particular base. while (!UsersToProcess.empty()) { SCEVHandle Base = UsersToProcess.back().Base; - DEBUG(std::cerr << " INSERTING code for BASE = " << *Base << ":\n"); + DOUT << " INSERTING code for BASE = " << *Base << ":\n"; // Emit the code for Base into the preheader. Value *BaseV = PreheaderRewriter.expandCodeFor(Base, PreInsertPt, @@ -885,19 +1138,21 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, // If BaseV is a constant other than 0, make sure that it gets inserted into // the preheader, instead of being forward substituted into the uses. We do - // this by forcing a noop cast to be inserted into the preheader in this - // case. - if (Constant *C = dyn_cast(BaseV)) - if (!C->isNullValue() && !isTargetConstant(Base)) { - // We want this constant emitted into the preheader! - BaseV = new CastInst(BaseV, BaseV->getType(), "preheaderinsert", + // this by forcing a BitCast (noop cast) to be inserted into the preheader + // in this case. + if (Constant *C = dyn_cast(BaseV)) { + if (!C->isNullValue() && !isTargetConstant(Base, ReplacedTy, TLI)) { + // We want this constant emitted into the preheader! This is just + // using cast as a copy so BitCast (no-op cast) is appropriate + BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert", PreInsertPt); } - + } + // Emit the code to add the immediate offset to the Phi value, just before // the instructions that we identified as using this stride and base. - unsigned ScanPos = 0; do { + // FIXME: Use emitted users to emit other users. BasedUser &User = UsersToProcess.back(); // If this instruction wants to use the post-incremented value, move it @@ -911,18 +1166,43 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, if (L->contains(User.Inst->getParent())) User.Inst->moveBefore(LatchBlock->getTerminator()); } + if (RewriteOp->getType() != ReplacedTy) { + Instruction::CastOps opcode = Instruction::Trunc; + if (ReplacedTy->getPrimitiveSizeInBits() == + RewriteOp->getType()->getPrimitiveSizeInBits()) + opcode = Instruction::BitCast; + RewriteOp = SCEVExpander::InsertCastOfTo(opcode, RewriteOp, ReplacedTy); + } + SCEVHandle RewriteExpr = SCEVUnknown::get(RewriteOp); // Clear the SCEVExpander's expression map so that we are guaranteed // to have the code emitted where we expect it. Rewriter.clear(); - + + // If we are reusing the iv, then it must be multiplied by a constant + // factor take advantage of addressing mode scale component. + if (RewriteFactor != 0) { + RewriteExpr = + SCEVMulExpr::get(SCEVUnknown::getIntegerSCEV(RewriteFactor, + RewriteExpr->getType()), + RewriteExpr); + + // The common base is emitted in the loop preheader. But since we + // are reusing an IV, it has not been used to initialize the PHI node. + // Add it to the expression used to rewrite the uses. + if (!isa(CommonBaseV) || + !cast(CommonBaseV)->isZero()) + RewriteExpr = SCEVAddExpr::get(RewriteExpr, + SCEVUnknown::get(CommonBaseV)); + } + // Now that we know what we need to do, insert code before User for the // immediate and any loop-variant expressions. - if (!isa(BaseV) || !cast(BaseV)->isNullValue()) + if (!isa(BaseV) || !cast(BaseV)->isZero()) // Add BaseV to the PHI value if needed. RewriteExpr = SCEVAddExpr::get(RewriteExpr, SCEVUnknown::get(BaseV)); - + User.RewriteInstructionToUseNewBase(RewriteExpr, Rewriter, L, this); // Mark old value we replaced as possibly dead, so that it is elminated @@ -932,15 +1212,8 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, UsersToProcess.pop_back(); ++NumReduced; - // If there are any more users to process with the same base, move one of - // them to the end of the list so that we will process it. - if (!UsersToProcess.empty()) { - for (unsigned e = UsersToProcess.size(); ScanPos != e; ++ScanPos) - if (UsersToProcess[ScanPos].Base == Base) { - std::swap(UsersToProcess[ScanPos], UsersToProcess.back()); - break; - } - } + // If there are any more users to process with the same base, process them + // now. We sorted by base above, so we just have to check the last elt. } while (!UsersToProcess.empty() && UsersToProcess.back().Base == Base); // TODO: Next, find out which base index is the most common, pull it out. } @@ -955,22 +1228,19 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, void LoopStrengthReduce::OptimizeIndvars(Loop *L) { // TODO: implement optzns here. - - - // Finally, get the terminating condition for the loop if possible. If we // can, we want to change it to use a post-incremented version of its - // induction variable, to allow coallescing the live ranges for the IV into + // induction variable, to allow coalescing the live ranges for the IV into // one register value. PHINode *SomePHI = cast(L->getHeader()->begin()); BasicBlock *Preheader = L->getLoopPreheader(); BasicBlock *LatchBlock = SomePHI->getIncomingBlock(SomePHI->getIncomingBlock(0) == Preheader); BranchInst *TermBr = dyn_cast(LatchBlock->getTerminator()); - if (!TermBr || TermBr->isUnconditional() || - !isa(TermBr->getCondition())) + if (!TermBr || TermBr->isUnconditional() || + !isa(TermBr->getCondition())) return; - SetCondInst *Cond = cast(TermBr->getCondition()); + ICmpInst *Cond = cast(TermBr->getCondition()); // Search IVUsesByStride to find Cond's IVUse if there is one. IVStrideUse *CondUse = 0; @@ -995,10 +1265,6 @@ void LoopStrengthReduce::OptimizeIndvars(Loop *L) { } if (!CondUse) return; // setcc doesn't use the IV. - // setcc stride is complex, don't mess with users. - // FIXME: Evaluate whether this is a good idea or not. - if (!isa(*CondStride)) return; - // It's possible for the setcc instruction to be anywhere in the loop, and // possible for it to have multiple users. If it is not immediately before // the latch block branch, move it. @@ -1007,7 +1273,7 @@ void LoopStrengthReduce::OptimizeIndvars(Loop *L) { Cond->moveBefore(TermBr); } else { // Otherwise, clone the terminating condition and insert into the loopend. - Cond = cast(Cond->clone()); + Cond = cast(Cond->clone()); Cond->setName(L->getHeader()->getName() + ".termcond"); LatchBlock->getInstList().insert(TermBr, Cond); @@ -1019,18 +1285,45 @@ void LoopStrengthReduce::OptimizeIndvars(Loop *L) { } // If we get to here, we know that we can transform the setcc instruction to - // use the post-incremented version of the IV, allowing us to coallesce the + // use the post-incremented version of the IV, allowing us to coalesce the // live ranges for the IV correctly. CondUse->Offset = SCEV::getMinusSCEV(CondUse->Offset, *CondStride); CondUse->isUseOfPostIncrementedValue = true; } -void LoopStrengthReduce::runOnLoop(Loop *L) { - // First step, transform all loops nesting inside of this loop. - for (LoopInfo::iterator I = L->begin(), E = L->end(); I != E; ++I) - runOnLoop(*I); +namespace { + // Constant strides come first which in turns are sorted by their absolute + // values. If absolute values are the same, then positive strides comes first. + // e.g. + // 4, -1, X, 1, 2 ==> 1, -1, 2, 4, X + struct StrideCompare { + bool operator()(const SCEVHandle &LHS, const SCEVHandle &RHS) { + SCEVConstant *LHSC = dyn_cast(LHS); + SCEVConstant *RHSC = dyn_cast(RHS); + if (LHSC && RHSC) { + int64_t LV = LHSC->getValue()->getSExtValue(); + int64_t RV = RHSC->getValue()->getSExtValue(); + uint64_t ALV = (LV < 0) ? -LV : LV; + uint64_t ARV = (RV < 0) ? -RV : RV; + if (ALV == ARV) + return LV > RV; + else + return ALV < ARV; + } + return (LHSC && !RHSC); + } + }; +} + +bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { + + LI = &getAnalysis(); + EF = &getAnalysis(); + SE = &getAnalysis(); + TD = &getAnalysis(); + UIntPtrTy = TD->getIntPtrType(); - // Next, find all uses of induction variables in this loop, and catagorize + // Find all uses of induction variables in this loop, and catagorize // them by stride. Start by finding all of the PHI nodes in the header for // this loop. If they are induction variables, inspect their uses. std::set Processed; // Don't reprocess instructions. @@ -1038,7 +1331,7 @@ void LoopStrengthReduce::runOnLoop(Loop *L) { AddUsersIfInteresting(I, L, Processed); // If we have nothing to do, return. - if (IVUsesByStride.empty()) return; + if (IVUsesByStride.empty()) return false; // Optimize induction variables. Some indvar uses can be transformed to use // strides that will be needed for other purposes. A common example of this @@ -1058,7 +1351,18 @@ void LoopStrengthReduce::runOnLoop(Loop *L) { // If we only have one stride, we can more aggressively eliminate some things. bool HasOneStride = IVUsesByStride.size() == 1; - + +#ifndef NDEBUG + DOUT << "\nLSR on "; + DEBUG(L->dump()); +#endif + + // IVsByStride keeps IVs for one particular loop. + IVsByStride.clear(); + + // Sort the StrideOrder so we process larger strides first. + std::stable_sort(StrideOrder.begin(), StrideOrder.end(), StrideCompare()); + // Note: this processes each stride/type pair individually. All users passed // into StrengthReduceStridedIVUsers have the same type AND stride. Also, // node that we iterate over IVUsesByStride indirectly by using StrideOrder. @@ -1093,9 +1397,9 @@ void LoopStrengthReduce::runOnLoop(Loop *L) { // FIXME: this needs to eliminate an induction variable even if it's being // compared against some value to decide loop termination. if (PN->hasOneUse()) { - BinaryOperator *BO = dyn_cast(*(PN->use_begin())); - if (BO && BO->hasOneUse()) { - if (PN == *(BO->use_begin())) { + Instruction *BO = dyn_cast(*PN->use_begin()); + if (BO && (isa(BO) || isa(BO))) { + if (BO->hasOneUse() && PN == *(BO->use_begin())) { DeadInsts.insert(BO); // Break the cycle, then delete the PHI. PN->replaceAllUsesWith(UndefValue::get(PN->getType())); @@ -1111,5 +1415,5 @@ void LoopStrengthReduce::runOnLoop(Loop *L) { CastedPointers.clear(); IVUsesByStride.clear(); StrideOrder.clear(); - return; + return false; }