X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;ds=sidebyside;f=lib%2FTransforms%2FScalar%2FLoopStrengthReduce.cpp;h=d825ea789b596a7862387ed18501df5f01227952;hb=cbfe5bbe88f5f2ee03a388585112f7609c8151ad;hp=38cfd91fe1b5564b78b356c3812c357b8d8adb29;hpb=3dc6776b338f81e2d47daa42cc12c9f91053043d;p=oota-llvm.git diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 38cfd91fe1b..d825ea789b5 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -2,8 +2,8 @@ // // The LLVM Compiler Infrastructure // -// This file was developed by Nate Begeman and is distributed under the -// University of Illinois Open Source License. See LICENSE.TXT for details. +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // @@ -19,6 +19,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Constants.h" #include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Type.h" #include "llvm/DerivedTypes.h" #include "llvm/Analysis/Dominators.h" @@ -30,6 +31,8 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Target/TargetData.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Compiler.h" @@ -38,9 +41,10 @@ #include using namespace llvm; -STATISTIC(NumReduced , "Number of GEPs strength reduced"); -STATISTIC(NumInserted, "Number of PHIs inserted"); -STATISTIC(NumVariable, "Number of PHIs with variable strides"); +STATISTIC(NumReduced , "Number of GEPs strength reduced"); +STATISTIC(NumInserted, "Number of PHIs inserted"); +STATISTIC(NumVariable, "Number of PHIs with variable strides"); +STATISTIC(NumEliminated , "Number of strides eliminated"); namespace { @@ -48,8 +52,8 @@ namespace { /// IVStrideUse - Keep track of one use of a strided induction variable, where /// the stride is stored externally. The Offset member keeps track of the - /// offset from the IV, User is the actual user of the operand, and 'Operand' - /// is the operand # of the User that is the use. + /// offset from the IV, User is the actual user of the operand, and + /// 'OperandValToReplace' is the operand of the User that is the use. struct VISIBILITY_HIDDEN IVStrideUse { SCEVHandle Offset; Instruction *User; @@ -89,9 +93,6 @@ namespace { PHINode *PHI; Value *IncV; - IVExpr() - : Stride(SCEVUnknown::getIntegerSCEV(0, Type::Int32Ty)), - Base (SCEVUnknown::getIntegerSCEV(0, Type::Int32Ty)) {} IVExpr(const SCEVHandle &stride, const SCEVHandle &base, PHINode *phi, Value *incv) : Stride(stride), Base(base), PHI(phi), IncV(incv) {} @@ -110,7 +111,7 @@ namespace { class VISIBILITY_HIDDEN LoopStrengthReduce : public LoopPass { LoopInfo *LI; - ETForest *EF; + DominatorTree *DT; ScalarEvolution *SE; const TargetData *TD; const Type *UIntPtrTy; @@ -127,23 +128,25 @@ namespace { /// StrideOrder - An ordering of the keys in IVUsesByStride that is stable: /// We use this to iterate over the IVUsesByStride collection without being /// dependent on random ordering of pointers in the process. - std::vector StrideOrder; + SmallVector StrideOrder; /// CastedValues - As we need to cast values to uintptr_t, this keeps track /// of the casted version of each value. This is accessed by /// getCastedVersionOf. - std::map CastedPointers; + DenseMap CastedPointers; /// DeadInsts - Keep track of instructions we may have made dead, so that /// we can remove them after we are done working. - std::set DeadInsts; + SetVector DeadInsts; /// TLI - Keep a pointer of a TargetLowering to consult for determining /// transformation profitability. const TargetLowering *TLI; public: - LoopStrengthReduce(const TargetLowering *tli = NULL) : TLI(tli) { + static char ID; // Pass ID, replacement for typeid + explicit LoopStrengthReduce(const TargetLowering *tli = NULL) : + LoopPass((intptr_t)&ID), TLI(tli) { } bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -153,13 +156,12 @@ namespace { // many analyses if they are around. AU.addPreservedID(LoopSimplifyID); AU.addPreserved(); - AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); AU.addRequiredID(LoopSimplifyID); AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); } @@ -169,26 +171,36 @@ namespace { Value *getCastedVersionOf(Instruction::CastOps opcode, Value *V); private: bool AddUsersIfInteresting(Instruction *I, Loop *L, - std::set &Processed); - SCEVHandle GetExpressionSCEV(Instruction *E, Loop *L); - + SmallPtrSet &Processed); + SCEVHandle GetExpressionSCEV(Instruction *E); + ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond, + IVStrideUse* &CondUse, + const SCEVHandle* &CondStride); void OptimizeIndvars(Loop *L); bool FindIVForUser(ICmpInst *Cond, IVStrideUse *&CondUse, const SCEVHandle *&CondStride); - - unsigned CheckForIVReuse(const SCEVHandle&, IVExpr&, const Type*, + bool RequiresTypeConversion(const Type *Ty, const Type *NewTy); + unsigned CheckForIVReuse(bool, bool, const SCEVHandle&, + IVExpr&, const Type*, const std::vector& UsersToProcess); - - bool ValidStride(int64_t, const std::vector& UsersToProcess); - + bool ValidStride(bool, int64_t, + const std::vector& UsersToProcess); + SCEVHandle CollectIVUsers(const SCEVHandle &Stride, + IVUsersOfOneStride &Uses, + Loop *L, + bool &AllUsesAreAddresses, + std::vector &UsersToProcess); void StrengthReduceStridedIVUsers(const SCEVHandle &Stride, IVUsersOfOneStride &Uses, Loop *L, bool isOnlyStride); - void DeleteTriviallyDeadInstructions(std::set &Insts); + void DeleteTriviallyDeadInstructions(SetVector &Insts); }; - RegisterPass X("loop-reduce", "Loop Strength Reduction"); } +char LoopStrengthReduce::ID = 0; +static RegisterPass +X("loop-reduce", "Loop Strength Reduction"); + LoopPass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { return new LoopStrengthReduce(TLI); } @@ -215,15 +227,30 @@ Value *LoopStrengthReduce::getCastedVersionOf(Instruction::CastOps opcode, /// specified set are trivially dead, delete them and see if this makes any of /// their operands subsequently dead. void LoopStrengthReduce:: -DeleteTriviallyDeadInstructions(std::set &Insts) { +DeleteTriviallyDeadInstructions(SetVector &Insts) { while (!Insts.empty()) { - Instruction *I = *Insts.begin(); - Insts.erase(Insts.begin()); + Instruction *I = Insts.back(); + Insts.pop_back(); + + if (PHINode *PN = dyn_cast(I)) { + // If all incoming values to the Phi are the same, we can replace the Phi + // with that value. + if (Value *PNV = PN->hasConstantValue()) { + if (Instruction *U = dyn_cast(PNV)) + Insts.insert(U); + SE->deleteValueFromRecords(PN); + PN->replaceAllUsesWith(PNV); + PN->eraseFromParent(); + Changed = true; + continue; + } + } + if (isInstructionTriviallyDead(I)) { - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) - if (Instruction *U = dyn_cast(I->getOperand(i))) + for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) + if (Instruction *U = dyn_cast(*i)) Insts.insert(U); - SE->deleteInstructionFromRecords(I); + SE->deleteValueFromRecords(I); I->eraseFromParent(); Changed = true; } @@ -233,13 +260,13 @@ DeleteTriviallyDeadInstructions(std::set &Insts) { /// GetExpressionSCEV - Compute and return the SCEV for the specified /// instruction. -SCEVHandle LoopStrengthReduce::GetExpressionSCEV(Instruction *Exp, Loop *L) { +SCEVHandle LoopStrengthReduce::GetExpressionSCEV(Instruction *Exp) { // Pointer to pointer bitcast instructions return the same value as their // operand. if (BitCastInst *BCI = dyn_cast(Exp)) { if (SE->hasSCEV(BCI) || !isa(BCI->getOperand(0))) return SE->getSCEV(BCI); - SCEVHandle R = GetExpressionSCEV(cast(BCI->getOperand(0)), L); + SCEVHandle R = GetExpressionSCEV(cast(BCI->getOperand(0))); SE->setSCEV(BCI, R); return R; } @@ -253,42 +280,43 @@ SCEVHandle LoopStrengthReduce::GetExpressionSCEV(Instruction *Exp, Loop *L) { return SE->getSCEV(Exp); // Analyze all of the subscripts of this getelementptr instruction, looking - // for uses that are determined by the trip count of L. First, skip all - // operands the are not dependent on the IV. + // for uses that are determined by the trip count of the loop. First, skip + // all operands the are not dependent on the IV. // Build up the base expression. Insert an LLVM cast of the pointer to // uintptr_t first. - SCEVHandle GEPVal = SCEVUnknown::get( + SCEVHandle GEPVal = SE->getUnknown( getCastedVersionOf(Instruction::PtrToInt, GEP->getOperand(0))); gep_type_iterator GTI = gep_type_begin(GEP); - for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i, ++GTI) { + for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); + i != e; ++i, ++GTI) { // If this is a use of a recurrence that we can analyze, and it comes before // Op does in the GEP operand list, we will handle this when we process this // operand. if (const StructType *STy = dyn_cast(*GTI)) { const StructLayout *SL = TD->getStructLayout(STy); - unsigned Idx = cast(GEP->getOperand(i))->getZExtValue(); + unsigned Idx = cast(*i)->getZExtValue(); uint64_t Offset = SL->getElementOffset(Idx); - GEPVal = SCEVAddExpr::get(GEPVal, - SCEVUnknown::getIntegerSCEV(Offset, UIntPtrTy)); + GEPVal = SE->getAddExpr(GEPVal, + SE->getIntegerSCEV(Offset, UIntPtrTy)); } else { unsigned GEPOpiBits = - GEP->getOperand(i)->getType()->getPrimitiveSizeInBits(); + (*i)->getType()->getPrimitiveSizeInBits(); unsigned IntPtrBits = UIntPtrTy->getPrimitiveSizeInBits(); Instruction::CastOps opcode = (GEPOpiBits < IntPtrBits ? Instruction::SExt : (GEPOpiBits > IntPtrBits ? Instruction::Trunc : Instruction::BitCast)); - Value *OpVal = getCastedVersionOf(opcode, GEP->getOperand(i)); + Value *OpVal = getCastedVersionOf(opcode, *i); SCEVHandle Idx = SE->getSCEV(OpVal); - uint64_t TypeSize = TD->getTypeSize(GTI.getIndexedType()); + uint64_t TypeSize = TD->getABITypeSize(GTI.getIndexedType()); if (TypeSize != 1) - Idx = SCEVMulExpr::get(Idx, - SCEVConstant::get(ConstantInt::get(UIntPtrTy, - TypeSize))); - GEPVal = SCEVAddExpr::get(GEPVal, Idx); + Idx = SE->getMulExpr(Idx, + SE->getConstant(ConstantInt::get(UIntPtrTy, + TypeSize))); + GEPVal = SE->getAddExpr(GEPVal, Idx); } } @@ -301,7 +329,8 @@ SCEVHandle LoopStrengthReduce::GetExpressionSCEV(Instruction *Exp, Loop *L) { /// is. The stride must be a loop invariant expression, but the start may be /// a mix of loop invariant and loop variant expressions. static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, - SCEVHandle &Start, SCEVHandle &Stride) { + SCEVHandle &Start, SCEVHandle &Stride, + ScalarEvolution *SE) { SCEVHandle TheAddRec = Start; // Initialize to zero. // If the outer level is an AddExpr, the operands are all start values except @@ -311,11 +340,11 @@ static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, if (SCEVAddRecExpr *AddRec = dyn_cast(AE->getOperand(i))) { if (AddRec->getLoop() == L) - TheAddRec = SCEVAddExpr::get(AddRec, TheAddRec); + TheAddRec = SE->getAddExpr(AddRec, TheAddRec); else return false; // Nested IV of some sort? } else { - Start = SCEVAddExpr::get(Start, AE->getOperand(i)); + Start = SE->getAddExpr(Start, AE->getOperand(i)); } } else if (isa(SH)) { @@ -330,7 +359,7 @@ static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, // FIXME: Generalize to non-affine IV's. if (!AddRec->isAffine()) return false; - Start = SCEVAddExpr::get(Start, AddRec->getOperand(0)); + Start = SE->getAddExpr(Start, AddRec->getOperand(0)); if (!isa(AddRec->getOperand(1))) DOUT << "[" << L->getHeader()->getName() @@ -349,7 +378,8 @@ static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, /// the loop, resulting in reg-reg copies (if we use the pre-inc value when we /// should use the post-inc value). static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, - Loop *L, ETForest *EF, Pass *P) { + Loop *L, DominatorTree *DT, Pass *P, + SetVector &DeadInsts){ // If the user is in the loop, use the preinc value. if (L->contains(User->getParent())) return false; @@ -357,7 +387,7 @@ static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, // Ok, the user is outside of the loop. If it is dominated by the latch // block, use the post-inc value. - if (EF->dominates(LatchBlock, User->getParent())) + if (DT->dominates(LatchBlock, User->getParent())) return true; // There is one case we have to be careful of: PHI nodes. These little guys @@ -374,7 +404,7 @@ static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == IV) { ++NumUses; - if (!EF->dominates(LatchBlock, PN->getIncomingBlock(i))) + if (!DT->dominates(LatchBlock, PN->getIncomingBlock(i))) return false; } @@ -383,13 +413,15 @@ static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, // post-incremented value. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == IV) { - SplitCriticalEdge(PN->getIncomingBlock(i), PN->getParent(), P, - true); + SplitCriticalEdge(PN->getIncomingBlock(i), PN->getParent(), P, false); // Splitting the critical edge can reduce the number of entries in this // PHI. e = PN->getNumIncomingValues(); if (--NumUses == 0) break; } + + // PHI node might have become a constant value after SplitCriticalEdge. + DeadInsts.insert(User); return true; } @@ -400,28 +432,32 @@ static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, /// reducible SCEV, recursively add its users to the IVUsesByStride set and /// return true. Otherwise, return false. bool LoopStrengthReduce::AddUsersIfInteresting(Instruction *I, Loop *L, - std::set &Processed) { + SmallPtrSet &Processed) { if (!I->getType()->isInteger() && !isa(I->getType())) - return false; // Void and FP expressions cannot be reduced. - if (!Processed.insert(I).second) + return false; // Void and FP expressions cannot be reduced. + if (!Processed.insert(I)) return true; // Instruction already handled. // Get the symbolic expression for this instruction. - SCEVHandle ISE = GetExpressionSCEV(I, L); + SCEVHandle ISE = GetExpressionSCEV(I); if (isa(ISE)) return false; // Get the start and stride for this expression. - SCEVHandle Start = SCEVUnknown::getIntegerSCEV(0, ISE->getType()); + SCEVHandle Start = SE->getIntegerSCEV(0, ISE->getType()); SCEVHandle Stride = Start; - if (!getSCEVStartAndStride(ISE, L, Start, Stride)) + if (!getSCEVStartAndStride(ISE, L, Start, Stride, SE)) return false; // Non-reducible symbolic expression, bail out. - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;) { - Instruction *User = cast(*UI); + std::vector IUsers; + // Collect all I uses now because IVUseShouldUsePostIncValue may + // invalidate use_iterator. + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI) + IUsers.push_back(cast(*UI)); + + for (unsigned iused_index = 0, iused_size = IUsers.size(); + iused_index != iused_size; ++iused_index) { - // Increment iterator now because IVUseShouldUsePostIncValue may remove - // User from the list of I users. - ++UI; + Instruction *User = IUsers[iused_index]; // Do not infinitely recurse on PHI nodes. if (isa(User) && Processed.count(User)) @@ -448,10 +484,10 @@ bool LoopStrengthReduce::AddUsersIfInteresting(Instruction *I, Loop *L, // Okay, we found a user that we cannot reduce. Analyze the instruction // and decide what to do with it. If we are a use inside of the loop, use // the value before incrementation, otherwise use it after incrementation. - if (IVUseShouldUsePostIncValue(User, I, L, EF, this)) { + if (IVUseShouldUsePostIncValue(User, I, L, DT, this, DeadInsts)) { // The value used will be incremented by the stride more than we are // expecting, so subtract this off. - SCEVHandle NewStart = SCEV::getMinusSCEV(Start, Stride); + SCEVHandle NewStart = SE->getMinusSCEV(Start, Stride); StrideUses.addUser(NewStart, User, I); StrideUses.Users.back().isUseOfPostIncrementedValue = true; DOUT << " USING POSTINC SCEV, START=" << *NewStart<< "\n"; @@ -467,6 +503,9 @@ namespace { /// BasedUser - For a particular base value, keep information about how we've /// partitioned the expression so far. struct BasedUser { + /// SE - The current ScalarEvolution object. + ScalarEvolution *SE; + /// Base - The Base value for the PHI node that needs to be inserted for /// this use. As the use is processed, information gets moved from this /// field to the Imm field (below). BasedUser values are sorted by this @@ -496,18 +535,19 @@ namespace { // the loop. bool isUseOfPostIncrementedValue; - BasedUser(IVStrideUse &IVSU) - : Base(IVSU.Offset), Inst(IVSU.User), + BasedUser(IVStrideUse &IVSU, ScalarEvolution *se) + : SE(se), Base(IVSU.Offset), Inst(IVSU.User), OperandValToReplace(IVSU.OperandValToReplace), - Imm(SCEVUnknown::getIntegerSCEV(0, Base->getType())), EmittedBase(0), + Imm(SE->getIntegerSCEV(0, Base->getType())), EmittedBase(0), isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue) {} // Once we rewrite the code to insert the new IVs we want, update the // operands of Inst to use the new expression 'NewBase', with 'Imm' added // to it. void RewriteInstructionToUseNewBase(const SCEVHandle &NewBase, - SCEVExpander &Rewriter, Loop *L, - Pass *P); + Instruction *InsertPt, + SCEVExpander &Rewriter, Loop *L, Pass *P, + SetVector &DeadInsts); Value *InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, SCEVExpander &Rewriter, @@ -546,26 +586,33 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, } // If there is no immediate value, skip the next part. - if (SCEVConstant *SC = dyn_cast(Imm)) - if (SC->getValue()->isZero()) - return Rewriter.expandCodeFor(NewBase, BaseInsertPt, - OperandValToReplace->getType()); + if (Imm->isZero()) + return Rewriter.expandCodeFor(NewBase, BaseInsertPt); Value *Base = Rewriter.expandCodeFor(NewBase, BaseInsertPt); + + // If we are inserting the base and imm values in the same block, make sure to + // adjust the IP position if insertion reused a result. + if (IP == BaseInsertPt) + IP = Rewriter.getInsertionPoint(); // Always emit the immediate (if non-zero) into the same block as the user. - SCEVHandle NewValSCEV = SCEVAddExpr::get(SCEVUnknown::get(Base), Imm); - return Rewriter.expandCodeFor(NewValSCEV, IP, - OperandValToReplace->getType()); + SCEVHandle NewValSCEV = SE->getAddExpr(SE->getUnknown(Base), Imm); + return Rewriter.expandCodeFor(NewValSCEV, IP); + } // Once we rewrite the code to insert the new IVs we want, update the // operands of Inst to use the new expression 'NewBase', with 'Imm' added -// to it. +// to it. NewBasePt is the last instruction which contributes to the +// value of NewBase in the case that it's a diffferent instruction from +// the PHI that NewBase is computed from, or null otherwise. +// void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase, - SCEVExpander &Rewriter, - Loop *L, Pass *P) { + Instruction *NewBasePt, + SCEVExpander &Rewriter, Loop *L, Pass *P, + SetVector &DeadInsts) { if (!isa(Inst)) { // By default, insert code at the user instruction. BasicBlock::iterator InsertPt = Inst; @@ -579,16 +626,29 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase, // value will be pinned to live somewhere after the original computation. // In this case, we have to back off. if (!isUseOfPostIncrementedValue) { - if (Instruction *OpInst = dyn_cast(OperandValToReplace)) { + if (NewBasePt && isa(OperandValToReplace)) { + InsertPt = NewBasePt; + ++InsertPt; + } else if (Instruction *OpInst + = dyn_cast(OperandValToReplace)) { InsertPt = OpInst; while (isa(InsertPt)) ++InsertPt; } } - Value *NewVal = InsertCodeForBaseAtPosition(NewBase, Rewriter, InsertPt, L); + // Adjust the type back to match the Inst. Note that we can't use InsertPt + // here because the SCEVExpander may have inserted the instructions after + // that point, in its efforts to avoid inserting redundant expressions. + if (isa(OperandValToReplace->getType())) { + NewVal = SCEVExpander::InsertCastOfTo(Instruction::IntToPtr, + NewVal, + OperandValToReplace->getType()); + } // Replace the use of the operand Value with the new Phi we just created. Inst->replaceUsesOfWith(OperandValToReplace, NewVal); - DOUT << " CHANGED: IMM =" << *Imm << " Inst = " << *Inst; + DOUT << " CHANGED: IMM =" << *Imm; + DOUT << " \tNEWBASE =" << *NewBase; + DOUT << " \tInst = " << *Inst; return; } @@ -597,7 +657,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase, // have multiple entries for the same predecessor. We use a map to make sure // that a PHI node only has a single Value* for each predecessor (which also // prevents us from inserting duplicate code in some blocks). - std::map InsertedCode; + DenseMap InsertedCode; PHINode *PN = cast(Inst); for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { if (PN->getIncomingValue(i) == OperandValToReplace) { @@ -610,7 +670,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase, (PN->getParent() != L->getHeader() || !L->contains(PHIPred))) { // First step, split the critical edge. - SplitCriticalEdge(PHIPred, PN->getParent(), P, true); + SplitCriticalEdge(PHIPred, PN->getParent(), P, false); // Next step: move the basic block. In particular, if the PHI node // is outside of the loop, and PredTI is in the loop, we want to @@ -630,6 +690,16 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase, // Insert the code into the end of the predecessor block. Instruction *InsertPt = PN->getIncomingBlock(i)->getTerminator(); Code = InsertCodeForBaseAtPosition(NewBase, Rewriter, InsertPt, L); + + // Adjust the type back to match the PHI. Note that we can't use + // InsertPt here because the SCEVExpander may have inserted its + // instructions after that point, in its efforts to avoid inserting + // redundant expressions. + if (isa(PN->getType())) { + Code = SCEVExpander::InsertCastOfTo(Instruction::IntToPtr, + Code, + PN->getType()); + } } // Replace the use of the operand Value with the new Phi we just created. @@ -637,6 +707,10 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase, Rewriter.clear(); } } + + // PHI node might have become a constant value after SplitCriticalEdge. + DeadInsts.insert(Inst); + DOUT << " CHANGED: IMM =" << *Imm << " Inst = " << *Inst; } @@ -673,7 +747,7 @@ static bool isTargetConstant(const SCEVHandle &V, const Type *UseTy, /// MoveLoopVariantsToImediateField - Move any subexpressions from Val that are /// loop varying to the Imm operand. static void MoveLoopVariantsToImediateField(SCEVHandle &Val, SCEVHandle &Imm, - Loop *L) { + Loop *L, ScalarEvolution *SE) { if (Val->isLoopInvariant(L)) return; // Nothing to do. if (SCEVAddExpr *SAE = dyn_cast(Val)) { @@ -684,27 +758,27 @@ static void MoveLoopVariantsToImediateField(SCEVHandle &Val, SCEVHandle &Imm, if (!SAE->getOperand(i)->isLoopInvariant(L)) { // If this is a loop-variant expression, it must stay in the immediate // field of the expression. - Imm = SCEVAddExpr::get(Imm, SAE->getOperand(i)); + Imm = SE->getAddExpr(Imm, SAE->getOperand(i)); } else { NewOps.push_back(SAE->getOperand(i)); } if (NewOps.empty()) - Val = SCEVUnknown::getIntegerSCEV(0, Val->getType()); + Val = SE->getIntegerSCEV(0, Val->getType()); else - Val = SCEVAddExpr::get(NewOps); + Val = SE->getAddExpr(NewOps); } else if (SCEVAddRecExpr *SARE = dyn_cast(Val)) { // Try to pull immediates out of the start value of nested addrec's. SCEVHandle Start = SARE->getStart(); - MoveLoopVariantsToImediateField(Start, Imm, L); + MoveLoopVariantsToImediateField(Start, Imm, L, SE); std::vector Ops(SARE->op_begin(), SARE->op_end()); Ops[0] = Start; - Val = SCEVAddRecExpr::get(Ops, SARE->getLoop()); + Val = SE->getAddRecExpr(Ops, SARE->getLoop()); } else { // Otherwise, all of Val is variant, move the whole thing over. - Imm = SCEVAddExpr::get(Imm, Val); - Val = SCEVUnknown::getIntegerSCEV(0, Val->getType()); + Imm = SE->getAddExpr(Imm, Val); + Val = SE->getIntegerSCEV(0, Val->getType()); } } @@ -715,7 +789,8 @@ static void MoveLoopVariantsToImediateField(SCEVHandle &Val, SCEVHandle &Imm, static void MoveImmediateValues(const TargetLowering *TLI, Instruction *User, SCEVHandle &Val, SCEVHandle &Imm, - bool isAddress, Loop *L) { + bool isAddress, Loop *L, + ScalarEvolution *SE) { const Type *UseTy = User->getType(); if (StoreInst *SI = dyn_cast(User)) UseTy = SI->getOperand(0)->getType(); @@ -726,31 +801,31 @@ static void MoveImmediateValues(const TargetLowering *TLI, for (unsigned i = 0; i != SAE->getNumOperands(); ++i) { SCEVHandle NewOp = SAE->getOperand(i); - MoveImmediateValues(TLI, User, NewOp, Imm, isAddress, L); + MoveImmediateValues(TLI, User, NewOp, Imm, isAddress, L, SE); if (!NewOp->isLoopInvariant(L)) { // If this is a loop-variant expression, it must stay in the immediate // field of the expression. - Imm = SCEVAddExpr::get(Imm, NewOp); + Imm = SE->getAddExpr(Imm, NewOp); } else { NewOps.push_back(NewOp); } } if (NewOps.empty()) - Val = SCEVUnknown::getIntegerSCEV(0, Val->getType()); + Val = SE->getIntegerSCEV(0, Val->getType()); else - Val = SCEVAddExpr::get(NewOps); + Val = SE->getAddExpr(NewOps); return; } else if (SCEVAddRecExpr *SARE = dyn_cast(Val)) { // Try to pull immediates out of the start value of nested addrec's. SCEVHandle Start = SARE->getStart(); - MoveImmediateValues(TLI, User, Start, Imm, isAddress, L); + MoveImmediateValues(TLI, User, Start, Imm, isAddress, L, SE); if (Start != SARE->getStart()) { std::vector Ops(SARE->op_begin(), SARE->op_end()); Ops[0] = Start; - Val = SCEVAddRecExpr::get(Ops, SARE->getLoop()); + Val = SE->getAddRecExpr(Ops, SARE->getLoop()); } return; } else if (SCEVMulExpr *SME = dyn_cast(Val)) { @@ -758,22 +833,22 @@ static void MoveImmediateValues(const TargetLowering *TLI, if (isAddress && isTargetConstant(SME->getOperand(0), UseTy, TLI) && SME->getNumOperands() == 2 && SME->isLoopInvariant(L)) { - SCEVHandle SubImm = SCEVUnknown::getIntegerSCEV(0, Val->getType()); + SCEVHandle SubImm = SE->getIntegerSCEV(0, Val->getType()); SCEVHandle NewOp = SME->getOperand(1); - MoveImmediateValues(TLI, User, NewOp, SubImm, isAddress, L); + MoveImmediateValues(TLI, User, NewOp, SubImm, isAddress, L, SE); // If we extracted something out of the subexpressions, see if we can // simplify this! if (NewOp != SME->getOperand(1)) { // Scale SubImm up by "8". If the result is a target constant, we are // good. - SubImm = SCEVMulExpr::get(SubImm, SME->getOperand(0)); + SubImm = SE->getMulExpr(SubImm, SME->getOperand(0)); if (isTargetConstant(SubImm, UseTy, TLI)) { // Accumulate the immediate. - Imm = SCEVAddExpr::get(Imm, SubImm); + Imm = SE->getAddExpr(Imm, SubImm); // Update what is left of 'Val'. - Val = SCEVMulExpr::get(SME->getOperand(0), NewOp); + Val = SE->getMulExpr(SME->getOperand(0), NewOp); return; } } @@ -784,8 +859,8 @@ static void MoveImmediateValues(const TargetLowering *TLI, // expression. if ((isAddress && isTargetConstant(Val, UseTy, TLI)) || !Val->isLoopInvariant(L)) { - Imm = SCEVAddExpr::get(Imm, Val); - Val = SCEVUnknown::getIntegerSCEV(0, Val->getType()); + Imm = SE->getAddExpr(Imm, Val); + Val = SE->getIntegerSCEV(0, Val->getType()); return; } @@ -797,25 +872,25 @@ static void MoveImmediateValues(const TargetLowering *TLI, /// added together. This is used to reassociate common addition subexprs /// together for maximal sharing when rewriting bases. static void SeparateSubExprs(std::vector &SubExprs, - SCEVHandle Expr) { + SCEVHandle Expr, + ScalarEvolution *SE) { if (SCEVAddExpr *AE = dyn_cast(Expr)) { for (unsigned j = 0, e = AE->getNumOperands(); j != e; ++j) - SeparateSubExprs(SubExprs, AE->getOperand(j)); + SeparateSubExprs(SubExprs, AE->getOperand(j), SE); } else if (SCEVAddRecExpr *SARE = dyn_cast(Expr)) { - SCEVHandle Zero = SCEVUnknown::getIntegerSCEV(0, Expr->getType()); + SCEVHandle Zero = SE->getIntegerSCEV(0, Expr->getType()); if (SARE->getOperand(0) == Zero) { SubExprs.push_back(Expr); } else { // Compute the addrec with zero as its base. std::vector Ops(SARE->op_begin(), SARE->op_end()); Ops[0] = Zero; // Start with zero base. - SubExprs.push_back(SCEVAddRecExpr::get(Ops, SARE->getLoop())); + SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop())); - SeparateSubExprs(SubExprs, SARE->getOperand(0)); + SeparateSubExprs(SubExprs, SARE->getOperand(0), SE); } - } else if (!isa(Expr) || - !cast(Expr)->getValue()->isZero()) { + } else if (!Expr->isZero()) { // Do not add zero. SubExprs.push_back(Expr); } @@ -827,11 +902,12 @@ static void SeparateSubExprs(std::vector &SubExprs, /// removed, accumulated, and returned. This looks for things like (a+b+c) and /// (a+c+d) -> (a+c). The common expression is *removed* from the Bases. static SCEVHandle -RemoveCommonExpressionsFromUseBases(std::vector &Uses) { +RemoveCommonExpressionsFromUseBases(std::vector &Uses, + ScalarEvolution *SE) { unsigned NumUses = Uses.size(); // Only one use? Use its base, regardless of what it is! - SCEVHandle Zero = SCEVUnknown::getIntegerSCEV(0, Uses[0].Base->getType()); + SCEVHandle Zero = SE->getIntegerSCEV(0, Uses[0].Base->getType()); SCEVHandle Result = Zero; if (NumUses == 1) { std::swap(Result, Uses[0].Base); @@ -853,7 +929,7 @@ RemoveCommonExpressionsFromUseBases(std::vector &Uses) { if (Uses[i].Base == Zero) return Zero; // Split the expression into subexprs. - SeparateSubExprs(SubExprs, Uses[i].Base); + SeparateSubExprs(SubExprs, Uses[i].Base, SE); // Add one to SubExpressionUseCounts for each subexpr present. for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) if (++SubExpressionUseCounts[SubExprs[j]] == 1) @@ -868,7 +944,7 @@ RemoveCommonExpressionsFromUseBases(std::vector &Uses) { SubExpressionUseCounts.find(UniqueSubExprs[i]); assert(I != SubExpressionUseCounts.end() && "Entry not found?"); if (I->second == NumUses) { // Found CSE! - Result = SCEVAddExpr::get(Result, I->first); + Result = SE->getAddExpr(Result, I->first); } else { // Remove non-cse's from SubExpressionUseCounts. SubExpressionUseCounts.erase(I); @@ -881,7 +957,7 @@ RemoveCommonExpressionsFromUseBases(std::vector &Uses) { // Otherwise, remove all of the CSE's we found from each of the base values. for (unsigned i = 0; i != NumUses; ++i) { // Split the expression into subexprs. - SeparateSubExprs(SubExprs, Uses[i].Base); + SeparateSubExprs(SubExprs, Uses[i].Base, SE); // Remove any common subexpressions. for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) @@ -894,26 +970,22 @@ RemoveCommonExpressionsFromUseBases(std::vector &Uses) { if (SubExprs.empty()) Uses[i].Base = Zero; else - Uses[i].Base = SCEVAddExpr::get(SubExprs); + Uses[i].Base = SE->getAddExpr(SubExprs); SubExprs.clear(); } return Result; } -/// isZero - returns true if the scalar evolution expression is zero. -/// -static bool isZero(SCEVHandle &V) { - if (SCEVConstant *SC = dyn_cast(V)) - return SC->getValue()->isZero(); - return false; -} - /// ValidStride - Check whether the given Scale is valid for all loads and /// stores in UsersToProcess. /// -bool LoopStrengthReduce::ValidStride(int64_t Scale, +bool LoopStrengthReduce::ValidStride(bool HasBaseReg, + int64_t Scale, const std::vector& UsersToProcess) { + if (!TLI) + return true; + for (unsigned i=0, e = UsersToProcess.size(); i!=e; ++i) { // If this is a load or other access, pass the type of the access in. const Type *AccessTy = Type::VoidTy; @@ -921,10 +993,13 @@ bool LoopStrengthReduce::ValidStride(int64_t Scale, AccessTy = SI->getOperand(0)->getType(); else if (LoadInst *LI = dyn_cast(UsersToProcess[i].Inst)) AccessTy = LI->getType(); + else if (isa(UsersToProcess[i].Inst)) + continue; TargetLowering::AddrMode AM; if (SCEVConstant *SC = dyn_cast(UsersToProcess[i].Imm)) AM.BaseOffs = SC->getValue()->getSExtValue(); + AM.HasBaseReg = HasBaseReg || !UsersToProcess[i].Base->isZero(); AM.Scale = Scale; // If load[imm+r*scale] is illegal, bail out. @@ -934,36 +1009,58 @@ bool LoopStrengthReduce::ValidStride(int64_t Scale, return true; } +/// RequiresTypeConversion - Returns true if converting Ty to NewTy is not +/// a nop. +bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1, + const Type *Ty2) { + if (Ty1 == Ty2) + return false; + if (TLI && TLI->isTruncateFree(Ty1, Ty2)) + return false; + return (!Ty1->canLosslesslyBitCastTo(Ty2) && + !(isa(Ty2) && + Ty1->canLosslesslyBitCastTo(UIntPtrTy)) && + !(isa(Ty1) && + Ty2->canLosslesslyBitCastTo(UIntPtrTy))); +} + /// CheckForIVReuse - Returns the multiple if the stride is the multiple /// of a previous stride and it is a legal value for the target addressing -/// mode scale component. This allows the users of this stride to be rewritten -/// as prev iv * factor. It returns 0 if no reuse is possible. -unsigned LoopStrengthReduce::CheckForIVReuse(const SCEVHandle &Stride, +/// mode scale component and optional base reg. This allows the users of +/// this stride to be rewritten as prev iv * factor. It returns 0 if no +/// reuse is possible. +unsigned LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, + bool AllUsesAreAddresses, + const SCEVHandle &Stride, IVExpr &IV, const Type *Ty, const std::vector& UsersToProcess) { - if (!TLI) return 0; - if (SCEVConstant *SC = dyn_cast(Stride)) { int64_t SInt = SC->getValue()->getSExtValue(); - if (SInt == 1) return 0; - - for (std::map::iterator SI= IVsByStride.begin(), - SE = IVsByStride.end(); SI != SE; ++SI) { + for (unsigned NewStride = 0, e = StrideOrder.size(); NewStride != e; + ++NewStride) { + std::map::iterator SI = + IVsByStride.find(StrideOrder[NewStride]); + if (SI == IVsByStride.end()) + continue; int64_t SSInt = cast(SI->first)->getValue()->getSExtValue(); - if (SInt != -SSInt && + if (SI->first != Stride && (unsigned(abs(SInt)) < SSInt || (SInt % SSInt) != 0)) continue; int64_t Scale = SInt / SSInt; // Check that this stride is valid for all the types used for loads and // stores; if it can be used for some and not others, we might as well use // the original stride everywhere, since we have to create the IV for it - // anyway. - if (ValidStride(Scale, UsersToProcess)) + // anyway. If the scale is 1, then we don't need to worry about folding + // multiplications. + if (Scale == 1 || + (AllUsesAreAddresses && + ValidStride(HasBaseReg, Scale, UsersToProcess))) for (std::vector::iterator II = SI->second.IVs.begin(), IE = SI->second.IVs.end(); II != IE; ++II) // FIXME: Only handle base == 0 for now. // Only reuse previous IV if it would not require a type conversion. - if (isZero(II->Base) && II->Base->getType() == Ty) { + if (II->Base->isZero() && + !RequiresTypeConversion(II->Base->getType(), Ty)) { IV = *II; return Scale; } @@ -978,28 +1075,67 @@ static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) { return Val.isUseOfPostIncrementedValue; } -/// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single -/// stride of IV. All of the users may have different starting values, and this -/// may not be the only stride (we know it is if isOnlyStride is true). -void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, - IVUsersOfOneStride &Uses, - Loop *L, - bool isOnlyStride) { - // Transform our list of users and offsets to a bit more complex table. In - // this new vector, each 'BasedUser' contains 'Base' the base of the - // strided accessas well as the old information from Uses. We progressively - // move information from the Base field to the Imm field, until we eventually - // have the full access expression to rewrite the use. - std::vector UsersToProcess; +/// isNonConstantNegative - Return true if the specified scev is negated, but +/// not a constant. +static bool isNonConstantNegative(const SCEVHandle &Expr) { + SCEVMulExpr *Mul = dyn_cast(Expr); + if (!Mul) return false; + + // If there is a constant factor, it will be first. + SCEVConstant *SC = dyn_cast(Mul->getOperand(0)); + if (!SC) return false; + + // Return true if the value is negative, this matches things like (-42 * V). + return SC->getValue()->getValue().isNegative(); +} + +/// isAddress - Returns true if the specified instruction is using the +/// specified value as an address. +static bool isAddressUse(Instruction *Inst, Value *OperandVal) { + bool isAddress = isa(Inst); + if (StoreInst *SI = dyn_cast(Inst)) { + if (SI->getOperand(1) == OperandVal) + isAddress = true; + } else if (IntrinsicInst *II = dyn_cast(Inst)) { + // Addressing modes can also be folded into prefetches and a variety + // of intrinsics. + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::prefetch: + case Intrinsic::x86_sse2_loadu_dq: + case Intrinsic::x86_sse2_loadu_pd: + case Intrinsic::x86_sse_loadu_ps: + case Intrinsic::x86_sse_storeu_ps: + case Intrinsic::x86_sse2_storeu_pd: + case Intrinsic::x86_sse2_storeu_dq: + case Intrinsic::x86_sse2_storel_dq: + if (II->getOperand(1) == OperandVal) + isAddress = true; + break; + } + } + return isAddress; +} + +// CollectIVUsers - Transform our list of users and offsets to a bit more +// complex table. In this new vector, each 'BasedUser' contains 'Base' the base +// of the strided accessas well as the old information from Uses. We +// progressively move information from the Base field to the Imm field, until +// we eventually have the full access expression to rewrite the use. +SCEVHandle LoopStrengthReduce::CollectIVUsers(const SCEVHandle &Stride, + IVUsersOfOneStride &Uses, + Loop *L, + bool &AllUsesAreAddresses, + std::vector &UsersToProcess) { UsersToProcess.reserve(Uses.Users.size()); for (unsigned i = 0, e = Uses.Users.size(); i != e; ++i) { - UsersToProcess.push_back(Uses.Users[i]); + UsersToProcess.push_back(BasedUser(Uses.Users[i], SE)); // Move any loop invariant operands from the offset field to the immediate // field of the use, so that we don't try to use something before it is // computed. MoveLoopVariantsToImediateField(UsersToProcess.back().Base, - UsersToProcess.back().Imm, L); + UsersToProcess.back().Imm, L, SE); assert(UsersToProcess.back().Base->isLoopInvariant(L) && "Base value is not loop invariant!"); } @@ -1012,45 +1148,97 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, // "A+B"), emit it to the preheader, then remove the expression from the // UsersToProcess base values. SCEVHandle CommonExprs = - RemoveCommonExpressionsFromUseBases(UsersToProcess); - + RemoveCommonExpressionsFromUseBases(UsersToProcess, SE); + // Next, figure out what we can represent in the immediate fields of // instructions. If we can represent anything there, move it to the imm // fields of the BasedUsers. We do this so that it increases the commonality // of the remaining uses. + unsigned NumPHI = 0; for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) { // If the user is not in the current loop, this means it is using the exit // value of the IV. Do not put anything in the base, make sure it's all in // the immediate field to allow as much factoring as possible. if (!L->contains(UsersToProcess[i].Inst->getParent())) { - UsersToProcess[i].Imm = SCEVAddExpr::get(UsersToProcess[i].Imm, - UsersToProcess[i].Base); + UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm, + UsersToProcess[i].Base); UsersToProcess[i].Base = - SCEVUnknown::getIntegerSCEV(0, UsersToProcess[i].Base->getType()); + SE->getIntegerSCEV(0, UsersToProcess[i].Base->getType()); } else { // Addressing modes can be folded into loads and stores. Be careful that // the store is through the expression, not of the expression though. - bool isAddress = isa(UsersToProcess[i].Inst); - if (StoreInst *SI = dyn_cast(UsersToProcess[i].Inst)) - if (SI->getOperand(1) == UsersToProcess[i].OperandValToReplace) - isAddress = true; + bool isPHI = false; + bool isAddress = isAddressUse(UsersToProcess[i].Inst, + UsersToProcess[i].OperandValToReplace); + if (isa(UsersToProcess[i].Inst)) { + isPHI = true; + ++NumPHI; + } + + // If this use isn't an address, then not all uses are addresses. + if (!isAddress && !isPHI) + AllUsesAreAddresses = false; MoveImmediateValues(TLI, UsersToProcess[i].Inst, UsersToProcess[i].Base, - UsersToProcess[i].Imm, isAddress, L); + UsersToProcess[i].Imm, isAddress, L, SE); } } - // Check if it is possible to reuse a IV with stride that is factor of this - // stride. And the multiple is a number that can be encoded in the scale - // field of the target addressing mode. And we will have a valid - // instruction after this substition, including the immediate field, if any. + // If one of the use if a PHI node and all other uses are addresses, still + // allow iv reuse. Essentially we are trading one constant multiplication + // for one fewer iv. + if (NumPHI > 1) + AllUsesAreAddresses = false; + + return CommonExprs; +} + +/// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single +/// stride of IV. All of the users may have different starting values, and this +/// may not be the only stride (we know it is if isOnlyStride is true). +void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, + IVUsersOfOneStride &Uses, + Loop *L, + bool isOnlyStride) { + // If all the users are moved to another stride, then there is nothing to do. + if (Uses.Users.empty()) + return; + + // Keep track if every use in UsersToProcess is an address. If they all are, + // we may be able to rewrite the entire collection of them in terms of a + // smaller-stride IV. + bool AllUsesAreAddresses = true; + + // Transform our list of users and offsets to a bit more complex table. In + // this new vector, each 'BasedUser' contains 'Base' the base of the + // strided accessas well as the old information from Uses. We progressively + // move information from the Base field to the Imm field, until we eventually + // have the full access expression to rewrite the use. + std::vector UsersToProcess; + SCEVHandle CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses, + UsersToProcess); + + // If we managed to find some expressions in common, we'll need to carry + // their value in a register and add it in for each use. This will take up + // a register operand, which potentially restricts what stride values are + // valid. + bool HaveCommonExprs = !CommonExprs->isZero(); + + // If all uses are addresses, check if it is possible to reuse an IV with a + // stride that is a factor of this stride. And that the multiple is a number + // that can be encoded in the scale field of the target addressing mode. And + // that we will have a valid instruction after this substition, including the + // immediate field, if any. PHINode *NewPHI = NULL; Value *IncV = NULL; - IVExpr ReuseIV; - unsigned RewriteFactor = CheckForIVReuse(Stride, ReuseIV, - CommonExprs->getType(), - UsersToProcess); + IVExpr ReuseIV(SE->getIntegerSCEV(0, Type::Int32Ty), + SE->getIntegerSCEV(0, Type::Int32Ty), + 0, 0); + unsigned RewriteFactor = 0; + RewriteFactor = CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses, + Stride, ReuseIV, CommonExprs->getType(), + UsersToProcess); if (RewriteFactor != 0) { DOUT << "BASED ON IV of STRIDE " << *ReuseIV.Stride << " and BASE " << *ReuseIV.Base << " :\n"; @@ -1063,7 +1251,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, // Now that we know what we need to do, insert the PHI node itself. // DOUT << "INSERTING IV of TYPE " << *ReplacedTy << " of STRIDE " - << *Stride << " and BASE " << *CommonExprs << " :\n"; + << *Stride << " and BASE " << *CommonExprs << ": "; SCEVExpander Rewriter(*SE, *LI); SCEVExpander PreheaderRewriter(*SE, *LI); @@ -1077,44 +1265,53 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, // Emit the initial base value into the loop preheader. Value *CommonBaseV - = PreheaderRewriter.expandCodeFor(CommonExprs, PreInsertPt, - ReplacedTy); + = PreheaderRewriter.expandCodeFor(CommonExprs, PreInsertPt); if (RewriteFactor == 0) { // Create a new Phi for this base, and stick it in the loop header. - NewPHI = new PHINode(ReplacedTy, "iv.", PhiInsertBefore); + NewPHI = PHINode::Create(ReplacedTy, "iv.", PhiInsertBefore); ++NumInserted; // Add common base to the new Phi node. NewPHI->addIncoming(CommonBaseV, Preheader); + // If the stride is negative, insert a sub instead of an add for the + // increment. + bool isNegative = isNonConstantNegative(Stride); + SCEVHandle IncAmount = Stride; + if (isNegative) + IncAmount = SE->getNegativeSCEV(Stride); + // Insert the stride into the preheader. - Value *StrideV = PreheaderRewriter.expandCodeFor(Stride, PreInsertPt, - ReplacedTy); + Value *StrideV = PreheaderRewriter.expandCodeFor(IncAmount, PreInsertPt); if (!isa(StrideV)) ++NumVariable; // Emit the increment of the base value before the terminator of the loop // latch block, and add it to the Phi node. - SCEVHandle IncExp = SCEVAddExpr::get(SCEVUnknown::get(NewPHI), - SCEVUnknown::get(StrideV)); + SCEVHandle IncExp = SE->getUnknown(StrideV); + if (isNegative) + IncExp = SE->getNegativeSCEV(IncExp); + IncExp = SE->getAddExpr(SE->getUnknown(NewPHI), IncExp); - IncV = Rewriter.expandCodeFor(IncExp, LatchBlock->getTerminator(), - ReplacedTy); + IncV = Rewriter.expandCodeFor(IncExp, LatchBlock->getTerminator()); IncV->setName(NewPHI->getName()+".inc"); NewPHI->addIncoming(IncV, LatchBlock); // Remember this in case a later stride is multiple of this. IVsByStride[Stride].addIV(Stride, CommonExprs, NewPHI, IncV); + + DOUT << " IV=%" << NewPHI->getNameStr() << " INC=%" << IncV->getNameStr(); } else { Constant *C = dyn_cast(CommonBaseV); if (!C || (!C->isNullValue() && - !isTargetConstant(SCEVUnknown::get(CommonBaseV), ReplacedTy, TLI))) + !isTargetConstant(SE->getUnknown(CommonBaseV), ReplacedTy, TLI))) // We want the common base emitted into the preheader! This is just // using cast as a copy so BitCast (no-op cast) is appropriate CommonBaseV = new BitCastInst(CommonBaseV, CommonBaseV->getType(), "commonbase", PreInsertPt); } + DOUT << "\n"; // We want to emit code for users inside the loop first. To do this, we // rearrange BasedUser so that the entries at the end have @@ -1137,7 +1334,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, // Get a base value. SCEVHandle Base = UsersToProcess[i].Base; - // Compact everything with this base to be consequetive with this one. + // Compact everything with this base to be consequtive with this one. for (unsigned j = i+1; j != e; ++j) { if (UsersToProcess[j].Base == Base) { std::swap(UsersToProcess[i+1], UsersToProcess[j]); @@ -1151,12 +1348,14 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, while (!UsersToProcess.empty()) { SCEVHandle Base = UsersToProcess.back().Base; - DOUT << " INSERTING code for BASE = " << *Base << ":\n"; - // Emit the code for Base into the preheader. - Value *BaseV = PreheaderRewriter.expandCodeFor(Base, PreInsertPt, - ReplacedTy); - + Value *BaseV = PreheaderRewriter.expandCodeFor(Base, PreInsertPt); + + DOUT << " INSERTING code for BASE = " << *Base << ":"; + if (BaseV->hasName()) + DOUT << " Result value name = %" << BaseV->getNameStr(); + DOUT << "\n"; + // If BaseV is a constant other than 0, make sure that it gets inserted into // the preheader, instead of being forward substituted into the uses. We do // this by forcing a BitCast (noop cast) to be inserted into the preheader @@ -1166,7 +1365,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, // We want this constant emitted into the preheader! This is just // using cast as a copy so BitCast (no-op cast) is appropriate BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert", - PreInsertPt); + PreInsertPt); } } @@ -1195,7 +1394,17 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, RewriteOp = SCEVExpander::InsertCastOfTo(opcode, RewriteOp, ReplacedTy); } - SCEVHandle RewriteExpr = SCEVUnknown::get(RewriteOp); + SCEVHandle RewriteExpr = SE->getUnknown(RewriteOp); + + // If we had to insert new instrutions for RewriteOp, we have to + // consider that they may not have been able to end up immediately + // next to RewriteOp, because non-PHI instructions may never precede + // PHI instructions in a block. In this case, remember where the last + // instruction was inserted so that if we're replacing a different + // PHI node, we can use the later point to expand the final + // RewriteExpr. + Instruction *NewBasePt = dyn_cast(RewriteOp); + if (RewriteOp == NewPHI) NewBasePt = 0; // Clear the SCEVExpander's expression map so that we are guaranteed // to have the code emitted where we expect it. @@ -1204,27 +1413,28 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, // If we are reusing the iv, then it must be multiplied by a constant // factor take advantage of addressing mode scale component. if (RewriteFactor != 0) { - RewriteExpr = - SCEVMulExpr::get(SCEVUnknown::getIntegerSCEV(RewriteFactor, - RewriteExpr->getType()), - RewriteExpr); + RewriteExpr = SE->getMulExpr(SE->getIntegerSCEV(RewriteFactor, + RewriteExpr->getType()), + RewriteExpr); // The common base is emitted in the loop preheader. But since we // are reusing an IV, it has not been used to initialize the PHI node. // Add it to the expression used to rewrite the uses. if (!isa(CommonBaseV) || !cast(CommonBaseV)->isZero()) - RewriteExpr = SCEVAddExpr::get(RewriteExpr, - SCEVUnknown::get(CommonBaseV)); + RewriteExpr = SE->getAddExpr(RewriteExpr, + SE->getUnknown(CommonBaseV)); } // Now that we know what we need to do, insert code before User for the // immediate and any loop-variant expressions. if (!isa(BaseV) || !cast(BaseV)->isZero()) // Add BaseV to the PHI value if needed. - RewriteExpr = SCEVAddExpr::get(RewriteExpr, SCEVUnknown::get(BaseV)); + RewriteExpr = SE->getAddExpr(RewriteExpr, SE->getUnknown(BaseV)); - User.RewriteInstructionToUseNewBase(RewriteExpr, Rewriter, L, this); + User.RewriteInstructionToUseNewBase(RewriteExpr, NewBasePt, + Rewriter, L, this, + DeadInsts); // Mark old value we replaced as possibly dead, so that it is elminated // if we just replaced the last use of that value. @@ -1268,6 +1478,210 @@ bool LoopStrengthReduce::FindIVForUser(ICmpInst *Cond, IVStrideUse *&CondUse, return false; } +namespace { + // Constant strides come first which in turns are sorted by their absolute + // values. If absolute values are the same, then positive strides comes first. + // e.g. + // 4, -1, X, 1, 2 ==> 1, -1, 2, 4, X + struct StrideCompare { + bool operator()(const SCEVHandle &LHS, const SCEVHandle &RHS) { + SCEVConstant *LHSC = dyn_cast(LHS); + SCEVConstant *RHSC = dyn_cast(RHS); + if (LHSC && RHSC) { + int64_t LV = LHSC->getValue()->getSExtValue(); + int64_t RV = RHSC->getValue()->getSExtValue(); + uint64_t ALV = (LV < 0) ? -LV : LV; + uint64_t ARV = (RV < 0) ? -RV : RV; + if (ALV == ARV) + return LV > RV; + else + return ALV < ARV; + } + return (LHSC && !RHSC); + } + }; +} + +/// ChangeCompareStride - If a loop termination compare instruction is the +/// only use of its stride, and the compaison is against a constant value, +/// try eliminate the stride by moving the compare instruction to another +/// stride and change its constant operand accordingly. e.g. +/// +/// loop: +/// ... +/// v1 = v1 + 3 +/// v2 = v2 + 1 +/// if (v2 < 10) goto loop +/// => +/// loop: +/// ... +/// v1 = v1 + 3 +/// if (v1 < 30) goto loop +ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, + IVStrideUse* &CondUse, + const SCEVHandle* &CondStride) { + if (StrideOrder.size() < 2 || + IVUsesByStride[*CondStride].Users.size() != 1) + return Cond; + const SCEVConstant *SC = dyn_cast(*CondStride); + if (!SC) return Cond; + ConstantInt *C = dyn_cast(Cond->getOperand(1)); + if (!C) return Cond; + + ICmpInst::Predicate Predicate = Cond->getPredicate(); + int64_t CmpSSInt = SC->getValue()->getSExtValue(); + int64_t CmpVal = C->getValue().getSExtValue(); + unsigned BitWidth = C->getValue().getBitWidth(); + uint64_t SignBit = 1ULL << (BitWidth-1); + const Type *CmpTy = C->getType(); + const Type *NewCmpTy = NULL; + unsigned TyBits = CmpTy->getPrimitiveSizeInBits(); + unsigned NewTyBits = 0; + int64_t NewCmpVal = CmpVal; + SCEVHandle *NewStride = NULL; + Value *NewIncV = NULL; + int64_t Scale = 1; + + // Look for a suitable stride / iv as replacement. + std::stable_sort(StrideOrder.begin(), StrideOrder.end(), StrideCompare()); + for (unsigned i = 0, e = StrideOrder.size(); i != e; ++i) { + std::map::iterator SI = + IVUsesByStride.find(StrideOrder[i]); + if (!isa(SI->first)) + continue; + int64_t SSInt = cast(SI->first)->getValue()->getSExtValue(); + if (abs(SSInt) <= abs(CmpSSInt) || (SSInt % CmpSSInt) != 0) + continue; + + Scale = SSInt / CmpSSInt; + NewCmpVal = CmpVal * Scale; + APInt Mul = APInt(BitWidth, NewCmpVal); + // Check for overflow. + if (Mul.getSExtValue() != NewCmpVal) { + NewCmpVal = CmpVal; + continue; + } + + // Watch out for overflow. + if (ICmpInst::isSignedPredicate(Predicate) && + (CmpVal & SignBit) != (NewCmpVal & SignBit)) + NewCmpVal = CmpVal; + + if (NewCmpVal != CmpVal) { + // Pick the best iv to use trying to avoid a cast. + NewIncV = NULL; + for (std::vector::iterator UI = SI->second.Users.begin(), + E = SI->second.Users.end(); UI != E; ++UI) { + NewIncV = UI->OperandValToReplace; + if (NewIncV->getType() == CmpTy) + break; + } + if (!NewIncV) { + NewCmpVal = CmpVal; + continue; + } + + NewCmpTy = NewIncV->getType(); + NewTyBits = isa(NewCmpTy) + ? UIntPtrTy->getPrimitiveSizeInBits() + : NewCmpTy->getPrimitiveSizeInBits(); + if (RequiresTypeConversion(NewCmpTy, CmpTy)) { + // Check if it is possible to rewrite it using + // an iv / stride of a smaller integer type. + bool TruncOk = false; + if (NewCmpTy->isInteger()) { + unsigned Bits = NewTyBits; + if (ICmpInst::isSignedPredicate(Predicate)) + --Bits; + uint64_t Mask = (1ULL << Bits) - 1; + if (((uint64_t)NewCmpVal & Mask) == (uint64_t)NewCmpVal) + TruncOk = true; + } + if (!TruncOk) { + NewCmpVal = CmpVal; + continue; + } + } + + // Don't rewrite if use offset is non-constant and the new type is + // of a different type. + // FIXME: too conservative? + if (NewTyBits != TyBits && !isa(CondUse->Offset)) { + NewCmpVal = CmpVal; + continue; + } + + bool AllUsesAreAddresses = true; + std::vector UsersToProcess; + SCEVHandle CommonExprs = CollectIVUsers(SI->first, SI->second, L, + AllUsesAreAddresses, + UsersToProcess); + // Avoid rewriting the compare instruction with an iv of new stride + // if it's likely the new stride uses will be rewritten using the + if (AllUsesAreAddresses && + ValidStride(!CommonExprs->isZero(), Scale, UsersToProcess)) { + NewCmpVal = CmpVal; + continue; + } + + // If scale is negative, use inverse predicate unless it's testing + // for equality. + if (Scale < 0 && !Cond->isEquality()) + Predicate = ICmpInst::getInversePredicate(Predicate); + + NewStride = &StrideOrder[i]; + break; + } + } + + // Forgo this transformation if it the increment happens to be + // unfortunately positioned after the condition, and the condition + // has multiple uses which prevent it from being moved immediately + // before the branch. See + // test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-*.ll + // for an example of this situation. + if (!Cond->hasOneUse()) + for (BasicBlock::iterator I = Cond, E = Cond->getParent()->end(); + I != E; ++I) + if (I == NewIncV) + return Cond; + + if (NewCmpVal != CmpVal) { + // Create a new compare instruction using new stride / iv. + ICmpInst *OldCond = Cond; + Value *RHS; + if (!isa(NewCmpTy)) + RHS = ConstantInt::get(NewCmpTy, NewCmpVal); + else { + RHS = ConstantInt::get(UIntPtrTy, NewCmpVal); + RHS = SCEVExpander::InsertCastOfTo(Instruction::IntToPtr, RHS, NewCmpTy); + } + // Insert new compare instruction. + Cond = new ICmpInst(Predicate, NewIncV, RHS, + L->getHeader()->getName() + ".termcond", + OldCond); + + // Remove the old compare instruction. The old indvar is probably dead too. + DeadInsts.insert(cast(CondUse->OperandValToReplace)); + SE->deleteValueFromRecords(OldCond); + OldCond->replaceAllUsesWith(Cond); + OldCond->eraseFromParent(); + + IVUsesByStride[*CondStride].Users.pop_back(); + SCEVHandle NewOffset = TyBits == NewTyBits + ? SE->getMulExpr(CondUse->Offset, + SE->getConstant(ConstantInt::get(CmpTy, Scale))) + : SE->getConstant(ConstantInt::get(NewCmpTy, + cast(CondUse->Offset)->getValue()->getSExtValue()*Scale)); + IVUsesByStride[*NewStride].addUser(NewOffset, Cond, NewIncV); + CondUse = &IVUsesByStride[*NewStride].Users.back(); + CondStride = NewStride; + ++NumEliminated; + } + + return Cond; +} + // OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar // uses in the loop, look to see if we can eliminate some, in favor of using // common indvars for the different uses. @@ -1294,7 +1708,10 @@ void LoopStrengthReduce::OptimizeIndvars(Loop *L) { if (!FindIVForUser(Cond, CondUse, CondStride)) return; // setcc doesn't use the IV. - + + // If possible, change stride and operands of the compare instruction to + // eliminate one stride. + Cond = ChangeCompareStride(L, Cond, CondUse, CondStride); // It's possible for the setcc instruction to be anywhere in the loop, and // possible for it to have multiple users. If it is not immediately before @@ -1318,38 +1735,14 @@ void LoopStrengthReduce::OptimizeIndvars(Loop *L) { // If we get to here, we know that we can transform the setcc instruction to // use the post-incremented version of the IV, allowing us to coalesce the // live ranges for the IV correctly. - CondUse->Offset = SCEV::getMinusSCEV(CondUse->Offset, *CondStride); + CondUse->Offset = SE->getMinusSCEV(CondUse->Offset, *CondStride); CondUse->isUseOfPostIncrementedValue = true; } -namespace { - // Constant strides come first which in turns are sorted by their absolute - // values. If absolute values are the same, then positive strides comes first. - // e.g. - // 4, -1, X, 1, 2 ==> 1, -1, 2, 4, X - struct StrideCompare { - bool operator()(const SCEVHandle &LHS, const SCEVHandle &RHS) { - SCEVConstant *LHSC = dyn_cast(LHS); - SCEVConstant *RHSC = dyn_cast(RHS); - if (LHSC && RHSC) { - int64_t LV = LHSC->getValue()->getSExtValue(); - int64_t RV = RHSC->getValue()->getSExtValue(); - uint64_t ALV = (LV < 0) ? -LV : LV; - uint64_t ARV = (RV < 0) ? -RV : RV; - if (ALV == ARV) - return LV > RV; - else - return ALV < ARV; - } - return (LHSC && !RHSC); - } - }; -} - bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { LI = &getAnalysis(); - EF = &getAnalysis(); + DT = &getAnalysis(); SE = &getAnalysis(); TD = &getAnalysis(); UIntPtrTy = TD->getIntPtrType(); @@ -1357,7 +1750,7 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { // Find all uses of induction variables in this loop, and catagorize // them by stride. Start by finding all of the PHI nodes in the header for // this loop. If they are induction variables, inspect their uses. - std::set Processed; // Don't reprocess instructions. + SmallPtrSet Processed; // Don't reprocess instructions. for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ++I) AddUsersIfInteresting(I, L, Processed); @@ -1389,14 +1782,14 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { #endif // IVsByStride keeps IVs for one particular loop. - IVsByStride.clear(); + assert(IVsByStride.empty() && "Stale entries in IVsByStride?"); // Sort the StrideOrder so we process larger strides first. std::stable_sort(StrideOrder.begin(), StrideOrder.end(), StrideCompare()); // Note: this processes each stride/type pair individually. All users passed // into StrengthReduceStridedIVUsers have the same type AND stride. Also, - // node that we iterate over IVUsesByStride indirectly by using StrideOrder. + // note that we iterate over IVUsesByStride indirectly by using StrideOrder. // This extra layer of indirection makes the ordering of strides deterministic // - not dependent on map order. for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; ++Stride) { @@ -1406,36 +1799,39 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { StrengthReduceStridedIVUsers(SI->first, SI->second, L, HasOneStride); } + // We're done analyzing this loop; release all the state we built up for it. + CastedPointers.clear(); + IVUsesByStride.clear(); + IVsByStride.clear(); + StrideOrder.clear(); + // Clean up after ourselves if (!DeadInsts.empty()) { DeleteTriviallyDeadInstructions(DeadInsts); BasicBlock::iterator I = L->getHeader()->begin(); - PHINode *PN; - while ((PN = dyn_cast(I))) { - ++I; // Preincrement iterator to avoid invalidating it when deleting PN. - - // At this point, we know that we have killed one or more GEP - // instructions. It is worth checking to see if the cann indvar is also - // dead, so that we can remove it as well. The requirements for the cann - // indvar to be considered dead are: - // 1. the cann indvar has one use - // 2. the use is an add instruction - // 3. the add has one use - // 4. the add is used by the cann indvar - // If all four cases above are true, then we can remove both the add and - // the cann indvar. + while (PHINode *PN = dyn_cast(I++)) { + // At this point, we know that we have killed one or more IV users. + // It is worth checking to see if the cann indvar is also + // dead, so that we can remove it as well. + // + // We can remove a PHI if it is on a cycle in the def-use graph + // where each node in the cycle has degree one, i.e. only one use, + // and is an instruction with no side effects. + // // FIXME: this needs to eliminate an induction variable even if it's being // compared against some value to decide loop termination. if (PN->hasOneUse()) { - Instruction *BO = dyn_cast(*PN->use_begin()); - if (BO && (isa(BO) || isa(BO))) { - if (BO->hasOneUse() && PN == *(BO->use_begin())) { - DeadInsts.insert(BO); - // Break the cycle, then delete the PHI. + for (Instruction *J = dyn_cast(*PN->use_begin()); + J && J->hasOneUse() && !J->mayWriteToMemory(); + J = dyn_cast(*J->use_begin())) { + // If we find the original PHI, we've discovered a cycle. + if (J == PN) { + // Break the cycle and mark the PHI for deletion. + SE->deleteValueFromRecords(PN); PN->replaceAllUsesWith(UndefValue::get(PN->getType())); - SE->deleteInstructionFromRecords(PN); - PN->eraseFromParent(); + DeadInsts.insert(PN); + break; } } } @@ -1443,8 +1839,5 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { DeleteTriviallyDeadInstructions(DeadInsts); } - CastedPointers.clear(); - IVUsesByStride.clear(); - StrideOrder.clear(); return false; }