#include "llvm/DerivedTypes.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetData.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Target/TargetLowering.h"
#include <algorithm>
-#include <iostream>
#include <set>
using namespace llvm;
+STATISTIC(NumReduced , "Number of GEPs strength reduced");
+STATISTIC(NumInserted, "Number of PHIs inserted");
+STATISTIC(NumVariable, "Number of PHIs with variable strides");
+
namespace {
- Statistic<> NumReduced ("loop-reduce", "Number of GEPs strength reduced");
- Statistic<> NumInserted("loop-reduce", "Number of PHIs inserted");
- Statistic<> NumVariable("loop-reduce","Number of PHIs with variable strides");
+
+ struct BasedUser;
/// IVStrideUse - Keep track of one use of a strided induction variable, where
/// the stride is stored externally. The Offset member keeps track of the
/// offset from the IV, User is the actual user of the operand, and 'Operand'
/// is the operand # of the User that is the use.
- struct IVStrideUse {
+ struct VISIBILITY_HIDDEN IVStrideUse {
SCEVHandle Offset;
Instruction *User;
Value *OperandValToReplace;
/// have an operand that is based on the trip count multiplied by some stride.
/// The stride for all of these users is common and kept external to this
/// structure.
- struct IVUsersOfOneStride {
+ struct VISIBILITY_HIDDEN IVUsersOfOneStride {
/// Users - Keep track of all of the users of this stride as well as the
/// initial value and the operand that uses the IV.
std::vector<IVStrideUse> Users;
/// IVInfo - This structure keeps track of one IV expression inserted during
/// StrengthReduceStridedIVUsers. It contains the stride, the common base, as
/// well as the PHI node and increment value created for rewrite.
- struct IVExpr {
+ struct VISIBILITY_HIDDEN IVExpr {
SCEVHandle Stride;
SCEVHandle Base;
PHINode *PHI;
Value *IncV;
IVExpr()
- : Stride(SCEVUnknown::getIntegerSCEV(0, Type::UIntTy)),
- Base (SCEVUnknown::getIntegerSCEV(0, Type::UIntTy)) {}
+ : Stride(SCEVUnknown::getIntegerSCEV(0, Type::Int32Ty)),
+ Base (SCEVUnknown::getIntegerSCEV(0, Type::Int32Ty)) {}
IVExpr(const SCEVHandle &stride, const SCEVHandle &base, PHINode *phi,
Value *incv)
: Stride(stride), Base(base), PHI(phi), IncV(incv) {}
/// IVsOfOneStride - This structure keeps track of all IV expression inserted
/// during StrengthReduceStridedIVUsers for a particular stride of the IV.
- struct IVsOfOneStride {
+ struct VISIBILITY_HIDDEN IVsOfOneStride {
std::vector<IVExpr> IVs;
void addIV(const SCEVHandle &Stride, const SCEVHandle &Base, PHINode *PHI,
}
};
- class LoopStrengthReduce : public FunctionPass {
+ class VISIBILITY_HIDDEN LoopStrengthReduce : public LoopPass {
LoopInfo *LI;
ETForest *EF;
ScalarEvolution *SE;
: TLI(tli) {
}
- virtual bool runOnFunction(Function &) {
- LI = &getAnalysis<LoopInfo>();
- EF = &getAnalysis<ETForest>();
- SE = &getAnalysis<ScalarEvolution>();
- TD = &getAnalysis<TargetData>();
- UIntPtrTy = TD->getIntPtrType();
- Changed = false;
-
- for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
- runOnLoop(*I);
-
- return Changed;
- }
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
// We split critical edges, so we change the CFG. However, we do update
/// getCastedVersionOf - Return the specified value casted to uintptr_t.
///
- Value *getCastedVersionOf(Value *V);
+ Value *getCastedVersionOf(Instruction::CastOps opcode, Value *V);
private:
- void runOnLoop(Loop *L);
bool AddUsersIfInteresting(Instruction *I, Loop *L,
std::set<Instruction*> &Processed);
SCEVHandle GetExpressionSCEV(Instruction *E, Loop *L);
void OptimizeIndvars(Loop *L);
- unsigned CheckForIVReuse(const SCEVHandle &Stride, IVExpr &IV);
+ unsigned CheckForIVReuse(const SCEVHandle&, IVExpr&, const Type*,
+ const std::vector<BasedUser>& UsersToProcess);
+
+ bool ValidStride(int64_t, const std::vector<BasedUser>& UsersToProcess);
void StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
IVUsersOfOneStride &Uses,
Loop *L, bool isOnlyStride);
void DeleteTriviallyDeadInstructions(std::set<Instruction*> &Insts);
};
- RegisterOpt<LoopStrengthReduce> X("loop-reduce",
- "Loop Strength Reduction");
+ RegisterPass<LoopStrengthReduce> X("loop-reduce", "Loop Strength Reduction");
}
-FunctionPass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
+LoopPass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
return new LoopStrengthReduce(TLI);
}
-/// getCastedVersionOf - Return the specified value casted to uintptr_t.
+/// getCastedVersionOf - Return the specified value casted to uintptr_t. This
+/// assumes that the Value* V is of integer or pointer type only.
///
-Value *LoopStrengthReduce::getCastedVersionOf(Value *V) {
+Value *LoopStrengthReduce::getCastedVersionOf(Instruction::CastOps opcode,
+ Value *V) {
if (V->getType() == UIntPtrTy) return V;
if (Constant *CB = dyn_cast<Constant>(V))
- return ConstantExpr::getCast(CB, UIntPtrTy);
+ return ConstantExpr::getCast(opcode, CB, UIntPtrTy);
Value *&New = CastedPointers[V];
if (New) return New;
- New = SCEVExpander::InsertCastOfTo(V, UIntPtrTy);
+ New = SCEVExpander::InsertCastOfTo(opcode, V, UIntPtrTy);
DeadInsts.insert(cast<Instruction>(New));
return New;
}
/// GetExpressionSCEV - Compute and return the SCEV for the specified
/// instruction.
SCEVHandle LoopStrengthReduce::GetExpressionSCEV(Instruction *Exp, Loop *L) {
+ // Pointer to pointer bitcast instructions return the same value as their
+ // operand.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(Exp)) {
+ if (SE->hasSCEV(BCI) || !isa<Instruction>(BCI->getOperand(0)))
+ return SE->getSCEV(BCI);
+ SCEVHandle R = GetExpressionSCEV(cast<Instruction>(BCI->getOperand(0)), L);
+ SE->setSCEV(BCI, R);
+ return R;
+ }
+
// Scalar Evolutions doesn't know how to compute SCEV's for GEP instructions.
// If this is a GEP that SE doesn't know about, compute it now and insert it.
// If this is not a GEP, or if we have already done this computation, just let
// Build up the base expression. Insert an LLVM cast of the pointer to
// uintptr_t first.
- SCEVHandle GEPVal = SCEVUnknown::get(getCastedVersionOf(GEP->getOperand(0)));
+ SCEVHandle GEPVal = SCEVUnknown::get(
+ getCastedVersionOf(Instruction::PtrToInt, GEP->getOperand(0)));
gep_type_iterator GTI = gep_type_begin(GEP);
// operand.
if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
const StructLayout *SL = TD->getStructLayout(STy);
- unsigned Idx = cast<ConstantUInt>(GEP->getOperand(i))->getValue();
- uint64_t Offset = SL->MemberOffsets[Idx];
+ unsigned Idx = cast<ConstantInt>(GEP->getOperand(i))->getZExtValue();
+ uint64_t Offset = SL->getElementOffset(Idx);
GEPVal = SCEVAddExpr::get(GEPVal,
SCEVUnknown::getIntegerSCEV(Offset, UIntPtrTy));
} else {
- Value *OpVal = getCastedVersionOf(GEP->getOperand(i));
+ unsigned GEPOpiBits =
+ GEP->getOperand(i)->getType()->getPrimitiveSizeInBits();
+ unsigned IntPtrBits = UIntPtrTy->getPrimitiveSizeInBits();
+ Instruction::CastOps opcode = (GEPOpiBits < IntPtrBits ?
+ Instruction::SExt : (GEPOpiBits > IntPtrBits ? Instruction::Trunc :
+ Instruction::BitCast));
+ Value *OpVal = getCastedVersionOf(opcode, GEP->getOperand(i));
SCEVHandle Idx = SE->getSCEV(OpVal);
uint64_t TypeSize = TD->getTypeSize(GTI.getIndexedType());
if (TypeSize != 1)
Idx = SCEVMulExpr::get(Idx,
- SCEVConstant::get(ConstantUInt::get(UIntPtrTy,
+ SCEVConstant::get(ConstantInt::get(UIntPtrTy,
TypeSize)));
GEPVal = SCEVAddExpr::get(GEPVal, Idx);
}
Start = SCEVAddExpr::get(Start, AE->getOperand(i));
}
- } else if (SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SH)) {
+ } else if (isa<SCEVAddRecExpr>(SH)) {
TheAddRec = SH;
} else {
return false; // not analyzable.
Start = SCEVAddExpr::get(Start, AddRec->getOperand(0));
if (!isa<SCEVConstant>(AddRec->getOperand(1)))
- DEBUG(std::cerr << "[" << L->getHeader()->getName()
- << "] Variable stride: " << *AddRec << "\n");
+ DOUT << "[" << L->getHeader()->getName()
+ << "] Variable stride: " << *AddRec << "\n";
Stride = AddRec->getOperand(1);
- // Check that all constant strides are the unsigned type, we don't want to
- // have two IV's one of signed stride 4 and one of unsigned stride 4 to not be
- // merged.
- assert((!isa<SCEVConstant>(Stride) || Stride->getType()->isUnsigned()) &&
- "Constants should be canonicalized to unsigned!");
-
return true;
}
// post-incremented value.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == IV) {
- SplitCriticalEdge(PN->getIncomingBlock(i), PN->getParent(), P);
+ SplitCriticalEdge(PN->getIncomingBlock(i), PN->getParent(), P,
+ true);
+ // Splitting the critical edge can reduce the number of entries in this
+ // PHI.
+ e = PN->getNumIncomingValues();
if (--NumUses == 0) break;
}
SCEVHandle Stride = Start;
if (!getSCEVStartAndStride(ISE, L, Start, Stride))
return false; // Non-reducible symbolic expression, bail out.
-
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;++UI){
+
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;) {
Instruction *User = cast<Instruction>(*UI);
+ // Increment iterator now because IVUseShouldUsePostIncValue may remove
+ // User from the list of I users.
+ ++UI;
+
// Do not infinitely recurse on PHI nodes.
if (isa<PHINode>(User) && Processed.count(User))
continue;
// don't recurse into it.
bool AddUserToIVUsers = false;
if (LI->getLoopFor(User->getParent()) != L) {
- DEBUG(std::cerr << "FOUND USER in other loop: " << *User
- << " OF SCEV: " << *ISE << "\n");
+ DOUT << "FOUND USER in other loop: " << *User
+ << " OF SCEV: " << *ISE << "\n";
AddUserToIVUsers = true;
} else if (!AddUsersIfInteresting(User, L, Processed)) {
- DEBUG(std::cerr << "FOUND USER: " << *User
- << " OF SCEV: " << *ISE << "\n");
+ DOUT << "FOUND USER: " << *User
+ << " OF SCEV: " << *ISE << "\n";
AddUserToIVUsers = true;
}
SCEVHandle NewStart = SCEV::getMinusSCEV(Start, Stride);
StrideUses.addUser(NewStart, User, I);
StrideUses.Users.back().isUseOfPostIncrementedValue = true;
- DEBUG(std::cerr << " USING POSTINC SCEV, START=" << *NewStart<< "\n");
+ DOUT << " USING POSTINC SCEV, START=" << *NewStart<< "\n";
} else {
StrideUses.addUser(Start, User, I);
}
}
void BasedUser::dump() const {
- std::cerr << " Base=" << *Base;
- std::cerr << " Imm=" << *Imm;
+ cerr << " Base=" << *Base;
+ cerr << " Imm=" << *Imm;
if (EmittedBase)
- std::cerr << " EB=" << *EmittedBase;
+ cerr << " EB=" << *EmittedBase;
- std::cerr << " Inst: " << *Inst;
+ cerr << " Inst: " << *Inst;
}
Value *BasedUser::InsertCodeForBaseAtPosition(const SCEVHandle &NewBase,
// If there is no immediate value, skip the next part.
if (SCEVConstant *SC = dyn_cast<SCEVConstant>(Imm))
- if (SC->getValue()->isNullValue())
+ if (SC->getValue()->isZero())
return Rewriter.expandCodeFor(NewBase, BaseInsertPt,
OperandValToReplace->getType());
Value *NewVal = InsertCodeForBaseAtPosition(NewBase, Rewriter, Inst, L);
// Replace the use of the operand Value with the new Phi we just created.
Inst->replaceUsesOfWith(OperandValToReplace, NewVal);
- DEBUG(std::cerr << " CHANGED: IMM =" << *Imm << " Inst = " << *Inst);
+ DOUT << " CHANGED: IMM =" << *Imm << " Inst = " << *Inst;
return;
}
(PN->getParent() != L->getHeader() || !L->contains(PHIPred))) {
// First step, split the critical edge.
- SplitCriticalEdge(PHIPred, PN->getParent(), P);
+ SplitCriticalEdge(PHIPred, PN->getParent(), P, true);
// Next step: move the basic block. In particular, if the PHI node
// is outside of the loop, and PredTI is in the loop, we want to
BasicBlock *NewBB = PN->getIncomingBlock(i);
NewBB->moveBefore(PN->getParent());
}
+
+ // Splitting the edge can reduce the number of PHI entries we have.
+ e = PN->getNumIncomingValues();
}
Value *&Code = InsertedCode[PN->getIncomingBlock(i)];
Rewriter.clear();
}
}
- DEBUG(std::cerr << " CHANGED: IMM =" << *Imm << " Inst = " << *Inst);
+ DOUT << " CHANGED: IMM =" << *Imm << " Inst = " << *Inst;
}
/// isTargetConstant - Return true if the following can be referenced by the
/// immediate field of a target instruction.
-static bool isTargetConstant(const SCEVHandle &V, const TargetLowering *TLI) {
+static bool isTargetConstant(const SCEVHandle &V, const Type *UseTy,
+ const TargetLowering *TLI) {
if (SCEVConstant *SC = dyn_cast<SCEVConstant>(V)) {
- int64_t V = SC->getValue()->getSExtValue();
+ int64_t VC = SC->getValue()->getSExtValue();
if (TLI)
- return TLI->isLegalAddressImmediate(V);
+ return TLI->isLegalAddressImmediate(VC, UseTy);
else
// Defaults to PPC. PPC allows a sign-extended 16-bit immediate field.
- return (V > -(1 << 16) && V < (1 << 16)-1);
+ return (VC > -(1 << 16) && VC < (1 << 16)-1);
}
if (SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V))
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(SU->getValue()))
- if (CE->getOpcode() == Instruction::Cast) {
+ if (CE->getOpcode() == Instruction::PtrToInt) {
Constant *Op0 = CE->getOperand(0);
- if (isa<GlobalValue>(Op0) &&
- TLI &&
+ if (isa<GlobalValue>(Op0) && TLI &&
TLI->isLegalAddressImmediate(cast<GlobalValue>(Op0)))
return true;
}
/// that can fit into the immediate field of instructions in the target.
/// Accumulate these immediate values into the Imm value.
static void MoveImmediateValues(const TargetLowering *TLI,
+ Instruction *User,
SCEVHandle &Val, SCEVHandle &Imm,
bool isAddress, Loop *L) {
+ const Type *UseTy = User->getType();
+ if (StoreInst *SI = dyn_cast<StoreInst>(User))
+ UseTy = SI->getOperand(0)->getType();
+
if (SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
std::vector<SCEVHandle> NewOps;
NewOps.reserve(SAE->getNumOperands());
for (unsigned i = 0; i != SAE->getNumOperands(); ++i) {
SCEVHandle NewOp = SAE->getOperand(i);
- MoveImmediateValues(TLI, NewOp, Imm, isAddress, L);
+ MoveImmediateValues(TLI, User, NewOp, Imm, isAddress, L);
if (!NewOp->isLoopInvariant(L)) {
// If this is a loop-variant expression, it must stay in the immediate
} else if (SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) {
// Try to pull immediates out of the start value of nested addrec's.
SCEVHandle Start = SARE->getStart();
- MoveImmediateValues(TLI, Start, Imm, isAddress, L);
+ MoveImmediateValues(TLI, User, Start, Imm, isAddress, L);
if (Start != SARE->getStart()) {
std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end());
return;
} else if (SCEVMulExpr *SME = dyn_cast<SCEVMulExpr>(Val)) {
// Transform "8 * (4 + v)" -> "32 + 8*V" if "32" fits in the immed field.
- if (isAddress && isTargetConstant(SME->getOperand(0), TLI) &&
+ if (isAddress && isTargetConstant(SME->getOperand(0), UseTy, TLI) &&
SME->getNumOperands() == 2 && SME->isLoopInvariant(L)) {
SCEVHandle SubImm = SCEVUnknown::getIntegerSCEV(0, Val->getType());
SCEVHandle NewOp = SME->getOperand(1);
- MoveImmediateValues(TLI, NewOp, SubImm, isAddress, L);
+ MoveImmediateValues(TLI, User, NewOp, SubImm, isAddress, L);
// If we extracted something out of the subexpressions, see if we can
// simplify this!
// Scale SubImm up by "8". If the result is a target constant, we are
// good.
SubImm = SCEVMulExpr::get(SubImm, SME->getOperand(0));
- if (isTargetConstant(SubImm, TLI)) {
+ if (isTargetConstant(SubImm, UseTy, TLI)) {
// Accumulate the immediate.
Imm = SCEVAddExpr::get(Imm, SubImm);
// Loop-variant expressions must stay in the immediate field of the
// expression.
- if ((isAddress && isTargetConstant(Val, TLI)) ||
+ if ((isAddress && isTargetConstant(Val, UseTy, TLI)) ||
!Val->isLoopInvariant(L)) {
Imm = SCEVAddExpr::get(Imm, Val);
Val = SCEVUnknown::getIntegerSCEV(0, Val->getType());
}
-/// IncrementAddExprUses - Decompose the specified expression into its added
-/// subexpressions, and increment SubExpressionUseCounts for each of these
-/// decomposed parts.
+/// SeparateSubExprs - Decompose Expr into all of the subexpressions that are
+/// added together. This is used to reassociate common addition subexprs
+/// together for maximal sharing when rewriting bases.
static void SeparateSubExprs(std::vector<SCEVHandle> &SubExprs,
SCEVHandle Expr) {
if (SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(Expr)) {
SeparateSubExprs(SubExprs, SARE->getOperand(0));
}
} else if (!isa<SCEVConstant>(Expr) ||
- !cast<SCEVConstant>(Expr)->getValue()->isNullValue()) {
+ !cast<SCEVConstant>(Expr)->getValue()->isZero()) {
// Do not add zero.
SubExprs.push_back(Expr);
}
///
static bool isZero(SCEVHandle &V) {
if (SCEVConstant *SC = dyn_cast<SCEVConstant>(V))
- return SC->getValue()->getRawValue() == 0;
+ return SC->getValue()->isZero();
return false;
}
+/// ValidStride - Check whether the given Scale is valid for all loads and
+/// stores in UsersToProcess. Pulled into a function to avoid disturbing the
+/// sensibilities of those who dislike goto's.
+///
+bool LoopStrengthReduce::ValidStride(int64_t Scale,
+                      const std::vector<BasedUser>& UsersToProcess) {
+  int64_t Imm;
+  for (unsigned i=0, e = UsersToProcess.size(); i!=e; ++i) {
+    // Use the user's constant immediate offset when it has one; otherwise
+    // query the target with an immediate of 0.
+    if (SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm))
+      Imm = SC->getValue()->getSExtValue();
+    else
+      Imm = 0;
+
+    // If this is a load or other access, pass the type of the access in.
+    const Type *AccessTy = Type::VoidTy;
+    if (StoreInst *SI = dyn_cast<StoreInst>(UsersToProcess[i].Inst))
+      AccessTy = SI->getOperand(0)->getType();
+    else if (LoadInst *LI = dyn_cast<LoadInst>(UsersToProcess[i].Inst))
+      AccessTy = LI->getType();
+
+    // Reject the scale as soon as any user's scale+immediate combination is
+    // not encodable in the target's addressing mode for this access type.
+    if (!TLI->isLegalAddressScaleAndImm(Scale, Imm, AccessTy))
+      return false;
+  }
+  return true;
+}
/// CheckForIVReuse - Returns the multiple if the stride is the multiple
/// of a previous stride and it is a legal value for the target addressing
/// mode scale component. This allows the users of this stride to be rewritten
/// as prev iv * factor. It returns 0 if no reuse is possible.
-unsigned LoopStrengthReduce::CheckForIVReuse(const SCEVHandle &Stride,
- IVExpr &IV) {
+unsigned LoopStrengthReduce::CheckForIVReuse(const SCEVHandle &Stride,
+ IVExpr &IV, const Type *Ty,
+ const std::vector<BasedUser>& UsersToProcess) {
if (!TLI) return 0;
if (SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) {
int64_t SInt = SC->getValue()->getSExtValue();
if (SInt == 1) return 0;
- for (TargetLowering::legal_am_scale_iterator
- I = TLI->legal_am_scale_begin(), E = TLI->legal_am_scale_end();
- I != E; ++I) {
- unsigned Scale = *I;
- if (unsigned(abs(SInt)) < Scale || (SInt % Scale) != 0)
+ for (std::map<SCEVHandle, IVsOfOneStride>::iterator SI= IVsByStride.begin(),
+ SE = IVsByStride.end(); SI != SE; ++SI) {
+ int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
+ if (SInt != -SSInt &&
+ (unsigned(abs(SInt)) < SSInt || (SInt % SSInt) != 0))
continue;
- std::map<SCEVHandle, IVsOfOneStride>::iterator SI =
- IVsByStride.find(SCEVUnknown::getIntegerSCEV(SInt/Scale, Type::UIntTy));
- if (SI == IVsByStride.end())
- continue;
- for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
- IE = SI->second.IVs.end(); II != IE; ++II)
- // FIXME: Only handle base == 0 for now.
- if (isZero(II->Base)) {
- IV = *II;
- return Scale;
- }
+ int64_t Scale = SInt / SSInt;
+ // Check that this stride is valid for all the types used for loads and
+ // stores; if it can be used for some and not others, we might as well use
+ // the original stride everywhere, since we have to create the IV for it
+ // anyway.
+ if (ValidStride(Scale, UsersToProcess))
+ for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
+ IE = SI->second.IVs.end(); II != IE; ++II)
+ // FIXME: Only handle base == 0 for now.
+ // Only reuse previous IV if it would not require a type conversion.
+ if (isZero(II->Base) && II->Base->getType() == Ty) {
+ IV = *II;
+ return Scale;
+ }
}
}
-
return 0;
}
+/// PartitionByIsUseOfPostIncrementedValue - Simple boolean predicate that
+/// returns true if Val's isUseOfPostIncrementedValue is true.
+/// Used as the predicate for the std::partition call that moves
+/// post-incremented users ahead of in-loop users before rewriting.
+static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) {
+  return Val.isUseOfPostIncrementedValue;
+}
/// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single
/// stride of IV. All of the users may have different starting values, and this
"Base value is not loop invariant!");
}
- // Check if it is possible to reuse a IV with stride that is factor of this
- // stride. And the multiple is a number that can be encoded in the scale
- // field of the target addressing mode.
- PHINode *NewPHI = NULL;
- Value *IncV = NULL;
- IVExpr ReuseIV;
- unsigned RewriteFactor = CheckForIVReuse(Stride, ReuseIV);
- if (RewriteFactor != 0) {
- DEBUG(std::cerr << "BASED ON IV of STRIDE " << *ReuseIV.Stride
- << " and BASE " << *ReuseIV.Base << " :\n");
- NewPHI = ReuseIV.PHI;
- IncV = ReuseIV.IncV;
- }
-
// We now have a whole bunch of uses of like-strided induction variables, but
// they might all have different bases. We want to emit one PHI node for this
// stride which we fold as many common expressions (between the IVs) into as
if (SI->getOperand(1) == UsersToProcess[i].OperandValToReplace)
isAddress = true;
- MoveImmediateValues(TLI, UsersToProcess[i].Base, UsersToProcess[i].Imm,
- isAddress, L);
+ MoveImmediateValues(TLI, UsersToProcess[i].Inst, UsersToProcess[i].Base,
+ UsersToProcess[i].Imm, isAddress, L);
}
}
+  // Check if it is possible to reuse an IV with a stride that is a factor of
+  // this stride.  The multiple must be a number that can be encoded in the
+  // scale field of the target addressing mode, and we will have a valid
+  // instruction after this substitution, including the immediate field, if
+  // any.
+ PHINode *NewPHI = NULL;
+ Value *IncV = NULL;
+ IVExpr ReuseIV;
+ unsigned RewriteFactor = CheckForIVReuse(Stride, ReuseIV,
+ CommonExprs->getType(),
+ UsersToProcess);
+ if (RewriteFactor != 0) {
+ DOUT << "BASED ON IV of STRIDE " << *ReuseIV.Stride
+ << " and BASE " << *ReuseIV.Base << " :\n";
+ NewPHI = ReuseIV.PHI;
+ IncV = ReuseIV.IncV;
+ }
+
+ const Type *ReplacedTy = CommonExprs->getType();
+
// Now that we know what we need to do, insert the PHI node itself.
//
- DEBUG(std::cerr << "INSERTING IV of STRIDE " << *Stride << " and BASE "
- << *CommonExprs << " :\n");
+ DOUT << "INSERTING IV of TYPE " << *ReplacedTy << " of STRIDE "
+ << *Stride << " and BASE " << *CommonExprs << " :\n";
SCEVExpander Rewriter(*SE, *LI);
SCEVExpander PreheaderRewriter(*SE, *LI);
BasicBlock *LatchBlock = L->getLoopLatch();
- const Type *ReplacedTy = CommonExprs->getType();
// Emit the initial base value into the loop preheader.
Value *CommonBaseV
Constant *C = dyn_cast<Constant>(CommonBaseV);
if (!C ||
(!C->isNullValue() &&
- !isTargetConstant(SCEVUnknown::get(CommonBaseV), TLI)))
- // We want the common base emitted into the preheader!
- CommonBaseV = new CastInst(CommonBaseV, CommonBaseV->getType(),
- "commonbase", PreInsertPt);
+ !isTargetConstant(SCEVUnknown::get(CommonBaseV), ReplacedTy, TLI)))
+ // We want the common base emitted into the preheader! This is just
+ // using cast as a copy so BitCast (no-op cast) is appropriate
+ CommonBaseV = new BitCastInst(CommonBaseV, CommonBaseV->getType(),
+ "commonbase", PreInsertPt);
}
- // Sort by the base value, so that all IVs with identical bases are next to
- // each other.
+ // We want to emit code for users inside the loop first. To do this, we
+ // rearrange BasedUser so that the entries at the end have
+ // isUseOfPostIncrementedValue = false, because we pop off the end of the
+ // vector (so we handle them first).
+ std::partition(UsersToProcess.begin(), UsersToProcess.end(),
+ PartitionByIsUseOfPostIncrementedValue);
+
+ // Sort this by base, so that things with the same base are handled
+ // together. By partitioning first and stable-sorting later, we are
+ // guaranteed that within each base we will pop off users from within the
+ // loop before users outside of the loop with a particular base.
+ //
+ // We would like to use stable_sort here, but we can't. The problem is that
+ // SCEVHandle's don't have a deterministic ordering w.r.t to each other, so
+ // we don't have anything to do a '<' comparison on. Because we think the
+ // number of uses is small, do a horrible bubble sort which just relies on
+ // ==.
+ for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
+ // Get a base value.
+ SCEVHandle Base = UsersToProcess[i].Base;
+
+      // Compact everything with this base to be consecutive with this one.
+ for (unsigned j = i+1; j != e; ++j) {
+ if (UsersToProcess[j].Base == Base) {
+ std::swap(UsersToProcess[i+1], UsersToProcess[j]);
+ ++i;
+ }
+ }
+ }
+
+ // Process all the users now. This outer loop handles all bases, the inner
+ // loop handles all users of a particular base.
while (!UsersToProcess.empty()) {
SCEVHandle Base = UsersToProcess.back().Base;
- DEBUG(std::cerr << " INSERTING code for BASE = " << *Base << ":\n");
+ DOUT << " INSERTING code for BASE = " << *Base << ":\n";
// Emit the code for Base into the preheader.
Value *BaseV = PreheaderRewriter.expandCodeFor(Base, PreInsertPt,
// If BaseV is a constant other than 0, make sure that it gets inserted into
// the preheader, instead of being forward substituted into the uses. We do
- // this by forcing a noop cast to be inserted into the preheader in this
- // case.
- if (Constant *C = dyn_cast<Constant>(BaseV))
- if (!C->isNullValue() && !isTargetConstant(Base, TLI)) {
- // We want this constant emitted into the preheader!
- BaseV = new CastInst(BaseV, BaseV->getType(), "preheaderinsert",
+ // this by forcing a BitCast (noop cast) to be inserted into the preheader
+ // in this case.
+ if (Constant *C = dyn_cast<Constant>(BaseV)) {
+ if (!C->isNullValue() && !isTargetConstant(Base, ReplacedTy, TLI)) {
+ // We want this constant emitted into the preheader! This is just
+ // using cast as a copy so BitCast (no-op cast) is appropriate
+ BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert",
PreInsertPt);
}
-
+ }
+
// Emit the code to add the immediate offset to the Phi value, just before
// the instructions that we identified as using this stride and base.
- unsigned ScanPos = 0;
do {
+ // FIXME: Use emitted users to emit other users.
BasedUser &User = UsersToProcess.back();
// If this instruction wants to use the post-incremented value, move it
if (L->contains(User.Inst->getParent()))
User.Inst->moveBefore(LatchBlock->getTerminator());
}
+ if (RewriteOp->getType() != ReplacedTy) {
+ Instruction::CastOps opcode = Instruction::Trunc;
+ if (ReplacedTy->getPrimitiveSizeInBits() ==
+ RewriteOp->getType()->getPrimitiveSizeInBits())
+ opcode = Instruction::BitCast;
+ RewriteOp = SCEVExpander::InsertCastOfTo(opcode, RewriteOp, ReplacedTy);
+ }
+
SCEVHandle RewriteExpr = SCEVUnknown::get(RewriteOp);
// Clear the SCEVExpander's expression map so that we are guaranteed
// are reusing an IV, it has not been used to initialize the PHI node.
// Add it to the expression used to rewrite the uses.
if (!isa<ConstantInt>(CommonBaseV) ||
- !cast<ConstantInt>(CommonBaseV)->isNullValue())
+ !cast<ConstantInt>(CommonBaseV)->isZero())
RewriteExpr = SCEVAddExpr::get(RewriteExpr,
SCEVUnknown::get(CommonBaseV));
}
// Now that we know what we need to do, insert code before User for the
// immediate and any loop-variant expressions.
- if (!isa<ConstantInt>(BaseV) || !cast<ConstantInt>(BaseV)->isNullValue())
+ if (!isa<ConstantInt>(BaseV) || !cast<ConstantInt>(BaseV)->isZero())
// Add BaseV to the PHI value if needed.
RewriteExpr = SCEVAddExpr::get(RewriteExpr, SCEVUnknown::get(BaseV));
UsersToProcess.pop_back();
++NumReduced;
- // If there are any more users to process with the same base, move one of
- // them to the end of the list so that we will process it.
- if (!UsersToProcess.empty()) {
- for (unsigned e = UsersToProcess.size(); ScanPos != e; ++ScanPos)
- if (UsersToProcess[ScanPos].Base == Base) {
- std::swap(UsersToProcess[ScanPos], UsersToProcess.back());
- break;
- }
- }
+ // If there are any more users to process with the same base, process them
+ // now. We sorted by base above, so we just have to check the last elt.
} while (!UsersToProcess.empty() && UsersToProcess.back().Base == Base);
// TODO: Next, find out which base index is the most common, pull it out.
}
void LoopStrengthReduce::OptimizeIndvars(Loop *L) {
// TODO: implement optzns here.
-
-
-
// Finally, get the terminating condition for the loop if possible. If we
// can, we want to change it to use a post-incremented version of its
// induction variable, to allow coalescing the live ranges for the IV into
BasicBlock *LatchBlock =
SomePHI->getIncomingBlock(SomePHI->getIncomingBlock(0) == Preheader);
BranchInst *TermBr = dyn_cast<BranchInst>(LatchBlock->getTerminator());
- if (!TermBr || TermBr->isUnconditional() ||
- !isa<SetCondInst>(TermBr->getCondition()))
+ if (!TermBr || TermBr->isUnconditional() ||
+ !isa<ICmpInst>(TermBr->getCondition()))
return;
- SetCondInst *Cond = cast<SetCondInst>(TermBr->getCondition());
+ ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
// Search IVUsesByStride to find Cond's IVUse if there is one.
IVStrideUse *CondUse = 0;
}
if (!CondUse) return; // setcc doesn't use the IV.
- // setcc stride is complex, don't mess with users.
- // FIXME: Evaluate whether this is a good idea or not.
- if (!isa<SCEVConstant>(*CondStride)) return;
-
// It's possible for the setcc instruction to be anywhere in the loop, and
// possible for it to have multiple users. If it is not immediately before
// the latch block branch, move it.
Cond->moveBefore(TermBr);
} else {
// Otherwise, clone the terminating condition and insert into the loopend.
- Cond = cast<SetCondInst>(Cond->clone());
+ Cond = cast<ICmpInst>(Cond->clone());
Cond->setName(L->getHeader()->getName() + ".termcond");
LatchBlock->getInstList().insert(TermBr, Cond);
};
}
-void LoopStrengthReduce::runOnLoop(Loop *L) {
- // First step, transform all loops nesting inside of this loop.
- for (LoopInfo::iterator I = L->begin(), E = L->end(); I != E; ++I)
- runOnLoop(*I);
+bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
- // Next, find all uses of induction variables in this loop, and catagorize
+ LI = &getAnalysis<LoopInfo>();
+ EF = &getAnalysis<ETForest>();
+ SE = &getAnalysis<ScalarEvolution>();
+ TD = &getAnalysis<TargetData>();
+ UIntPtrTy = TD->getIntPtrType();
+
+  // Find all uses of induction variables in this loop, and categorize
// them by stride. Start by finding all of the PHI nodes in the header for
// this loop. If they are induction variables, inspect their uses.
std::set<Instruction*> Processed; // Don't reprocess instructions.
AddUsersIfInteresting(I, L, Processed);
// If we have nothing to do, return.
- if (IVUsesByStride.empty()) return;
+ if (IVUsesByStride.empty()) return false;
// Optimize induction variables. Some indvar uses can be transformed to use
// strides that will be needed for other purposes. A common example of this
bool HasOneStride = IVUsesByStride.size() == 1;
#ifndef NDEBUG
- DEBUG(std::cerr << "\nLSR on ");
+ DOUT << "\nLSR on ";
DEBUG(L->dump());
#endif
// FIXME: this needs to eliminate an induction variable even if it's being
// compared against some value to decide loop termination.
if (PN->hasOneUse()) {
- BinaryOperator *BO = dyn_cast<BinaryOperator>(*(PN->use_begin()));
- if (BO && BO->hasOneUse()) {
- if (PN == *(BO->use_begin())) {
+ Instruction *BO = dyn_cast<Instruction>(*PN->use_begin());
+ if (BO && (isa<BinaryOperator>(BO) || isa<CmpInst>(BO))) {
+ if (BO->hasOneUse() && PN == *(BO->use_begin())) {
DeadInsts.insert(BO);
// Break the cycle, then delete the PHI.
PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
CastedPointers.clear();
IVUsesByStride.clear();
StrideOrder.clear();
- return;
+ return false;
}