//
// The LLVM Compiler Infrastructure
//
-// This file was developed by Nate Begeman and is distributed under the
-// University of Illinois Open Source License. See LICENSE.TXT for details.
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
/// IVStrideUse - Keep track of one use of a strided induction variable, where
/// the stride is stored externally. The Offset member keeps track of the
- /// offset from the IV, User is the actual user of the operand, and 'Operand'
- /// is the operand # of the User that is the use.
+ /// offset from the IV, User is the actual user of the operand, and
+ /// 'OperandValToReplace' is the operand of the User that is the use.
struct VISIBILITY_HIDDEN IVStrideUse {
SCEVHandle Offset;
Instruction *User;
/// StrideOrder - An ordering of the keys in IVUsesByStride that is stable:
/// We use this to iterate over the IVUsesByStride collection without being
/// dependent on random ordering of pointers in the process.
- std::vector<SCEVHandle> StrideOrder;
+ SmallVector<SCEVHandle, 16> StrideOrder;
/// CastedValues - As we need to cast values to uintptr_t, this keeps track
/// of the casted version of each value. This is accessed by
/// getCastedVersionOf.
- std::map<Value*, Value*> CastedPointers;
+ DenseMap<Value*, Value*> CastedPointers;
/// DeadInsts - Keep track of instructions we may have made dead, so that
/// we can remove them after we are done working.
- SmallPtrSet<Instruction*,16> DeadInsts;
+ SetVector<Instruction*> DeadInsts;
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// transformation profitability.
private:
bool AddUsersIfInteresting(Instruction *I, Loop *L,
SmallPtrSet<Instruction*,16> &Processed);
- SCEVHandle GetExpressionSCEV(Instruction *E, Loop *L);
+ SCEVHandle GetExpressionSCEV(Instruction *E);
ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond,
IVStrideUse* &CondUse,
const SCEVHandle* &CondStride);
void StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
IVUsersOfOneStride &Uses,
Loop *L, bool isOnlyStride);
- void DeleteTriviallyDeadInstructions(SmallPtrSet<Instruction*,16> &Insts);
+ void DeleteTriviallyDeadInstructions(SetVector<Instruction*> &Insts);
};
- char LoopStrengthReduce::ID = 0;
- RegisterPass<LoopStrengthReduce> X("loop-reduce", "Loop Strength Reduction");
}
+char LoopStrengthReduce::ID = 0;
+static RegisterPass<LoopStrengthReduce>
+X("loop-reduce", "Loop Strength Reduction");
+
LoopPass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
return new LoopStrengthReduce(TLI);
}
/// specified set are trivially dead, delete them and see if this makes any of
/// their operands subsequently dead.
void LoopStrengthReduce::
-DeleteTriviallyDeadInstructions(SmallPtrSet<Instruction*,16> &Insts) {
+DeleteTriviallyDeadInstructions(SetVector<Instruction*> &Insts) {
while (!Insts.empty()) {
- Instruction *I = *Insts.begin();
- Insts.erase(I);
+ Instruction *I = Insts.back();
+ Insts.pop_back();
+
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ // If all incoming values to the Phi are the same, we can replace the Phi
+ // with that value.
+ if (Value *PNV = PN->hasConstantValue()) {
+ if (Instruction *U = dyn_cast<Instruction>(PNV))
+ Insts.insert(U);
+ SE->deleteValueFromRecords(PN);
+ PN->replaceAllUsesWith(PNV);
+ PN->eraseFromParent();
+ Changed = true;
+ continue;
+ }
+ }
+
if (isInstructionTriviallyDead(I)) {
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- if (Instruction *U = dyn_cast<Instruction>(I->getOperand(i)))
+ for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
+ if (Instruction *U = dyn_cast<Instruction>(*i))
Insts.insert(U);
SE->deleteValueFromRecords(I);
I->eraseFromParent();
/// GetExpressionSCEV - Compute and return the SCEV for the specified
/// instruction.
-SCEVHandle LoopStrengthReduce::GetExpressionSCEV(Instruction *Exp, Loop *L) {
+SCEVHandle LoopStrengthReduce::GetExpressionSCEV(Instruction *Exp) {
// Pointer to pointer bitcast instructions return the same value as their
// operand.
if (BitCastInst *BCI = dyn_cast<BitCastInst>(Exp)) {
if (SE->hasSCEV(BCI) || !isa<Instruction>(BCI->getOperand(0)))
return SE->getSCEV(BCI);
- SCEVHandle R = GetExpressionSCEV(cast<Instruction>(BCI->getOperand(0)), L);
+ SCEVHandle R = GetExpressionSCEV(cast<Instruction>(BCI->getOperand(0)));
SE->setSCEV(BCI, R);
return R;
}
return SE->getSCEV(Exp);
// Analyze all of the subscripts of this getelementptr instruction, looking
- // for uses that are determined by the trip count of L. First, skip all
- // operands the are not dependent on the IV.
+ // for uses that are determined by the trip count of the loop. First, skip
+  // all operands that are not dependent on the IV.
// Build up the base expression. Insert an LLVM cast of the pointer to
// uintptr_t first.
gep_type_iterator GTI = gep_type_begin(GEP);
- for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
+ for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
+ i != e; ++i, ++GTI) {
// If this is a use of a recurrence that we can analyze, and it comes before
// Op does in the GEP operand list, we will handle this when we process this
// operand.
if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
const StructLayout *SL = TD->getStructLayout(STy);
- unsigned Idx = cast<ConstantInt>(GEP->getOperand(i))->getZExtValue();
+ unsigned Idx = cast<ConstantInt>(*i)->getZExtValue();
uint64_t Offset = SL->getElementOffset(Idx);
GEPVal = SE->getAddExpr(GEPVal,
SE->getIntegerSCEV(Offset, UIntPtrTy));
} else {
unsigned GEPOpiBits =
- GEP->getOperand(i)->getType()->getPrimitiveSizeInBits();
+ (*i)->getType()->getPrimitiveSizeInBits();
unsigned IntPtrBits = UIntPtrTy->getPrimitiveSizeInBits();
Instruction::CastOps opcode = (GEPOpiBits < IntPtrBits ?
Instruction::SExt : (GEPOpiBits > IntPtrBits ? Instruction::Trunc :
Instruction::BitCast));
- Value *OpVal = getCastedVersionOf(opcode, GEP->getOperand(i));
+ Value *OpVal = getCastedVersionOf(opcode, *i);
SCEVHandle Idx = SE->getSCEV(OpVal);
uint64_t TypeSize = TD->getABITypeSize(GTI.getIndexedType());
/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
/// should use the post-inc value).
static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV,
- Loop *L, DominatorTree *DT, Pass *P) {
+ Loop *L, DominatorTree *DT, Pass *P,
+ SetVector<Instruction*> &DeadInsts){
// If the user is in the loop, use the preinc value.
if (L->contains(User->getParent())) return false;
// post-incremented value.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == IV) {
- SplitCriticalEdge(PN->getIncomingBlock(i), PN->getParent(), P,
- true);
+ SplitCriticalEdge(PN->getIncomingBlock(i), PN->getParent(), P, false);
// Splitting the critical edge can reduce the number of entries in this
// PHI.
e = PN->getNumIncomingValues();
if (--NumUses == 0) break;
}
+
+ // PHI node might have become a constant value after SplitCriticalEdge.
+ DeadInsts.insert(User);
return true;
}
bool LoopStrengthReduce::AddUsersIfInteresting(Instruction *I, Loop *L,
SmallPtrSet<Instruction*,16> &Processed) {
if (!I->getType()->isInteger() && !isa<PointerType>(I->getType()))
- return false; // Void and FP expressions cannot be reduced.
+ return false; // Void and FP expressions cannot be reduced.
if (!Processed.insert(I))
return true; // Instruction already handled.
// Get the symbolic expression for this instruction.
- SCEVHandle ISE = GetExpressionSCEV(I, L);
+ SCEVHandle ISE = GetExpressionSCEV(I);
if (isa<SCEVCouldNotCompute>(ISE)) return false;
// Get the start and stride for this expression.
// Okay, we found a user that we cannot reduce. Analyze the instruction
// and decide what to do with it. If we are a use inside of the loop, use
// the value before incrementation, otherwise use it after incrementation.
- if (IVUseShouldUsePostIncValue(User, I, L, DT, this)) {
+ if (IVUseShouldUsePostIncValue(User, I, L, DT, this, DeadInsts)) {
// The value used will be incremented by the stride more than we are
// expecting, so subtract this off.
SCEVHandle NewStart = SE->getMinusSCEV(Start, Stride);
// operands of Inst to use the new expression 'NewBase', with 'Imm' added
// to it.
void RewriteInstructionToUseNewBase(const SCEVHandle &NewBase,
- SCEVExpander &Rewriter, Loop *L,
- Pass *P);
+ Instruction *InsertPt,
+ SCEVExpander &Rewriter, Loop *L, Pass *P,
+ SetVector<Instruction*> &DeadInsts);
Value *InsertCodeForBaseAtPosition(const SCEVHandle &NewBase,
SCEVExpander &Rewriter,
}
// If there is no immediate value, skip the next part.
- if (SCEVConstant *SC = dyn_cast<SCEVConstant>(Imm))
- if (SC->getValue()->isZero())
- return Rewriter.expandCodeFor(NewBase, BaseInsertPt);
+ if (Imm->isZero())
+ return Rewriter.expandCodeFor(NewBase, BaseInsertPt);
Value *Base = Rewriter.expandCodeFor(NewBase, BaseInsertPt);
// Once we rewrite the code to insert the new IVs we want, update the
// operands of Inst to use the new expression 'NewBase', with 'Imm' added
-// to it.
+// to it. NewBasePt is the last instruction which contributes to the
+// value of NewBase in the case that it's a different instruction from
+// the PHI that NewBase is computed from, or null otherwise.
+//
void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase,
- SCEVExpander &Rewriter,
- Loop *L, Pass *P) {
+ Instruction *NewBasePt,
+ SCEVExpander &Rewriter, Loop *L, Pass *P,
+ SetVector<Instruction*> &DeadInsts) {
if (!isa<PHINode>(Inst)) {
// By default, insert code at the user instruction.
BasicBlock::iterator InsertPt = Inst;
// value will be pinned to live somewhere after the original computation.
// In this case, we have to back off.
if (!isUseOfPostIncrementedValue) {
- if (Instruction *OpInst = dyn_cast<Instruction>(OperandValToReplace)) {
+ if (NewBasePt && isa<PHINode>(OperandValToReplace)) {
+ InsertPt = NewBasePt;
+ ++InsertPt;
+ } else if (Instruction *OpInst
+ = dyn_cast<Instruction>(OperandValToReplace)) {
InsertPt = OpInst;
while (isa<PHINode>(InsertPt)) ++InsertPt;
}
// have multiple entries for the same predecessor. We use a map to make sure
// that a PHI node only has a single Value* for each predecessor (which also
// prevents us from inserting duplicate code in some blocks).
- std::map<BasicBlock*, Value*> InsertedCode;
+ DenseMap<BasicBlock*, Value*> InsertedCode;
PHINode *PN = cast<PHINode>(Inst);
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
if (PN->getIncomingValue(i) == OperandValToReplace) {
(PN->getParent() != L->getHeader() || !L->contains(PHIPred))) {
// First step, split the critical edge.
- SplitCriticalEdge(PHIPred, PN->getParent(), P, true);
+ SplitCriticalEdge(PHIPred, PN->getParent(), P, false);
// Next step: move the basic block. In particular, if the PHI node
// is outside of the loop, and PredTI is in the loop, we want to
Rewriter.clear();
}
}
+
+ // PHI node might have become a constant value after SplitCriticalEdge.
+ DeadInsts.insert(Inst);
+
DOUT << " CHANGED: IMM =" << *Imm << " Inst = " << *Inst;
}
SeparateSubExprs(SubExprs, SARE->getOperand(0), SE);
}
- } else if (!isa<SCEVConstant>(Expr) ||
- !cast<SCEVConstant>(Expr)->getValue()->isZero()) {
+ } else if (!Expr->isZero()) {
// Do not add zero.
SubExprs.push_back(Expr);
}
return Result;
}
-/// isZero - returns true if the scalar evolution expression is zero.
-///
-static bool isZero(const SCEVHandle &V) {
- if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(V))
- return SC->getValue()->isZero();
- return false;
-}
-
/// ValidStride - Check whether the given Scale is valid for all loads and
/// stores in UsersToProcess.
///
bool LoopStrengthReduce::ValidStride(bool HasBaseReg,
int64_t Scale,
const std::vector<BasedUser>& UsersToProcess) {
+ if (!TLI)
+ return true;
+
for (unsigned i=0, e = UsersToProcess.size(); i!=e; ++i) {
// If this is a load or other access, pass the type of the access in.
const Type *AccessTy = Type::VoidTy;
AccessTy = SI->getOperand(0)->getType();
else if (LoadInst *LI = dyn_cast<LoadInst>(UsersToProcess[i].Inst))
AccessTy = LI->getType();
+ else if (isa<PHINode>(UsersToProcess[i].Inst))
+ continue;
TargetLowering::AddrMode AM;
if (SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm))
AM.BaseOffs = SC->getValue()->getSExtValue();
- AM.HasBaseReg = HasBaseReg || !isZero(UsersToProcess[i].Base);
+ AM.HasBaseReg = HasBaseReg || !UsersToProcess[i].Base->isZero();
AM.Scale = Scale;
// If load[imm+r*scale] is illegal, bail out.
- if (TLI && !TLI->isLegalAddressingMode(AM, AccessTy))
+ if (!TLI->isLegalAddressingMode(AM, AccessTy))
return false;
}
return true;
const std::vector<BasedUser>& UsersToProcess) {
if (SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) {
int64_t SInt = SC->getValue()->getSExtValue();
- for (std::map<SCEVHandle, IVsOfOneStride>::iterator SI= IVsByStride.begin(),
- SE = IVsByStride.end(); SI != SE; ++SI) {
+ for (unsigned NewStride = 0, e = StrideOrder.size(); NewStride != e;
+ ++NewStride) {
+ std::map<SCEVHandle, IVsOfOneStride>::iterator SI =
+ IVsByStride.find(StrideOrder[NewStride]);
+ if (SI == IVsByStride.end())
+ continue;
int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
if (SI->first != Stride &&
(unsigned(abs(SInt)) < SSInt || (SInt % SSInt) != 0))
IE = SI->second.IVs.end(); II != IE; ++II)
// FIXME: Only handle base == 0 for now.
// Only reuse previous IV if it would not require a type conversion.
- if (isZero(II->Base) &&
+ if (II->Base->isZero() &&
!RequiresTypeConversion(II->Base->getType(), Ty)) {
IV = *II;
return Scale;
return Val.isUseOfPostIncrementedValue;
}
-/// isNonConstantNegative - REturn true if the specified scev is negated, but
+/// isNonConstantNegative - Return true if the specified scev is negated, but
/// not a constant.
static bool isNonConstantNegative(const SCEVHandle &Expr) {
SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Expr);
return SC->getValue()->getValue().isNegative();
}
+/// isAddressUse - Returns true if the specified instruction is using the
+/// specified value as an address.
+static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
+ bool isAddress = isa<LoadInst>(Inst);
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (SI->getOperand(1) == OperandVal)
+ isAddress = true;
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ // Addressing modes can also be folded into prefetches and a variety
+ // of intrinsics.
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::prefetch:
+ case Intrinsic::x86_sse2_loadu_dq:
+ case Intrinsic::x86_sse2_loadu_pd:
+ case Intrinsic::x86_sse_loadu_ps:
+ case Intrinsic::x86_sse_storeu_ps:
+ case Intrinsic::x86_sse2_storeu_pd:
+ case Intrinsic::x86_sse2_storeu_dq:
+ case Intrinsic::x86_sse2_storel_dq:
+ if (II->getOperand(1) == OperandVal)
+ isAddress = true;
+ break;
+ }
+ }
+ return isAddress;
+}
+
// CollectIVUsers - Transform our list of users and offsets to a bit more
// complex table. In this new vector, each 'BasedUser' contains 'Base' the base
// of the strided accessas well as the old information from Uses. We
// instructions. If we can represent anything there, move it to the imm
// fields of the BasedUsers. We do this so that it increases the commonality
// of the remaining uses.
+ unsigned NumPHI = 0;
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
// If the user is not in the current loop, this means it is using the exit
// value of the IV. Do not put anything in the base, make sure it's all in
// Addressing modes can be folded into loads and stores. Be careful that
// the store is through the expression, not of the expression though.
- bool isAddress = isa<LoadInst>(UsersToProcess[i].Inst);
- if (StoreInst *SI = dyn_cast<StoreInst>(UsersToProcess[i].Inst)) {
- if (SI->getOperand(1) == UsersToProcess[i].OperandValToReplace)
- isAddress = true;
- } else if (IntrinsicInst *II =
- dyn_cast<IntrinsicInst>(UsersToProcess[i].Inst)) {
- // Addressing modes can also be folded into prefetches and a variety
- // of intrinsics.
- switch (II->getIntrinsicID()) {
- default: break;
- case Intrinsic::prefetch:
- case Intrinsic::x86_sse2_loadu_dq:
- case Intrinsic::x86_sse2_loadu_pd:
- case Intrinsic::x86_sse_loadu_ps:
- case Intrinsic::x86_sse_storeu_ps:
- case Intrinsic::x86_sse2_storeu_pd:
- case Intrinsic::x86_sse2_storeu_dq:
- case Intrinsic::x86_sse2_storel_dq:
- if (II->getOperand(1) == UsersToProcess[i].OperandValToReplace)
- isAddress = true;
- break;
- case Intrinsic::x86_sse2_loadh_pd:
- case Intrinsic::x86_sse2_loadl_pd:
- if (II->getOperand(2) == UsersToProcess[i].OperandValToReplace)
- isAddress = true;
- break;
- }
+ bool isPHI = false;
+ bool isAddress = isAddressUse(UsersToProcess[i].Inst,
+ UsersToProcess[i].OperandValToReplace);
+ if (isa<PHINode>(UsersToProcess[i].Inst)) {
+ isPHI = true;
+ ++NumPHI;
}
// If this use isn't an address, then not all uses are addresses.
- if (!isAddress)
+ if (!isAddress && !isPHI)
AllUsesAreAddresses = false;
MoveImmediateValues(TLI, UsersToProcess[i].Inst, UsersToProcess[i].Base,
}
}
+  // If one of the uses is a PHI node and all other uses are addresses, still
+ // allow iv reuse. Essentially we are trading one constant multiplication
+ // for one fewer iv.
+ if (NumPHI > 1)
+ AllUsesAreAddresses = false;
+
return CommonExprs;
}
Loop *L,
bool isOnlyStride) {
// If all the users are moved to another stride, then there is nothing to do.
- if (Uses.Users.size() == 0)
+ if (Uses.Users.empty())
return;
// Keep track if every use in UsersToProcess is an address. If they all are,
// their value in a register and add it in for each use. This will take up
// a register operand, which potentially restricts what stride values are
// valid.
- bool HaveCommonExprs = !isZero(CommonExprs);
+ bool HaveCommonExprs = !CommonExprs->isZero();
// If all uses are addresses, check if it is possible to reuse an IV with a
// stride that is a factor of this stride. And that the multiple is a number
if (RewriteFactor == 0) {
// Create a new Phi for this base, and stick it in the loop header.
- NewPHI = new PHINode(ReplacedTy, "iv.", PhiInsertBefore);
+ NewPHI = PHINode::Create(ReplacedTy, "iv.", PhiInsertBefore);
++NumInserted;
// Add common base to the new Phi node.
// Get a base value.
SCEVHandle Base = UsersToProcess[i].Base;
- // Compact everything with this base to be consequetive with this one.
+    // Compact everything with this base to be consecutive with this one.
for (unsigned j = i+1; j != e; ++j) {
if (UsersToProcess[j].Base == Base) {
std::swap(UsersToProcess[i+1], UsersToProcess[j]);
// We want this constant emitted into the preheader! This is just
// using cast as a copy so BitCast (no-op cast) is appropriate
BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert",
- PreInsertPt);
+ PreInsertPt);
}
}
SCEVHandle RewriteExpr = SE->getUnknown(RewriteOp);
+      // If we had to insert new instructions for RewriteOp, we have to
+ // consider that they may not have been able to end up immediately
+ // next to RewriteOp, because non-PHI instructions may never precede
+ // PHI instructions in a block. In this case, remember where the last
+ // instruction was inserted so that if we're replacing a different
+ // PHI node, we can use the later point to expand the final
+ // RewriteExpr.
+ Instruction *NewBasePt = dyn_cast<Instruction>(RewriteOp);
+ if (RewriteOp == NewPHI) NewBasePt = 0;
+
// Clear the SCEVExpander's expression map so that we are guaranteed
// to have the code emitted where we expect it.
Rewriter.clear();
// If we are reusing the iv, then it must be multiplied by a constant
// factor take advantage of addressing mode scale component.
if (RewriteFactor != 0) {
- RewriteExpr =
- SE->getMulExpr(SE->getIntegerSCEV(RewriteFactor,
- RewriteExpr->getType()),
- RewriteExpr);
+ RewriteExpr = SE->getMulExpr(SE->getIntegerSCEV(RewriteFactor,
+ RewriteExpr->getType()),
+ RewriteExpr);
// The common base is emitted in the loop preheader. But since we
// are reusing an IV, it has not been used to initialize the PHI node.
// Add BaseV to the PHI value if needed.
RewriteExpr = SE->getAddExpr(RewriteExpr, SE->getUnknown(BaseV));
- User.RewriteInstructionToUseNewBase(RewriteExpr, Rewriter, L, this);
+ User.RewriteInstructionToUseNewBase(RewriteExpr, NewBasePt,
+ Rewriter, L, this,
+ DeadInsts);
// Mark old value we replaced as possibly dead, so that it is elminated
// if we just replaced the last use of that value.
/// v1 = v1 + 3
/// if (v1 < 30) goto loop
ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
- IVStrideUse* &CondUse,
+ IVStrideUse* &CondUse,
const SCEVHandle* &CondStride) {
if (StrideOrder.size() < 2 ||
IVUsesByStride[*CondStride].Users.size() != 1)
uint64_t SignBit = 1ULL << (BitWidth-1);
const Type *CmpTy = C->getType();
const Type *NewCmpTy = NULL;
+ unsigned TyBits = CmpTy->getPrimitiveSizeInBits();
+ unsigned NewTyBits = 0;
int64_t NewCmpVal = CmpVal;
SCEVHandle *NewStride = NULL;
Value *NewIncV = NULL;
}
NewCmpTy = NewIncV->getType();
- if (RequiresTypeConversion(CmpTy, NewCmpTy)) {
+ NewTyBits = isa<PointerType>(NewCmpTy)
+ ? UIntPtrTy->getPrimitiveSizeInBits()
+ : NewCmpTy->getPrimitiveSizeInBits();
+ if (RequiresTypeConversion(NewCmpTy, CmpTy)) {
+ // Check if it is possible to rewrite it using
+ // an iv / stride of a smaller integer type.
+ bool TruncOk = false;
+ if (NewCmpTy->isInteger()) {
+ unsigned Bits = NewTyBits;
+ if (ICmpInst::isSignedPredicate(Predicate))
+ --Bits;
+ uint64_t Mask = (1ULL << Bits) - 1;
+ if (((uint64_t)NewCmpVal & Mask) == (uint64_t)NewCmpVal)
+ TruncOk = true;
+ }
+ if (!TruncOk) {
+ NewCmpVal = CmpVal;
+ continue;
+ }
+ }
+
+ // Don't rewrite if use offset is non-constant and the new type is
+ // of a different type.
+ // FIXME: too conservative?
+ if (NewTyBits != TyBits && !isa<SCEVConstant>(CondUse->Offset)) {
NewCmpVal = CmpVal;
continue;
}
// Avoid rewriting the compare instruction with an iv of new stride
// if it's likely the new stride uses will be rewritten using the
if (AllUsesAreAddresses &&
- ValidStride(!isZero(CommonExprs), Scale, UsersToProcess)) {
+ ValidStride(!CommonExprs->isZero(), Scale, UsersToProcess)) {
NewCmpVal = CmpVal;
continue;
}
}
}
+  // Forgo this transformation if the increment happens to be
+ // unfortunately positioned after the condition, and the condition
+ // has multiple uses which prevent it from being moved immediately
+ // before the branch. See
+ // test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-*.ll
+ // for an example of this situation.
+ if (!Cond->hasOneUse())
+ for (BasicBlock::iterator I = Cond, E = Cond->getParent()->end();
+ I != E; ++I)
+ if (I == NewIncV)
+ return Cond;
+
if (NewCmpVal != CmpVal) {
// Create a new compare instruction using new stride / iv.
ICmpInst *OldCond = Cond;
- Value *RHS = ConstantInt::get(C->getType(), NewCmpVal);
- // Both sides of a ICmpInst must be of the same type.
- if (NewCmpTy != CmpTy) {
- if (isa<PointerType>(NewCmpTy) && !isa<PointerType>(CmpTy))
- RHS= SCEVExpander::InsertCastOfTo(Instruction::IntToPtr, RHS, NewCmpTy);
- else
- RHS = SCEVExpander::InsertCastOfTo(Instruction::BitCast, RHS, NewCmpTy);
+ Value *RHS;
+ if (!isa<PointerType>(NewCmpTy))
+ RHS = ConstantInt::get(NewCmpTy, NewCmpVal);
+ else {
+ RHS = ConstantInt::get(UIntPtrTy, NewCmpVal);
+ RHS = SCEVExpander::InsertCastOfTo(Instruction::IntToPtr, RHS, NewCmpTy);
}
// Insert new compare instruction.
- Cond = new ICmpInst(Predicate, NewIncV, RHS);
- Cond->setName(L->getHeader()->getName() + ".termcond");
- OldCond->getParent()->getInstList().insert(OldCond, Cond);
+ Cond = new ICmpInst(Predicate, NewIncV, RHS,
+ L->getHeader()->getName() + ".termcond",
+ OldCond);
// Remove the old compare instruction. The old indvar is probably dead too.
DeadInsts.insert(cast<Instruction>(CondUse->OperandValToReplace));
- OldCond->replaceAllUsesWith(Cond);
SE->deleteValueFromRecords(OldCond);
+ OldCond->replaceAllUsesWith(Cond);
OldCond->eraseFromParent();
IVUsesByStride[*CondStride].Users.pop_back();
- SCEVHandle NewOffset = SE->getMulExpr(CondUse->Offset,
- SE->getConstant(ConstantInt::get(CondUse->Offset->getType(), Scale)));
+ SCEVHandle NewOffset = TyBits == NewTyBits
+ ? SE->getMulExpr(CondUse->Offset,
+ SE->getConstant(ConstantInt::get(CmpTy, Scale)))
+ : SE->getConstant(ConstantInt::get(NewCmpTy,
+ cast<SCEVConstant>(CondUse->Offset)->getValue()->getSExtValue()*Scale));
IVUsesByStride[*NewStride].addUser(NewOffset, Cond, NewIncV);
CondUse = &IVUsesByStride[*NewStride].Users.back();
CondStride = NewStride;
#endif
// IVsByStride keeps IVs for one particular loop.
- IVsByStride.clear();
+ assert(IVsByStride.empty() && "Stale entries in IVsByStride?");
// Sort the StrideOrder so we process larger strides first.
std::stable_sort(StrideOrder.begin(), StrideOrder.end(), StrideCompare());
// Note: this processes each stride/type pair individually. All users passed
// into StrengthReduceStridedIVUsers have the same type AND stride. Also,
- // node that we iterate over IVUsesByStride indirectly by using StrideOrder.
+ // note that we iterate over IVUsesByStride indirectly by using StrideOrder.
// This extra layer of indirection makes the ordering of strides deterministic
// - not dependent on map order.
for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; ++Stride) {
StrengthReduceStridedIVUsers(SI->first, SI->second, L, HasOneStride);
}
+ // We're done analyzing this loop; release all the state we built up for it.
+ CastedPointers.clear();
+ IVUsesByStride.clear();
+ IVsByStride.clear();
+ StrideOrder.clear();
+
// Clean up after ourselves
if (!DeadInsts.empty()) {
DeleteTriviallyDeadInstructions(DeadInsts);
BasicBlock::iterator I = L->getHeader()->begin();
- PHINode *PN;
- while ((PN = dyn_cast<PHINode>(I))) {
- ++I; // Preincrement iterator to avoid invalidating it when deleting PN.
-
- // At this point, we know that we have killed one or more GEP
- // instructions. It is worth checking to see if the cann indvar is also
- // dead, so that we can remove it as well. The requirements for the cann
- // indvar to be considered dead are:
- // 1. the cann indvar has one use
- // 2. the use is an add instruction
- // 3. the add has one use
- // 4. the add is used by the cann indvar
- // If all four cases above are true, then we can remove both the add and
- // the cann indvar.
+ while (PHINode *PN = dyn_cast<PHINode>(I++)) {
+ // At this point, we know that we have killed one or more IV users.
+ // It is worth checking to see if the cann indvar is also
+ // dead, so that we can remove it as well.
+ //
+ // We can remove a PHI if it is on a cycle in the def-use graph
+ // where each node in the cycle has degree one, i.e. only one use,
+ // and is an instruction with no side effects.
+ //
// FIXME: this needs to eliminate an induction variable even if it's being
// compared against some value to decide loop termination.
if (PN->hasOneUse()) {
- Instruction *BO = dyn_cast<Instruction>(*PN->use_begin());
- if (BO && (isa<BinaryOperator>(BO) || isa<CmpInst>(BO))) {
- if (BO->hasOneUse() && PN == *(BO->use_begin())) {
- DeadInsts.insert(BO);
- // Break the cycle, then delete the PHI.
- PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+ for (Instruction *J = dyn_cast<Instruction>(*PN->use_begin());
+ J && J->hasOneUse() && !J->mayWriteToMemory();
+ J = dyn_cast<Instruction>(*J->use_begin())) {
+ // If we find the original PHI, we've discovered a cycle.
+ if (J == PN) {
+ // Break the cycle and mark the PHI for deletion.
SE->deleteValueFromRecords(PN);
- PN->eraseFromParent();
+ PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+ DeadInsts.insert(PN);
+ break;
}
}
}
DeleteTriviallyDeadInstructions(DeadInsts);
}
- CastedPointers.clear();
- IVUsesByStride.clear();
- StrideOrder.clear();
return false;
}