print(errs()); errs() << '\n';
}
-/// getSDiv - Return an expression for LHS /s RHS, if it can be determined,
-/// or null otherwise. If IgnoreSignificantBits is true, expressions like
-/// (X * Y) /s Y are simplified to Y, ignoring that the multiplication may
-/// overflow, which is useful when the result will be used in a context where
-/// the most significant bits are ignored.
-static const SCEV *getSDiv(const SCEV *LHS, const SCEV *RHS,
- ScalarEvolution &SE,
- bool IgnoreSignificantBits = false) {
+/// isAddRecSExtable - Return true if the given addrec can be sign-extended
+/// without changing its value.
+static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
+ const Type *WideTy =
+ IntegerType::get(SE.getContext(),
+ SE.getTypeSizeInBits(AR->getType()) + 1);
+ return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
+}
+
+/// isAddSExtable - Return true if the given add can be sign-extended
+/// without changing its value.
+static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
+ const Type *WideTy =
+ IntegerType::get(SE.getContext(),
+ SE.getTypeSizeInBits(A->getType()) + 1);
+ return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
+}
+
+/// isMulSExtable - Return true if the given add can be sign-extended
+/// without changing its value.
+static bool isMulSExtable(const SCEVMulExpr *A, ScalarEvolution &SE) {
+ const Type *WideTy =
+ IntegerType::get(SE.getContext(),
+ SE.getTypeSizeInBits(A->getType()) + 1);
+ return isa<SCEVMulExpr>(SE.getSignExtendExpr(A, WideTy));
+}
+
+/// getExactSDiv - Return an expression for LHS /s RHS, if it can be determined
+/// and if the remainder is known to be zero, or null otherwise. If
+/// IgnoreSignificantBits is true, expressions like (X * Y) /s Y are simplified
+/// to Y, ignoring that the multiplication may overflow, which is useful when
+/// the result will be used in a context where the most significant bits are
+/// ignored.
+static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
+ ScalarEvolution &SE,
+ bool IgnoreSignificantBits = false) {
// Handle the trivial case, which works for any SCEV type.
if (LHS == RHS)
return SE.getIntegerSCEV(1, LHS->getType());
.sdiv(RC->getValue()->getValue()));
}
- // Distribute the sdiv over addrec operands.
+ // Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
- const SCEV *Start = getSDiv(AR->getStart(), RHS, SE,
- IgnoreSignificantBits);
- if (!Start) return 0;
- const SCEV *Step = getSDiv(AR->getStepRecurrence(SE), RHS, SE,
- IgnoreSignificantBits);
- if (!Step) return 0;
- return SE.getAddRecExpr(Start, Step, AR->getLoop());
+ if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) {
+ const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
+ IgnoreSignificantBits);
+ if (!Start) return 0;
+ const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
+ IgnoreSignificantBits);
+ if (!Step) return 0;
+ return SE.getAddRecExpr(Start, Step, AR->getLoop());
+ }
}
- // Distribute the sdiv over add operands.
+ // Distribute the sdiv over add operands, if the add doesn't overflow.
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
- SmallVector<const SCEV *, 8> Ops;
- for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
- I != E; ++I) {
- const SCEV *Op = getSDiv(*I, RHS, SE,
- IgnoreSignificantBits);
- if (!Op) return 0;
- Ops.push_back(Op);
+ if (IgnoreSignificantBits || isAddSExtable(Add, SE)) {
+ SmallVector<const SCEV *, 8> Ops;
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I) {
+ const SCEV *Op = getExactSDiv(*I, RHS, SE,
+ IgnoreSignificantBits);
+ if (!Op) return 0;
+ Ops.push_back(Op);
+ }
+ return SE.getAddExpr(Ops);
}
- return SE.getAddExpr(Ops);
}
// Check for a multiply operand that we can pull RHS out of.
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS))
- if (IgnoreSignificantBits || Mul->hasNoSignedWrap()) {
+ if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
SmallVector<const SCEV *, 4> Ops;
bool Found = false;
for (SCEVMulExpr::op_iterator I = Mul->op_begin(), E = Mul->op_end();
I != E; ++I) {
if (!Found)
- if (const SCEV *Q = getSDiv(*I, RHS, SE, IgnoreSignificantBits)) {
+ if (const SCEV *Q = getExactSDiv(*I, RHS, SE,
+ IgnoreSignificantBits)) {
Ops.push_back(Q);
Found = true;
continue;
SmallPtrSet<const SCEV *, 16> &Regs,
const Loop *L,
ScalarEvolution &SE, DominatorTree &DT);
+ void RatePrimaryRegister(const SCEV *Reg,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const Loop *L,
+ ScalarEvolution &SE, DominatorTree &DT);
};
}
SmallPtrSet<const SCEV *, 16> &Regs,
const Loop *L,
ScalarEvolution &SE, DominatorTree &DT) {
- if (Regs.insert(Reg)) {
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
- if (AR->getLoop() == L)
- AddRecCost += 1; /// TODO: This should be a function of the stride.
-
- // If this is an addrec for a loop that's already been visited by LSR,
- // don't second-guess its addrec phi nodes. LSR isn't currently smart
- // enough to reason about more than one loop at a time. Consider these
- // registers free and leave them alone.
- else if (L->contains(AR->getLoop()) ||
- (!AR->getLoop()->contains(L) &&
- DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) {
- for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I)
- if (SE.isSCEVable(PN->getType()) &&
- (SE.getEffectiveSCEVType(PN->getType()) ==
- SE.getEffectiveSCEVType(AR->getType())) &&
- SE.getSCEV(PN) == AR)
- goto no_cost;
-
- // If this isn't one of the addrecs that the loop already has, it
- // would require a costly new phi and add.
- ++NumBaseAdds;
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
+ if (AR->getLoop() == L)
+ AddRecCost += 1; /// TODO: This should be a function of the stride.
+
+ // If this is an addrec for a loop that's already been visited by LSR,
+ // don't second-guess its addrec phi nodes. LSR isn't currently smart
+ // enough to reason about more than one loop at a time. Consider these
+ // registers free and leave them alone.
+ else if (L->contains(AR->getLoop()) ||
+ (!AR->getLoop()->contains(L) &&
+ DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) {
+ for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ if (SE.isSCEVable(PN->getType()) &&
+ (SE.getEffectiveSCEVType(PN->getType()) ==
+ SE.getEffectiveSCEVType(AR->getType())) &&
+ SE.getSCEV(PN) == AR)
+ return;
+
+ // If this isn't one of the addrecs that the loop already has, it
+ // would require a costly new phi and add. TODO: This isn't
+ // precisely modeled right now.
+ ++NumBaseAdds;
+ if (!Regs.count(AR->getStart()))
RateRegister(AR->getStart(), Regs, L, SE, DT);
- }
-
- // Add the step value register, if it needs one.
- // TODO: The non-affine case isn't precisely modeled here.
- if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1)))
- RateRegister(AR->getOperand(1), Regs, L, SE, DT);
}
- ++NumRegs;
- // Rough heuristic; favor registers which don't require extra setup
- // instructions in the preheader.
- if (!isa<SCEVUnknown>(Reg) &&
- !isa<SCEVConstant>(Reg) &&
- !(isa<SCEVAddRecExpr>(Reg) &&
- (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
- isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
- ++SetupCost;
- no_cost:;
+ // Add the step value register, if it needs one.
+ // TODO: The non-affine case isn't precisely modeled here.
+ if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1)))
+ if (!Regs.count(AR->getStart()))
+ RateRegister(AR->getOperand(1), Regs, L, SE, DT);
}
+ ++NumRegs;
+
+ // Rough heuristic; favor registers which don't require extra setup
+ // instructions in the preheader.
+ if (!isa<SCEVUnknown>(Reg) &&
+ !isa<SCEVConstant>(Reg) &&
+ !(isa<SCEVAddRecExpr>(Reg) &&
+ (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
+ isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
+ ++SetupCost;
+}
+
+/// RatePrimaryRegister - Record this register in the set. If we haven't seen it
+/// before, rate it.
+void Cost::RatePrimaryRegister(const SCEV *Reg,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const Loop *L,
+ ScalarEvolution &SE, DominatorTree &DT) {
+ if (Regs.insert(Reg))
+ RateRegister(Reg, Regs, L, SE, DT);
}
void Cost::RateFormula(const Formula &F,
Loose();
return;
}
- RateRegister(ScaledReg, Regs, L, SE, DT);
+ RatePrimaryRegister(ScaledReg, Regs, L, SE, DT);
}
for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
E = F.BaseRegs.end(); I != E; ++I) {
Loose();
return;
}
- RateRegister(BaseReg, Regs, L, SE, DT);
+ RatePrimaryRegister(BaseReg, Regs, L, SE, DT);
NumIVMuls += isa<SCEVMulExpr>(BaseReg) &&
BaseReg->hasComputableLoopEvolution(L);
MaxOffset(INT64_MIN),
AllFixupsOutsideLoop(true) {}
- bool InsertFormula(size_t LUIdx, const Formula &F);
+ bool InsertFormula(const Formula &F);
void check() const;
/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
-bool LSRUse::InsertFormula(size_t LUIdx, const Formula &F) {
+bool LSRUse::InsertFormula(const Formula &F) {
SmallVector<const SCEV *, 2> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for uniquifying.
case ICmpZero: OS << "ICmpZero"; break;
case Address:
OS << "Address of ";
- if (isa<PointerType>(AccessTy))
+ if (AccessTy->isPointerTy())
OS << "pointer"; // the full pointer type could be really verbose
else
OS << *AccessTy;
GlobalValue *BaseGV,
bool HasBaseReg,
LSRUse::KindType Kind, const Type *AccessTy,
- const TargetLowering *TLI,
- ScalarEvolution &SE) {
+ const TargetLowering *TLI) {
// Fast-path: zero is always foldable.
if (BaseOffs == 0 && !BaseGV) return true;
const Type *AccessTy);
public:
- void InsertInitialFormula(const SCEV *S, Loop *L, LSRUse &LU, size_t LUIdx);
+ void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
void CountRegisters(const Formula &F, size_t LUIdx);
bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
Value *Expand(const LSRFixup &LF,
const Formula &F,
- BasicBlock::iterator IP, Loop *L, Instruction *IVIncInsertPos,
+ BasicBlock::iterator IP,
SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts,
- ScalarEvolution &SE, DominatorTree &DT) const;
+ SmallVectorImpl<WeakVH> &DeadInsts) const;
+ void RewriteForPHI(PHINode *PN, const LSRFixup &LF,
+ const Formula &F,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ Pass *P) const;
void Rewrite(const LSRFixup &LF,
const Formula &F,
- Loop *L, Instruction *IVIncInsertPos,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts,
- ScalarEvolution &SE, DominatorTree &DT,
Pass *P) const;
void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Pass *P);
A = SE.getSignExtendExpr(A, B->getType());
}
if (const SCEVConstant *D =
- dyn_cast_or_null<SCEVConstant>(getSDiv(B, A, SE))) {
+ dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {
// Stride of one or negative one can have reuse with non-addresses.
if (D->getValue()->isOne() ||
D->getValue()->isAllOnesValue())
if (D->getValue()->getValue().getMinSignedBits() >= 64 ||
D->getValue()->getValue().isMinSignedValue())
goto decline_post_inc;
+ // Without TLI, assume that any stride might be valid, and so any
+ // use might be shared.
+ if (!TLI)
+ goto decline_post_inc;
// Check for possible scaled-address reuse.
const Type *AccessTy = getAccessType(UI->getUser());
TargetLowering::AddrMode AM;
// Conservatively assume HasBaseReg is true for now.
if (NewOffset < LU.MinOffset) {
if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, /*HasBaseReg=*/true,
- Kind, AccessTy, TLI, SE))
+ Kind, AccessTy, TLI))
return false;
NewMinOffset = NewOffset;
} else if (NewOffset > LU.MaxOffset) {
if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, /*HasBaseReg=*/true,
- Kind, AccessTy, TLI, SE))
+ Kind, AccessTy, TLI))
return false;
NewMaxOffset = NewOffset;
}
int64_t Offset = ExtractImmediate(Expr, SE);
// Basic uses can't accept any offset, for example.
- if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true,
- Kind, AccessTy, TLI, SE)) {
+ if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, TLI)) {
Expr = Copy;
Offset = 0;
}
void LSRInstance::CollectInterestingTypesAndFactors() {
SmallSetVector<const SCEV *, 4> Strides;
- // Collect interesting types and factors.
+ // Collect interesting types and strides.
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
const SCEV *Stride = UI->getStride();
// Collect interesting types.
Types.insert(SE.getEffectiveSCEVType(Stride->getType()));
- // Collect interesting factors.
+ // Add the stride for this loop.
+ Strides.insert(Stride);
+
+ // Add strides for other mentioned loops.
+ for (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(UI->getOffset());
+ AR; AR = dyn_cast<SCEVAddRecExpr>(AR->getStart()))
+ Strides.insert(AR->getStepRecurrence(SE));
+ }
+
+ // Compute interesting factors from the set of interesting strides.
+ for (SmallSetVector<const SCEV *, 4>::const_iterator
+ I = Strides.begin(), E = Strides.end(); I != E; ++I)
for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
- Strides.begin(), SEnd = Strides.end(); NewStrideIter != SEnd;
- ++NewStrideIter) {
- const SCEV *OldStride = Stride;
+ next(I); NewStrideIter != E; ++NewStrideIter) {
+ const SCEV *OldStride = *I;
const SCEV *NewStride = *NewStrideIter;
- if (OldStride == NewStride)
- continue;
if (SE.getTypeSizeInBits(OldStride->getType()) !=
SE.getTypeSizeInBits(NewStride->getType())) {
OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType());
}
if (const SCEVConstant *Factor =
- dyn_cast_or_null<SCEVConstant>(getSDiv(NewStride, OldStride,
- SE, true))) {
+ dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
+ SE, true))) {
if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
Factors.insert(Factor->getValue()->getValue().getSExtValue());
} else if (const SCEVConstant *Factor =
- dyn_cast_or_null<SCEVConstant>(getSDiv(OldStride, NewStride,
- SE, true))) {
+ dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride,
+ NewStride,
+ SE, true))) {
if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
Factors.insert(Factor->getValue()->getValue().getSExtValue());
}
}
- Strides.insert(Stride);
- }
// If all uses use the same type, don't bother looking for truncation-based
// reuse.
// If this is the first use of this LSRUse, give it a formula.
if (LU.Formulae.empty()) {
- InsertInitialFormula(S, L, LU, LF.LUIdx);
+ InsertInitialFormula(S, LU, LF.LUIdx);
CountRegisters(LU.Formulae.back(), LF.LUIdx);
}
}
}
void
-LSRInstance::InsertInitialFormula(const SCEV *S, Loop *L,
- LSRUse &LU, size_t LUIdx) {
+LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
Formula F;
F.InitialMatch(S, L, SE, DT);
bool Inserted = InsertFormula(LU, LUIdx, F);
/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
- if (!LU.InsertFormula(LUIdx, F))
+ if (!LU.InsertFormula(F))
return false;
CountRegisters(F, LUIdx);
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// Split a non-zero base out of an addrec.
if (!AR->getStart()->isZero()) {
- CollectSubexprs(AR->getStart(), C, Ops, SE);
CollectSubexprs(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()),
AR->getStepRecurrence(SE),
AR->getLoop()), C, Ops, SE);
+ CollectSubexprs(AR->getStart(), C, Ops, SE);
return;
}
} else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
/// GenerateCombinations - Generate a formula consisting of all of the
/// loop-dominating registers added into a single register.
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
- Formula Base) {
+ Formula Base) {
// This method is only intersting on a plurality of registers.
if (Base.BaseRegs.size() <= 1) return;
F.BaseRegs.push_back(BaseReg);
}
if (Ops.size() > 1) {
- F.BaseRegs.push_back(SE.getAddExpr(Ops));
- (void)InsertFormula(LU, LUIdx, F);
+ const SCEV *Sum = SE.getAddExpr(Ops);
+ // TODO: If Sum is zero, it probably means ScalarEvolution missed an
+ // opportunity to fold something. For now, just ignore such cases
+ // rather than procede with zero in a register.
+ if (!Sum->isZero()) {
+ F.BaseRegs.push_back(Sum);
+ (void)InsertFormula(LU, LUIdx, F);
+ }
}
}
Formula F = Base;
// Check that the multiplication doesn't overflow.
+ if (F.AM.BaseOffs == INT64_MIN && Factor == -1)
+ continue;
F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs * Factor;
- if ((int64_t)F.AM.BaseOffs / Factor != Base.AM.BaseOffs)
+ if (F.AM.BaseOffs / Factor != Base.AM.BaseOffs)
continue;
// Check that multiplying with the use offset doesn't overflow.
int64_t Offset = LU.MinOffset;
+ if (Offset == INT64_MIN && Factor == -1)
+ continue;
Offset = (uint64_t)Offset * Factor;
- if ((int64_t)Offset / Factor != LU.MinOffset)
+ if (Offset / Factor != LU.MinOffset)
continue;
// Check that this scale is legal.
// Check that multiplying with each base register doesn't overflow.
for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
- if (getSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
+ if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
goto next;
}
// Check that multiplying with the scaled register doesn't overflow.
if (F.ScaledReg) {
F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
- if (getSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
+ if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
continue;
}
continue;
// Divide out the factor, ignoring high bits, since we'll be
// scaling the value back up in the end.
- if (const SCEV *Quotient = getSDiv(AR, FactorS, SE, true)) {
+ if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true)) {
// TODO: This could be optimized to avoid all the copying.
Formula F = Base;
F.ScaledReg = Quotient;
namespace {
-/// WorkItem - Helper class for GenerateConstantOffsetReuse. It's used to
+/// WorkItem - Helper class for GenerateCrossUseConstantOffsets. It's used to
/// defer modifications so that the search phase doesn't have to worry about
/// the data structures moving underneath it.
struct WorkItem {
const SCEV *Reg = *I;
const ImmMapTy &Imms = Map.find(Reg)->second;
+ // It's not worthwhile looking for reuse if there's only one offset.
+ if (Imms.size() == 1)
+ continue;
+
DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
J != JE; ++J)
};
for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
ImmMapTy::const_iterator M = OtherImms[i];
- if (M == J) continue;
+ if (M == J || M == JE) continue;
// Compute the difference between the two.
int64_t Imm = (uint64_t)JImm - M->first;
/// GenerateAllReuseFormulae - Generate formulae for each use.
void
LSRInstance::GenerateAllReuseFormulae() {
- // This is split into two loops so that hasRegsUsedByUsesOtherThan
+ // This is split into multiple loops so that hasRegsUsedByUsesOtherThan
// queries are more precise.
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateScales(LU, LUIdx, LU.Formulae[i]);
+ }
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
}
}
DEBUG(if (Changed) {
- dbgs() << "After filtering out undesirable candidates:\n";
+ dbgs() << "\n"
+ "After filtering out undesirable candidates:\n";
print_uses(dbgs());
});
}
SmallSetVector<const SCEV *, 4> ReqRegs;
for (SmallPtrSet<const SCEV *, 16>::const_iterator I = CurRegs.begin(),
E = CurRegs.end(); I != E; ++I)
- if (LU.Regs.count(*I)) {
+ if (LU.Regs.count(*I))
ReqRegs.insert(*I);
- break;
- }
+ bool AnySatisfiedReqRegs = false;
SmallPtrSet<const SCEV *, 16> NewRegs;
Cost NewCost;
+retry:
for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
E = LU.Formulae.end(); I != E; ++I) {
const Formula &F = *I;
F.BaseRegs.end())
goto skip;
}
+ AnySatisfiedReqRegs = true;
// Evaluate the cost of the current formula. If it's already worse than
// the current best, prune the search at that point.
}
skip:;
}
+
+ // If none of the formulae had all of the required registers, relax the
+ // constraint so that we don't exclude all formulae.
+ if (!AnySatisfiedReqRegs) {
+ ReqRegs.clear();
+ goto retry;
+ }
}
void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
Value *LSRInstance::Expand(const LSRFixup &LF,
const Formula &F,
BasicBlock::iterator IP,
- Loop *L, Instruction *IVIncInsertPos,
SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts,
- ScalarEvolution &SE, DominatorTree &DT) const {
+ SmallVectorImpl<WeakVH> &DeadInsts) const {
const LSRUse &LU = Uses[LF.LUIdx];
// Then, collect some instructions which we will remain dominated by when
if (AR->getLoop() == LF.PostIncLoop) {
Reg = SE.getAddExpr(Reg, AR->getStepRecurrence(SE));
// If the user is inside the loop, insert the code after the increment
- // so that it is dominated by its operand.
- if (L->contains(LF.UserInst))
+ // so that it is dominated by its operand. If the original insert point
+ // was already dominated by the increment, keep it, because there may
+ // be loop-variant operands that need to be respected also.
+ if (L->contains(LF.UserInst) && !DT.dominates(IVIncInsertPos, IP))
IP = IVIncInsertPos;
break;
}
return FullV;
}
+/// RewriteForPHI - Helper for Rewrite. PHI nodes are special because the use
+/// of their operands effectively happens in their predecessor blocks, so the
+/// expression may need to be expanded in multiple places.
+void LSRInstance::RewriteForPHI(PHINode *PN,
+ const LSRFixup &LF,
+ const Formula &F,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ Pass *P) const {
+ DenseMap<BasicBlock *, Value *> Inserted;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
+ BasicBlock *BB = PN->getIncomingBlock(i);
+
+ // If this is a critical edge, split the edge so that we do not insert
+ // the code on all predecessor/successor paths. We do this unless this
+ // is the canonical backedge for this loop, which complicates post-inc
+ // users.
+ if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
+ !isa<IndirectBrInst>(BB->getTerminator()) &&
+ (PN->getParent() != L->getHeader() || !L->contains(BB))) {
+ // Split the critical edge.
+ BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P);
+
+ // If PN is outside of the loop and BB is in the loop, we want to
+ // move the block to be immediately before the PHI block, not
+ // immediately after BB.
+ if (L->contains(BB) && !L->contains(PN))
+ NewBB->moveBefore(PN->getParent());
+
+ // Splitting the edge can reduce the number of PHI entries we have.
+ e = PN->getNumIncomingValues();
+ BB = NewBB;
+ i = PN->getBasicBlockIndex(BB);
+ }
+
+ std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
+ Inserted.insert(std::make_pair(BB, static_cast<Value *>(0)));
+ if (!Pair.second)
+ PN->setIncomingValue(i, Pair.first->second);
+ else {
+ Value *FullV = Expand(LF, F, BB->getTerminator(), Rewriter, DeadInsts);
+
+ // If this is reuse-by-noop-cast, insert the noop cast.
+ const Type *OpTy = LF.OperandValToReplace->getType();
+ if (FullV->getType() != OpTy)
+ FullV =
+ CastInst::Create(CastInst::getCastOpcode(FullV, false,
+ OpTy, false),
+ FullV, LF.OperandValToReplace->getType(),
+ "tmp", BB->getTerminator());
+
+ PN->setIncomingValue(i, FullV);
+ Pair.first->second = FullV;
+ }
+ }
+}
+
/// Rewrite - Emit instructions for the leading candidate expression for this
/// LSRUse (this is called "expanding"), and update the UserInst to reference
/// the newly expanded value.
void LSRInstance::Rewrite(const LSRFixup &LF,
const Formula &F,
- Loop *L, Instruction *IVIncInsertPos,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts,
- ScalarEvolution &SE, DominatorTree &DT,
Pass *P) const {
- const Type *OpTy = LF.OperandValToReplace->getType();
-
// First, find an insertion point that dominates UserInst. For PHI nodes,
// find the nearest block which dominates all the relevant uses.
if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
- DenseMap<BasicBlock *, Value *> Inserted;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
- BasicBlock *BB = PN->getIncomingBlock(i);
-
- // If this is a critical edge, split the edge so that we do not insert
- // the code on all predecessor/successor paths. We do this unless this
- // is the canonical backedge for this loop, which complicates post-inc
- // users.
- if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
- !isa<IndirectBrInst>(BB->getTerminator()) &&
- (PN->getParent() != L->getHeader() || !L->contains(BB))) {
- // Split the critical edge.
- BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P);
-
- // If PN is outside of the loop and BB is in the loop, we want to
- // move the block to be immediately before the PHI block, not
- // immediately after BB.
- if (L->contains(BB) && !L->contains(PN))
- NewBB->moveBefore(PN->getParent());
-
- // Splitting the edge can reduce the number of PHI entries we have.
- e = PN->getNumIncomingValues();
- BB = NewBB;
- i = PN->getBasicBlockIndex(BB);
- }
-
- std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
- Inserted.insert(std::make_pair(BB, static_cast<Value *>(0)));
- if (!Pair.second)
- PN->setIncomingValue(i, Pair.first->second);
- else {
- Value *FullV = Expand(LF, F, BB->getTerminator(), L, IVIncInsertPos,
- Rewriter, DeadInsts, SE, DT);
-
- // If this is reuse-by-noop-cast, insert the noop cast.
- if (FullV->getType() != OpTy)
- FullV =
- CastInst::Create(CastInst::getCastOpcode(FullV, false,
- OpTy, false),
- FullV, LF.OperandValToReplace->getType(),
- "tmp", BB->getTerminator());
-
- PN->setIncomingValue(i, FullV);
- Pair.first->second = FullV;
- }
- }
+ RewriteForPHI(PN, LF, F, Rewriter, DeadInsts, P);
} else {
- Value *FullV = Expand(LF, F, LF.UserInst, L, IVIncInsertPos,
- Rewriter, DeadInsts, SE, DT);
+ Value *FullV = Expand(LF, F, LF.UserInst, Rewriter, DeadInsts);
// If this is reuse-by-noop-cast, insert the noop cast.
+ const Type *OpTy = LF.OperandValToReplace->getType();
if (FullV->getType() != OpTy) {
Instruction *Cast =
CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
for (size_t i = 0, e = Fixups.size(); i != e; ++i) {
size_t LUIdx = Fixups[i].LUIdx;
- Rewrite(Fixups[i], *Solution[LUIdx], L, IVIncInsertPos, Rewriter,
- DeadInsts, SE, DT, P);
+ Rewrite(Fixups[i], *Solution[LUIdx], Rewriter, DeadInsts, P);
Changed = true;
}