//
// TODO: Handle multiple loops at a time.
//
-// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr
-// instead of a GlobalValue?
+// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead
+// of a GlobalValue?
//
// TODO: When truncation is free, truncate ICmp users' operands to make it a
// smaller encoding (on x86 at least).
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "loop-reduce"
-#include "llvm/AddressingMode.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Assembly/Writer.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/SmallBitVector.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
using namespace llvm;
/// computing satisfying a use. It may include broken-out immediates and scaled
/// registers.
struct Formula {
- /// AM - This is used to represent complex addressing, as well as other kinds
- /// of interesting uses.
- AddrMode AM;
+ /// Global base address used for complex addressing.
+ GlobalValue *BaseGV;
+
+ /// Base offset for complex addressing.
+ int64_t BaseOffset;
+
+ /// Whether any complex addressing has a base register.
+ bool HasBaseReg;
+
+ /// The scale of any complex addressing.
+ int64_t Scale;
/// BaseRegs - The list of "base" registers for this use. When this is
- /// non-empty, AM.HasBaseReg should be set to true.
- SmallVector<const SCEV *, 2> BaseRegs;
+ /// non-empty,
+ SmallVector<const SCEV *, 4> BaseRegs;
/// ScaledReg - The 'scaled' register for this use. This should be non-null
- /// when AM.Scale is not zero.
+ /// when Scale is not zero.
const SCEV *ScaledReg;
/// UnfoldedOffset - An additional constant offset which added near the
/// live in an add immediate field rather than a register.
int64_t UnfoldedOffset;
- Formula() : ScaledReg(0), UnfoldedOffset(0) {}
+ Formula()
+ : BaseGV(0), BaseOffset(0), HasBaseReg(false), Scale(0), ScaledReg(0),
+ UnfoldedOffset(0) {}
void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
const SCEV *Sum = SE.getAddExpr(Good);
if (!Sum->isZero())
BaseRegs.push_back(Sum);
- AM.HasBaseReg = true;
+ HasBaseReg = true;
}
if (!Bad.empty()) {
const SCEV *Sum = SE.getAddExpr(Bad);
if (!Sum->isZero())
BaseRegs.push_back(Sum);
- AM.HasBaseReg = true;
+ HasBaseReg = true;
}
}
Type *Formula::getType() const {
return !BaseRegs.empty() ? BaseRegs.front()->getType() :
ScaledReg ? ScaledReg->getType() :
- AM.BaseGV ? AM.BaseGV->getType() :
+ BaseGV ? BaseGV->getType() :
0;
}
void Formula::print(raw_ostream &OS) const {
bool First = true;
- if (AM.BaseGV) {
+ if (BaseGV) {
if (!First) OS << " + "; else First = false;
- WriteAsOperand(OS, AM.BaseGV, /*PrintType=*/false);
+ WriteAsOperand(OS, BaseGV, /*PrintType=*/false);
}
- if (AM.BaseOffs != 0) {
+ if (BaseOffset != 0) {
if (!First) OS << " + "; else First = false;
- OS << AM.BaseOffs;
+ OS << BaseOffset;
}
for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
E = BaseRegs.end(); I != E; ++I) {
if (!First) OS << " + "; else First = false;
OS << "reg(" << **I << ')';
}
- if (AM.HasBaseReg && BaseRegs.empty()) {
+ if (HasBaseReg && BaseRegs.empty()) {
if (!First) OS << " + "; else First = false;
OS << "**error: HasBaseReg**";
- } else if (!AM.HasBaseReg && !BaseRegs.empty()) {
+ } else if (!HasBaseReg && !BaseRegs.empty()) {
if (!First) OS << " + "; else First = false;
OS << "**error: !HasBaseReg**";
}
- if (AM.Scale != 0) {
+ if (Scale != 0) {
if (!First) OS << " + "; else First = false;
- OS << AM.Scale << "*reg(";
+ OS << Scale << "*reg(";
if (ScaledReg)
OS << *ScaledReg;
else
return Changed;
}
+namespace {
+class LSRUse;
+}
+// Check if it is legal to fold 2 base registers.
+static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU,
+ const Formula &F);
+// Get the cost of the scaling factor used in F for LU.
+static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
+ const LSRUse &LU, const Formula &F);
+
namespace {
/// Cost - This class is used to measure and compare candidate formulae.
unsigned NumBaseAdds;
unsigned ImmCost;
unsigned SetupCost;
+ unsigned ScaleCost;
public:
Cost()
: NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0),
- SetupCost(0) {}
+ SetupCost(0), ScaleCost(0) {}
bool operator<(const Cost &Other) const;
// Once any of the metrics loses, they must all remain losers.
bool isValid() {
return ((NumRegs | AddRecCost | NumIVMuls | NumBaseAdds
- | ImmCost | SetupCost) != ~0u)
+ | ImmCost | SetupCost | ScaleCost) != ~0u)
|| ((NumRegs & AddRecCost & NumIVMuls & NumBaseAdds
- & ImmCost & SetupCost) == ~0u);
+ & ImmCost & SetupCost & ScaleCost) == ~0u);
}
#endif
return NumRegs == ~0u;
}
- void RateFormula(const Formula &F,
+ void RateFormula(const TargetTransformInfo &TTI,
+ const Formula &F,
SmallPtrSet<const SCEV *, 16> &Regs,
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
ScalarEvolution &SE, DominatorTree &DT,
+ const LSRUse &LU,
SmallPtrSet<const SCEV *, 16> *LoserRegs = 0);
void print(raw_ostream &OS) const;
}
if (Regs.insert(Reg)) {
RateRegister(Reg, Regs, L, SE, DT);
- if (isLoser())
+ if (LoserRegs && isLoser())
LoserRegs->insert(Reg);
}
}
-void Cost::RateFormula(const Formula &F,
+void Cost::RateFormula(const TargetTransformInfo &TTI,
+ const Formula &F,
SmallPtrSet<const SCEV *, 16> &Regs,
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
ScalarEvolution &SE, DominatorTree &DT,
+ const LSRUse &LU,
SmallPtrSet<const SCEV *, 16> *LoserRegs) {
// Tally up the registers.
if (const SCEV *ScaledReg = F.ScaledReg) {
// Determine how many (unfolded) adds we'll need inside the loop.
size_t NumBaseParts = F.BaseRegs.size() + (F.UnfoldedOffset != 0);
if (NumBaseParts > 1)
- NumBaseAdds += NumBaseParts - 1;
+ // Do not count the base and a possible second register if the target
+ // allows to fold 2 registers.
+ NumBaseAdds += NumBaseParts - (1 + isLegal2RegAMUse(TTI, LU, F));
+
+ // Accumulate non-free scaling amounts.
+ ScaleCost += getScalingFactorCost(TTI, LU, F);
// Tally up the non-zero immediates.
for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
E = Offsets.end(); I != E; ++I) {
- int64_t Offset = (uint64_t)*I + F.AM.BaseOffs;
- if (F.AM.BaseGV)
+ int64_t Offset = (uint64_t)*I + F.BaseOffset;
+ if (F.BaseGV)
ImmCost += 64; // Handle symbolic values conservatively.
// TODO: This should probably be the pointer size.
else if (Offset != 0)
NumBaseAdds = ~0u;
ImmCost = ~0u;
SetupCost = ~0u;
+ ScaleCost = ~0u;
}
/// operator< - Choose the lower cost.
return NumIVMuls < Other.NumIVMuls;
if (NumBaseAdds != Other.NumBaseAdds)
return NumBaseAdds < Other.NumBaseAdds;
+ if (ScaleCost != Other.ScaleCost)
+ return ScaleCost < Other.ScaleCost;
if (ImmCost != Other.ImmCost)
return ImmCost < Other.ImmCost;
if (SetupCost != Other.SetupCost)
if (NumBaseAdds != 0)
OS << ", plus " << NumBaseAdds << " base add"
<< (NumBaseAdds == 1 ? "" : "s");
+ if (ScaleCost != 0)
+ OS << ", plus " << ScaleCost << " scale cost";
if (ImmCost != 0)
OS << ", plus " << ImmCost << " imm cost";
if (SetupCost != 0)
/// UniquifierDenseMapInfo - A DenseMapInfo implementation for holding
/// DenseMaps and DenseSets of sorted SmallVectors of const SCEV*.
struct UniquifierDenseMapInfo {
- static SmallVector<const SCEV *, 2> getEmptyKey() {
- SmallVector<const SCEV *, 2> V;
+ static SmallVector<const SCEV *, 4> getEmptyKey() {
+ SmallVector<const SCEV *, 4> V;
V.push_back(reinterpret_cast<const SCEV *>(-1));
return V;
}
- static SmallVector<const SCEV *, 2> getTombstoneKey() {
- SmallVector<const SCEV *, 2> V;
+ static SmallVector<const SCEV *, 4> getTombstoneKey() {
+ SmallVector<const SCEV *, 4> V;
V.push_back(reinterpret_cast<const SCEV *>(-2));
return V;
}
- static unsigned getHashValue(const SmallVector<const SCEV *, 2> &V) {
+ static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) {
unsigned Result = 0;
for (SmallVectorImpl<const SCEV *>::const_iterator I = V.begin(),
E = V.end(); I != E; ++I)
return Result;
}
- static bool isEqual(const SmallVector<const SCEV *, 2> &LHS,
- const SmallVector<const SCEV *, 2> &RHS) {
+ static bool isEqual(const SmallVector<const SCEV *, 4> &LHS,
+ const SmallVector<const SCEV *, 4> &RHS) {
return LHS == RHS;
}
};
/// the user itself, and information about how the use may be satisfied.
/// TODO: Represent multiple users of the same expression in common?
class LSRUse {
- DenseSet<SmallVector<const SCEV *, 2>, UniquifierDenseMapInfo> Uniquifier;
+ DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier;
public:
/// KindType - An enum for a kind of use, indicating what types of
/// HasFormula - Test whether this use as a formula which has the same
/// registers as the given formula.
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
- SmallVector<const SCEV *, 2> Key = F.BaseRegs;
+ SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for uniquifying.
std::sort(Key.begin(), Key.end());
/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
bool LSRUse::InsertFormula(const Formula &F) {
- SmallVector<const SCEV *, 2> Key = F.BaseRegs;
+ SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for uniquifying.
std::sort(Key.begin(), Key.end());
/// isLegalUse - Test whether the use described by AM is "legal", meaning it can
/// be completely folded into the user instruction at isel time. This includes
/// address-mode folding and special icmp tricks.
-static bool isLegalUse(const AddrMode &AM,
- LSRUse::KindType Kind, Type *AccessTy,
- const TargetLowering *TLI) {
+static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind,
+ Type *AccessTy, GlobalValue *BaseGV, int64_t BaseOffset,
+ bool HasBaseReg, int64_t Scale) {
switch (Kind) {
case LSRUse::Address:
- // If we have low-level target information, ask the target if it can
- // completely fold this address.
- if (TLI) return TLI->isLegalAddressingMode(AM, AccessTy);
+ return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
// Otherwise, just guess that reg+reg addressing is legal.
- return !AM.BaseGV && AM.BaseOffs == 0 && AM.Scale <= 1;
+ //return ;
case LSRUse::ICmpZero:
// There's not even a target hook for querying whether it would be legal to
// fold a GV into an ICmp.
- if (AM.BaseGV)
+ if (BaseGV)
return false;
// ICmp only has two operands; don't allow more than two non-trivial parts.
- if (AM.Scale != 0 && AM.HasBaseReg && AM.BaseOffs != 0)
+ if (Scale != 0 && HasBaseReg && BaseOffset != 0)
return false;
// ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
// putting the scaled register in the other operand of the icmp.
- if (AM.Scale != 0 && AM.Scale != -1)
+ if (Scale != 0 && Scale != -1)
return false;
// If we have low-level target information, ask the target if it can fold an
// integer immediate on an icmp.
- if (AM.BaseOffs != 0) {
- if (!TLI)
- return false;
+ if (BaseOffset != 0) {
// We have one of:
- // ICmpZero BaseReg + Offset => ICmp BaseReg, -Offset
- // ICmpZero -1*ScaleReg + Offset => ICmp ScaleReg, Offset
+ // ICmpZero BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset
+ // ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset
// Offs is the ICmp immediate.
- int64_t Offs = AM.BaseOffs;
- if (AM.Scale == 0)
- Offs = -(uint64_t)Offs; // The cast does the right thing with INT64_MIN.
- return TLI->isLegalICmpImmediate(Offs);
+ if (Scale == 0)
+ // The cast does the right thing with INT64_MIN.
+ BaseOffset = -(uint64_t)BaseOffset;
+ return TTI.isLegalICmpImmediate(BaseOffset);
}
// ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
case LSRUse::Basic:
// Only handle single-register values.
- return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0;
+ return !BaseGV && Scale == 0 && BaseOffset == 0;
case LSRUse::Special:
// Special case Basic to handle -1 scales.
- return !AM.BaseGV && (AM.Scale == 0 || AM.Scale == -1) && AM.BaseOffs == 0;
+ return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0;
}
llvm_unreachable("Invalid LSRUse Kind!");
}
-static bool isLegalUse(AddrMode AM,
- int64_t MinOffset, int64_t MaxOffset,
- LSRUse::KindType Kind, Type *AccessTy,
- const TargetLowering *TLI) {
+static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
+ int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
+ GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
+ int64_t Scale) {
// Check for overflow.
- if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) !=
+ if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
(MinOffset > 0))
return false;
- AM.BaseOffs = (uint64_t)AM.BaseOffs + MinOffset;
- if (isLegalUse(AM, Kind, AccessTy, TLI)) {
- AM.BaseOffs = (uint64_t)AM.BaseOffs - MinOffset;
- // Check for overflow.
- if (((int64_t)((uint64_t)AM.BaseOffs + MaxOffset) > AM.BaseOffs) !=
- (MaxOffset > 0))
- return false;
- AM.BaseOffs = (uint64_t)AM.BaseOffs + MaxOffset;
- return isLegalUse(AM, Kind, AccessTy, TLI);
+ MinOffset = (uint64_t)BaseOffset + MinOffset;
+ if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) !=
+ (MaxOffset > 0))
+ return false;
+ MaxOffset = (uint64_t)BaseOffset + MaxOffset;
+
+ return isLegalUse(TTI, Kind, AccessTy, BaseGV, MinOffset, HasBaseReg,
+ Scale) &&
+ isLegalUse(TTI, Kind, AccessTy, BaseGV, MaxOffset, HasBaseReg, Scale);
+}
+
+static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
+ int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
+ const Formula &F) {
+ return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
+ F.BaseOffset, F.HasBaseReg, F.Scale);
+}
+
+static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU,
+ const Formula &F) {
+ // If F is used as an Addressing Mode, it may fold one Base plus one
+ // scaled register. If the scaled register is nil, do as if another
+ // element of the base regs is a 1-scaled register.
+ // This is possible if BaseRegs has at least 2 registers.
+
+ // If this is not an address calculation, this is not an addressing mode
+ // use.
+ if (LU.Kind != LSRUse::Address)
+ return false;
+
+ // F is already scaled.
+ if (F.Scale != 0)
+ return false;
+
+ // We need to keep one register for the base and one to scale.
+ if (F.BaseRegs.size() < 2)
+ return false;
+
+ return isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
+ F.BaseGV, F.BaseOffset, F.HasBaseReg, 1);
+ }
+
+static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
+ const LSRUse &LU, const Formula &F) {
+ if (!F.Scale)
+ return 0;
+ assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, F) && "Illegal formula in use.");
+
+ switch (LU.Kind) {
+ case LSRUse::Address: {
+ int CurScaleCost = TTI.getScalingFactorCost(LU.AccessTy, F.BaseGV,
+ F.BaseOffset, F.HasBaseReg,
+ F.Scale);
+ assert(CurScaleCost >= 0 && "Legal addressing mode has an illegal cost!");
+ return CurScaleCost;
}
- return false;
+ case LSRUse::ICmpZero:
+ // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg.
+ // Therefore, return 0 in case F.Scale == -1.
+ return F.Scale != -1;
+
+ case LSRUse::Basic:
+ case LSRUse::Special:
+ return 0;
+ }
+
+ llvm_unreachable("Invalid LSRUse Kind!");
}
-static bool isAlwaysFoldable(int64_t BaseOffs,
- GlobalValue *BaseGV,
- bool HasBaseReg,
+static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
LSRUse::KindType Kind, Type *AccessTy,
- const TargetLowering *TLI) {
+ GlobalValue *BaseGV, int64_t BaseOffset,
+ bool HasBaseReg) {
// Fast-path: zero is always foldable.
- if (BaseOffs == 0 && !BaseGV) return true;
+ if (BaseOffset == 0 && !BaseGV) return true;
// Conservatively, create an address with an immediate and a
// base and a scale.
- AddrMode AM;
- AM.BaseOffs = BaseOffs;
- AM.BaseGV = BaseGV;
- AM.HasBaseReg = HasBaseReg;
- AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
+ int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
// Canonicalize a scale of 1 to a base register if the formula doesn't
// already have a base register.
- if (!AM.HasBaseReg && AM.Scale == 1) {
- AM.Scale = 0;
- AM.HasBaseReg = true;
+ if (!HasBaseReg && Scale == 1) {
+ Scale = 0;
+ HasBaseReg = true;
}
- return isLegalUse(AM, Kind, AccessTy, TLI);
+ return isLegalUse(TTI, Kind, AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
}
-static bool isAlwaysFoldable(const SCEV *S,
- int64_t MinOffset, int64_t MaxOffset,
- bool HasBaseReg,
- LSRUse::KindType Kind, Type *AccessTy,
- const TargetLowering *TLI,
- ScalarEvolution &SE) {
+static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
+ ScalarEvolution &SE, int64_t MinOffset,
+ int64_t MaxOffset, LSRUse::KindType Kind,
+ Type *AccessTy, const SCEV *S, bool HasBaseReg) {
// Fast-path: zero is always foldable.
if (S->isZero()) return true;
// Conservatively, create an address with an immediate and a
// base and a scale.
- int64_t BaseOffs = ExtractImmediate(S, SE);
+ int64_t BaseOffset = ExtractImmediate(S, SE);
GlobalValue *BaseGV = ExtractSymbol(S, SE);
// If there's anything else involved, it's not foldable.
if (!S->isZero()) return false;
// Fast-path: zero is always foldable.
- if (BaseOffs == 0 && !BaseGV) return true;
+ if (BaseOffset == 0 && !BaseGV) return true;
// Conservatively, create an address with an immediate and a
// base and a scale.
- AddrMode AM;
- AM.BaseOffs = BaseOffs;
- AM.BaseGV = BaseGV;
- AM.HasBaseReg = HasBaseReg;
- AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
+ int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
- return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI);
+ return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
+ BaseOffset, HasBaseReg, Scale);
}
namespace {
ScalarEvolution &SE;
DominatorTree &DT;
LoopInfo &LI;
- const TargetLowering *const TLI;
+ const TargetTransformInfo &TTI;
Loop *const L;
bool Changed;
Pass *P);
public:
- LSRInstance(const TargetLowering *tli, Loop *l, Pass *P);
+ LSRInstance(Loop *L, Pass *P);
bool getChanged() const { return Changed; }
}
if (!DestTy) continue;
- if (TLI) {
- // If target does not support DestTy natively then do not apply
- // this transformation.
- EVT DVT = TLI->getValueType(DestTy);
- if (!TLI->isTypeLegal(DVT)) continue;
- }
+ // If target does not support DestTy natively then do not apply
+ // this transformation.
+ if (!TTI.isTypeLegal(DestTy)) continue;
PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
if (!PH) continue;
if (ICmpInst::isTrueWhenEqual(Pred)) {
// Look for n+1, and grab n.
if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
- if (isa<ConstantInt>(BO->getOperand(1)) &&
- cast<ConstantInt>(BO->getOperand(1))->isOne() &&
- SE.getSCEV(BO->getOperand(0)) == MaxRHS)
- NewRHS = BO->getOperand(0);
+ if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
+ if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
+ NewRHS = BO->getOperand(0);
if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
- if (isa<ConstantInt>(BO->getOperand(1)) &&
- cast<ConstantInt>(BO->getOperand(1))->isOne() &&
- SE.getSCEV(BO->getOperand(0)) == MaxRHS)
- NewRHS = BO->getOperand(0);
+ if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
+ if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
+ NewRHS = BO->getOperand(0);
if (!NewRHS)
return Cond;
} else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
if (C->getValue().getMinSignedBits() >= 64 ||
C->getValue().isMinSignedValue())
goto decline_post_inc;
- // Without TLI, assume that any stride might be valid, and so any
- // use might be shared.
- if (!TLI)
- goto decline_post_inc;
// Check for possible scaled-address reuse.
Type *AccessTy = getAccessType(UI->getUser());
- AddrMode AM;
- AM.Scale = C->getSExtValue();
- if (TLI->isLegalAddressingMode(AM, AccessTy))
+ int64_t Scale = C->getSExtValue();
+ if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
+ /*BaseOffset=*/ 0,
+ /*HasBaseReg=*/ false, Scale))
goto decline_post_inc;
- AM.Scale = -AM.Scale;
- if (TLI->isLegalAddressingMode(AM, AccessTy))
+ Scale = -Scale;
+ if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
+ /*BaseOffset=*/ 0,
+ /*HasBaseReg=*/ false, Scale))
goto decline_post_inc;
}
}
return false;
// Conservatively assume HasBaseReg is true for now.
if (NewOffset < LU.MinOffset) {
- if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, HasBaseReg,
- Kind, AccessTy, TLI))
+ if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+ LU.MaxOffset - NewOffset, HasBaseReg))
return false;
NewMinOffset = NewOffset;
} else if (NewOffset > LU.MaxOffset) {
- if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, HasBaseReg,
- Kind, AccessTy, TLI))
+ if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+ NewOffset - LU.MinOffset, HasBaseReg))
return false;
NewMaxOffset = NewOffset;
}
int64_t Offset = ExtractImmediate(Expr, SE);
// Basic uses can't accept any offset, for example.
- if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, TLI)) {
+ if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+ Offset, /*HasBaseReg=*/ true)) {
Expr = Copy;
Offset = 0;
}
// as OrigF.
if (F.BaseRegs == OrigF.BaseRegs &&
F.ScaledReg == OrigF.ScaledReg &&
- F.AM.BaseGV == OrigF.AM.BaseGV &&
- F.AM.Scale == OrigF.AM.Scale &&
+ F.BaseGV == OrigF.BaseGV &&
+ F.Scale == OrigF.Scale &&
F.UnfoldedOffset == OrigF.UnfoldedOffset) {
- if (F.AM.BaseOffs == 0)
+ if (F.BaseOffset == 0)
return &LU;
// This is the formula where all the registers and symbols matched;
// there aren't going to be any others. Since we declined it, we
/// TODO: Consider IVInc free if it's already used in another chains.
static bool
isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
- ScalarEvolution &SE, const TargetLowering *TLI) {
+ ScalarEvolution &SE, const TargetTransformInfo &TTI) {
if (StressIVChain)
return true;
// Add this IV user to the end of the chain.
IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
}
+ IVChain &Chain = IVChainVec[ChainIdx];
SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
// This chain's NearUsers become FarUsers.
for (Value::use_iterator UseIter = IVOper->use_begin(),
UseEnd = IVOper->use_end(); UseIter != UseEnd; ++UseIter) {
Instruction *OtherUse = dyn_cast<Instruction>(*UseIter);
- if (!OtherUse || OtherUse == UserInst)
+ if (!OtherUse)
continue;
+ // Uses in the chain will no longer be uses if the chain is formed.
+ // Include the head of the chain in this iteration (not Chain.begin()).
+ IVChain::const_iterator IncIter = Chain.Incs.begin();
+ IVChain::const_iterator IncEnd = Chain.Incs.end();
+ for( ; IncIter != IncEnd; ++IncIter) {
+ if (IncIter->UserInst == OtherUse)
+ break;
+ }
+ if (IncIter != IncEnd)
+ continue;
+
if (SE.isSCEVable(OtherUse->getType())
&& !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
&& IU.isIVUserOrOperand(OtherUse)) {
for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
UsersIdx < NChains; ++UsersIdx) {
if (!isProfitableChain(IVChainVec[UsersIdx],
- ChainUsersVec[UsersIdx].FarUsers, SE, TLI))
+ ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
continue;
// Preserve the chain at UsesIdx.
if (ChainIdx != UsersIdx)
/// Return true if the IVInc can be folded into an addressing mode.
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
- Value *Operand, const TargetLowering *TLI) {
+ Value *Operand, const TargetTransformInfo &TTI) {
const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
if (!IncConst || !isAddressUse(UserInst, Operand))
return false;
return false;
int64_t IncOffset = IncConst->getValue()->getSExtValue();
- if (!isAlwaysFoldable(IncOffset, /*BaseGV=*/0, /*HaseBaseReg=*/false,
- LSRUse::Address, getAccessType(UserInst), TLI))
+ if (!isAlwaysFoldable(TTI, LSRUse::Address,
+ getAccessType(UserInst), /*BaseGV=*/ 0,
+ IncOffset, /*HaseBaseReg=*/ false))
return false;
return true;
// by LSR.
const IVInc &Head = Chain.Incs[0];
User::op_iterator IVOpEnd = Head.UserInst->op_end();
+ // findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
IVOpEnd, L, SE);
Value *IVSrc = 0;
// If an IV increment can't be folded, use it as the next IV value.
if (!canFoldIVIncExpr(LeftOverExpr, IncI->UserInst, IncI->IVOperand,
- TLI)) {
+ TTI)) {
assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
IVSrc = IVOper;
LeftOverExpr = 0;
LSRUse &LU, size_t LUIdx) {
Formula F;
F.BaseRegs.push_back(S);
- F.AM.HasBaseReg = true;
+ F.HasBaseReg = true;
bool Inserted = InsertFormula(LU, LUIdx, F);
assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
}
// Don't pull a constant into a register if the constant could be folded
// into an immediate field.
- if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset,
- Base.getNumRegs() > 1,
- LU.Kind, LU.AccessTy, TLI, SE))
+ if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, *J, Base.getNumRegs() > 1))
continue;
// Collect all operands except *J.
// Don't leave just a constant behind in a register if the constant could
// be folded into an immediate field.
if (InnerAddOps.size() == 1 &&
- isAlwaysFoldable(InnerAddOps[0], LU.MinOffset, LU.MaxOffset,
- Base.getNumRegs() > 1,
- LU.Kind, LU.AccessTy, TLI, SE))
+ isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
continue;
const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
// Add the remaining pieces of the add back into the new formula.
const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
- if (TLI && InnerSumSC &&
+ if (InnerSumSC &&
SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
- TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
- InnerSumSC->getValue()->getZExtValue())) {
+ TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ InnerSumSC->getValue()->getZExtValue())) {
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
InnerSumSC->getValue()->getZExtValue();
F.BaseRegs.erase(F.BaseRegs.begin() + i);
// Add J as its own register, or an unfolded immediate.
const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
- if (TLI && SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
- TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
- SC->getValue()->getZExtValue()))
+ if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
+ TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ SC->getValue()->getZExtValue()))
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
SC->getValue()->getZExtValue();
else
void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// We can't add a symbolic offset if the address already contains one.
- if (Base.AM.BaseGV) return;
+ if (Base.BaseGV) return;
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
const SCEV *G = Base.BaseRegs[i];
if (G->isZero() || !GV)
continue;
Formula F = Base;
- F.AM.BaseGV = GV;
- if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI))
+ F.BaseGV = GV;
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
continue;
F.BaseRegs[i] = G;
(void)InsertFormula(LU, LUIdx, F);
for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
E = Worklist.end(); I != E; ++I) {
Formula F = Base;
- F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I;
- if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I,
- LU.Kind, LU.AccessTy, TLI)) {
+ F.BaseOffset = (uint64_t)Base.BaseOffset - *I;
+ if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind,
+ LU.AccessTy, F)) {
// Add the offset to the base register.
const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
// If it cancelled out, drop the base register, otherwise update it.
if (G->isZero() || Imm == 0)
continue;
Formula F = Base;
- F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Imm;
- if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI))
+ F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
continue;
F.BaseRegs[i] = G;
(void)InsertFormula(LU, LUIdx, F);
// Don't do this if there is more than one offset.
if (LU.MinOffset != LU.MaxOffset) return;
- assert(!Base.AM.BaseGV && "ICmpZero use is not legal!");
+ assert(!Base.BaseGV && "ICmpZero use is not legal!");
// Check each interesting stride.
for (SmallSetVector<int64_t, 8>::const_iterator
int64_t Factor = *I;
// Check that the multiplication doesn't overflow.
- if (Base.AM.BaseOffs == INT64_MIN && Factor == -1)
+ if (Base.BaseOffset == INT64_MIN && Factor == -1)
continue;
- int64_t NewBaseOffs = (uint64_t)Base.AM.BaseOffs * Factor;
- if (NewBaseOffs / Factor != Base.AM.BaseOffs)
+ int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
+ if (NewBaseOffset / Factor != Base.BaseOffset)
continue;
// Check that multiplying with the use offset doesn't overflow.
continue;
Formula F = Base;
- F.AM.BaseOffs = NewBaseOffs;
+ F.BaseOffset = NewBaseOffset;
// Check that this scale is legal.
- if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI))
+ if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F))
continue;
// Compensate for the use having MinOffset built into it.
- F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Offset - LU.MinOffset;
+ F.BaseOffset = (uint64_t)F.BaseOffset + Offset - LU.MinOffset;
const SCEV *FactorS = SE.getConstant(IntTy, Factor);
if (!IntTy) return;
// If this Formula already has a scaled register, we can't add another one.
- if (Base.AM.Scale != 0) return;
+ if (Base.Scale != 0) return;
// Check each interesting stride.
for (SmallSetVector<int64_t, 8>::const_iterator
I = Factors.begin(), E = Factors.end(); I != E; ++I) {
int64_t Factor = *I;
- Base.AM.Scale = Factor;
- Base.AM.HasBaseReg = Base.BaseRegs.size() > 1;
+ Base.Scale = Factor;
+ Base.HasBaseReg = Base.BaseRegs.size() > 1;
// Check whether this scale is going to be legal.
- if (!isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI)) {
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
+ Base)) {
// As a special-case, handle special out-of-loop Basic users specially.
// TODO: Reconsider this special case.
if (LU.Kind == LSRUse::Basic &&
- isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
- LSRUse::Special, LU.AccessTy, TLI) &&
+ isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
+ LU.AccessTy, Base) &&
LU.AllFixupsOutsideLoop)
LU.Kind = LSRUse::Special;
else
// For an ICmpZero, negating a solitary base register won't lead to
// new solutions.
if (LU.Kind == LSRUse::ICmpZero &&
- !Base.AM.HasBaseReg && Base.AM.BaseOffs == 0 && !Base.AM.BaseGV)
+ !Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV)
continue;
// For each addrec base reg, apply the scale, if possible.
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
/// GenerateTruncates - Generate reuse formulae from different IV types.
void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
- // This requires TargetLowering to tell us which truncates are free.
- if (!TLI) return;
-
// Don't bother truncating symbolic values.
- if (Base.AM.BaseGV) return;
+ if (Base.BaseGV) return;
// Determine the integer type for the base formula.
Type *DstTy = Base.getType();
for (SmallSetVector<Type *, 4>::const_iterator
I = Types.begin(), E = Types.end(); I != E; ++I) {
Type *SrcTy = *I;
- if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) {
+ if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
Formula F = Base;
if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I);
const Formula &F = LU.Formulae[L];
// Use the immediate in the scaled register.
if (F.ScaledReg == OrigReg) {
- int64_t Offs = (uint64_t)F.AM.BaseOffs +
- Imm * (uint64_t)F.AM.Scale;
+ int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
// Don't create 50 + reg(-50).
if (F.referencesReg(SE.getSCEV(
- ConstantInt::get(IntTy, -(uint64_t)Offs))))
+ ConstantInt::get(IntTy, -(uint64_t)Offset))))
continue;
Formula NewF = F;
- NewF.AM.BaseOffs = Offs;
- if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI))
+ NewF.BaseOffset = Offset;
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
+ NewF))
continue;
NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
// immediate itself, then the formula isn't worthwhile.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
if (C->getValue()->isNegative() !=
- (NewF.AM.BaseOffs < 0) &&
- (C->getValue()->getValue().abs() * APInt(BitWidth, F.AM.Scale))
- .ule(abs64(NewF.AM.BaseOffs)))
+ (NewF.BaseOffset < 0) &&
+ (C->getValue()->getValue().abs() * APInt(BitWidth, F.Scale))
+ .ule(abs64(NewF.BaseOffset)))
continue;
// OK, looks good.
if (BaseReg != OrigReg)
continue;
Formula NewF = F;
- NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm;
- if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI)) {
- if (!TLI ||
- !TLI->isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
+ NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
+ LU.Kind, LU.AccessTy, NewF)) {
+ if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
continue;
NewF = F;
NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
J = NewF.BaseRegs.begin(), JE = NewF.BaseRegs.end();
J != JE; ++J)
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J))
- if ((C->getValue()->getValue() + NewF.AM.BaseOffs).abs().slt(
- abs64(NewF.AM.BaseOffs)) &&
+ if ((C->getValue()->getValue() + NewF.BaseOffset).abs().slt(
+ abs64(NewF.BaseOffset)) &&
(C->getValue()->getValue() +
- NewF.AM.BaseOffs).countTrailingZeros() >=
- CountTrailingZeros_64(NewF.AM.BaseOffs))
+ NewF.BaseOffset).countTrailingZeros() >=
+ countTrailingZeros<uint64_t>(NewF.BaseOffset))
goto skip_formula;
// Ok, looks good.
// Collect the best formula for each unique set of shared registers. This
// is reset for each use.
- typedef DenseMap<SmallVector<const SCEV *, 2>, size_t, UniquifierDenseMapInfo>
+ typedef DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo>
BestFormulaeTy;
BestFormulaeTy BestFormulae;
// the corresponding bad register from the Regs set.
Cost CostF;
Regs.clear();
- CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT,
+ CostF.RateFormula(TTI, F, Regs, VisitedRegs, L, LU.Offsets, SE, DT, LU,
&LoserRegs);
if (CostF.isLoser()) {
// During initial formula generation, undesirable formulae are generated
dbgs() << "\n");
}
else {
- SmallVector<const SCEV *, 2> Key;
+ SmallVector<const SCEV *, 4> Key;
for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
JE = F.BaseRegs.end(); J != JE; ++J) {
const SCEV *Reg = *J;
Cost CostBest;
Regs.clear();
- CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
+ CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, LU.Offsets, SE,
+ DT, LU);
if (CostF < CostBest)
std::swap(F, Best);
DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
Formula NewF = F;
- NewF.AM.BaseOffs += C->getValue()->getSExtValue();
+ NewF.BaseOffset += C->getValue()->getSExtValue();
NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
(I - F.BaseRegs.begin()));
if (LU.HasFormulaWithSameRegs(NewF)) {
}
} else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
- if (!F.AM.BaseGV) {
+ if (!F.BaseGV) {
Formula NewF = F;
- NewF.AM.BaseGV = GV;
+ NewF.BaseGV = GV;
NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
(I - F.BaseRegs.begin()));
if (LU.HasFormulaWithSameRegs(NewF)) {
/// for expressions like A, A+1, A+2, etc., allocate a single register for
/// them.
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
- if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
- DEBUG(dbgs() << "The search space is too complex.\n");
+ if (EstimateSearchSpaceComplexity() < ComplexityLimit)
+ return;
- DEBUG(dbgs() << "Narrowing the search space by assuming that uses "
- "separated by a constant offset will use the same "
- "registers.\n");
+ DEBUG(dbgs() << "The search space is too complex.\n"
+ "Narrowing the search space by assuming that uses separated "
+ "by a constant offset will use the same registers.\n");
- // This is especially useful for unrolled loops.
+ // This is especially useful for unrolled loops.
- for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
- LSRUse &LU = Uses[LUIdx];
- for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
- E = LU.Formulae.end(); I != E; ++I) {
- const Formula &F = *I;
- if (F.AM.BaseOffs != 0 && F.AM.Scale == 0) {
- if (LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU)) {
- if (reconcileNewOffset(*LUThatHas, F.AM.BaseOffs,
- /*HasBaseReg=*/false,
- LU.Kind, LU.AccessTy)) {
- DEBUG(dbgs() << " Deleting use "; LU.print(dbgs());
- dbgs() << '\n');
-
- LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
-
- // Update the relocs to reference the new use.
- for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
- E = Fixups.end(); I != E; ++I) {
- LSRFixup &Fixup = *I;
- if (Fixup.LUIdx == LUIdx) {
- Fixup.LUIdx = LUThatHas - &Uses.front();
- Fixup.Offset += F.AM.BaseOffs;
- // Add the new offset to LUThatHas' offset list.
- if (LUThatHas->Offsets.back() != Fixup.Offset) {
- LUThatHas->Offsets.push_back(Fixup.Offset);
- if (Fixup.Offset > LUThatHas->MaxOffset)
- LUThatHas->MaxOffset = Fixup.Offset;
- if (Fixup.Offset < LUThatHas->MinOffset)
- LUThatHas->MinOffset = Fixup.Offset;
- }
- DEBUG(dbgs() << "New fixup has offset "
- << Fixup.Offset << '\n');
- }
- if (Fixup.LUIdx == NumUses-1)
- Fixup.LUIdx = LUIdx;
- }
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
+ E = LU.Formulae.end(); I != E; ++I) {
+ const Formula &F = *I;
+ if (F.BaseOffset == 0 || F.Scale != 0)
+ continue;
- // Delete formulae from the new use which are no longer legal.
- bool Any = false;
- for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
- Formula &F = LUThatHas->Formulae[i];
- if (!isLegalUse(F.AM,
- LUThatHas->MinOffset, LUThatHas->MaxOffset,
- LUThatHas->Kind, LUThatHas->AccessTy, TLI)) {
- DEBUG(dbgs() << " Deleting "; F.print(dbgs());
- dbgs() << '\n');
- LUThatHas->DeleteFormula(F);
- --i;
- --e;
- Any = true;
- }
- }
- if (Any)
- LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
+ LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
+ if (!LUThatHas)
+ continue;
- // Delete the old use.
- DeleteUse(LU, LUIdx);
- --LUIdx;
- --NumUses;
- break;
- }
+ if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/ false,
+ LU.Kind, LU.AccessTy))
+ continue;
+
+ DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n');
+
+ LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
+
+ // Update the relocs to reference the new use.
+ for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
+ E = Fixups.end(); I != E; ++I) {
+ LSRFixup &Fixup = *I;
+ if (Fixup.LUIdx == LUIdx) {
+ Fixup.LUIdx = LUThatHas - &Uses.front();
+ Fixup.Offset += F.BaseOffset;
+ // Add the new offset to LUThatHas' offset list.
+ if (LUThatHas->Offsets.back() != Fixup.Offset) {
+ LUThatHas->Offsets.push_back(Fixup.Offset);
+ if (Fixup.Offset > LUThatHas->MaxOffset)
+ LUThatHas->MaxOffset = Fixup.Offset;
+ if (Fixup.Offset < LUThatHas->MinOffset)
+ LUThatHas->MinOffset = Fixup.Offset;
}
+ DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
}
+ if (Fixup.LUIdx == NumUses-1)
+ Fixup.LUIdx = LUIdx;
}
- }
- DEBUG(dbgs() << "After pre-selection:\n";
- print_uses(dbgs()));
+ // Delete formulae from the new use which are no longer legal.
+ bool Any = false;
+ for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
+ Formula &F = LUThatHas->Formulae[i];
+ if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
+ LUThatHas->Kind, LUThatHas->AccessTy, F)) {
+ DEBUG(dbgs() << " Deleting "; F.print(dbgs());
+ dbgs() << '\n');
+ LUThatHas->DeleteFormula(F);
+ --i;
+ --e;
+ Any = true;
+ }
+ }
+
+ if (Any)
+ LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
+
+ // Delete the old use.
+ DeleteUse(LU, LUIdx);
+ --LUIdx;
+ --NumUses;
+ break;
+ }
}
+
+ DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}
/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call
// the current best, prune the search at that point.
NewCost = CurCost;
NewRegs = CurRegs;
- NewCost.RateFormula(F, NewRegs, VisitedRegs, L, LU.Offsets, SE, DT);
+ NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, LU.Offsets, SE, DT,
+ LU);
if (NewCost < SolutionCost) {
Workspace.push_back(&F);
if (Workspace.size() != Uses.size()) {
// Expand the ScaledReg portion.
Value *ICmpScaledV = 0;
- if (F.AM.Scale != 0) {
+ if (F.Scale != 0) {
const SCEV *ScaledS = F.ScaledReg;
// If we're expanding for a post-inc user, make the post-inc adjustment.
// An interesting way of "folding" with an icmp is to use a negated
// scale, which we'll implement by inserting it into the other operand
// of the icmp.
- assert(F.AM.Scale == -1 &&
+ assert(F.Scale == -1 &&
"The only scale supported by ICmpZero uses is -1!");
ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP);
} else {
}
ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP));
ScaledS = SE.getMulExpr(ScaledS,
- SE.getConstant(ScaledS->getType(), F.AM.Scale));
+ SE.getConstant(ScaledS->getType(), F.Scale));
Ops.push_back(ScaledS);
}
}
// Expand the GV portion.
- if (F.AM.BaseGV) {
+ if (F.BaseGV) {
// Flush the operand list to suppress SCEVExpander hoisting.
if (!Ops.empty()) {
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
- Ops.push_back(SE.getUnknown(F.AM.BaseGV));
+ Ops.push_back(SE.getUnknown(F.BaseGV));
}
// Flush the operand list to suppress SCEVExpander hoisting of both folded and
}
// Expand the immediate portion.
- int64_t Offset = (uint64_t)F.AM.BaseOffs + LF.Offset;
+ int64_t Offset = (uint64_t)F.BaseOffset + LF.Offset;
if (Offset != 0) {
if (LU.Kind == LSRUse::ICmpZero) {
// The other interesting way of "folding" with an ICmpZero is to use a
if (LU.Kind == LSRUse::ICmpZero) {
ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
DeadInsts.push_back(CI->getOperand(1));
- assert(!F.AM.BaseGV && "ICmp does not support folding a global value and "
+ assert(!F.BaseGV && "ICmp does not support folding a global value and "
"a scale at the same time!");
- if (F.AM.Scale == -1) {
+ if (F.Scale == -1) {
if (ICmpScaledV->getType() != OpTy) {
Instruction *Cast =
CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
}
CI->setOperand(1, ICmpScaledV);
} else {
- assert(F.AM.Scale == 0 &&
+ assert(F.Scale == 0 &&
"ICmp does not support folding a global value and "
"a scale at the same time!");
Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
}
-LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
- : IU(P->getAnalysis<IVUsers>()),
- SE(P->getAnalysis<ScalarEvolution>()),
- DT(P->getAnalysis<DominatorTree>()),
- LI(P->getAnalysis<LoopInfo>()),
- TLI(tli), L(l), Changed(false), IVIncInsertPos(0) {
-
+LSRInstance::LSRInstance(Loop *L, Pass *P)
+ : IU(P->getAnalysis<IVUsers>()), SE(P->getAnalysis<ScalarEvolution>()),
+ DT(P->getAnalysis<DominatorTree>()), LI(P->getAnalysis<LoopInfo>()),
+ TTI(P->getAnalysis<TargetTransformInfo>()), L(L), Changed(false),
+ IVIncInsertPos(0) {
// If LoopSimplify form is not available, stay out of trouble.
if (!L->isLoopSimplifyForm())
return;
#ifndef NDEBUG
// Formulae should be legal.
- for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
- E = Uses.end(); I != E; ++I) {
- const LSRUse &LU = *I;
- for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
- JE = LU.Formulae.end(); J != JE; ++J)
- assert(isLegalUse(J->AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI) &&
- "Illegal formula generated!");
+ for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), E = Uses.end();
+ I != E; ++I) {
+ const LSRUse &LU = *I;
+ for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
+ JE = LU.Formulae.end();
+ J != JE; ++J)
+ assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
+ *J) && "Illegal formula generated!");
};
#endif
namespace {
class LoopStrengthReduce : public LoopPass {
- /// TLI - Keep a pointer of a TargetLowering to consult for determining
- /// transformation profitability.
- const TargetLowering *const TLI;
-
public:
static char ID; // Pass ID, replacement for typeid
- explicit LoopStrengthReduce(const TargetLowering *tli = 0);
+ LoopStrengthReduce();
private:
bool runOnLoop(Loop *L, LPPassManager &LPM);
char LoopStrengthReduce::ID = 0;
INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(IVUsers)
"Loop Strength Reduction", false, false)
-Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
- return new LoopStrengthReduce(TLI);
+Pass *llvm::createLoopStrengthReducePass() {
+ return new LoopStrengthReduce();
}
-LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
- : LoopPass(ID), TLI(tli) {
- initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
- }
+LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
+ initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
+}
void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
// We split critical edges, so we change the CFG. However, we do update
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<IVUsers>();
AU.addPreserved<IVUsers>();
+ AU.addRequired<TargetTransformInfo>();
}
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
bool Changed = false;
// Run the main LSR transformation.
- Changed |= LSRInstance(TLI, L, this).getChanged();
+ Changed |= LSRInstance(L, this).getChanged();
// Remove any extra phis created by processing inner loops.
Changed |= DeleteDeadPHIs(L->getHeader());
- if (EnablePhiElim) {
+ if (EnablePhiElim && L->isLoopSimplifyForm()) {
SmallVector<WeakVH, 16> DeadInsts;
SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), "lsr");
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
- unsigned numFolded = Rewriter.
- replaceCongruentIVs(L, &getAnalysis<DominatorTree>(), DeadInsts, TLI);
+ unsigned numFolded =
+ Rewriter.replaceCongruentIVs(L, &getAnalysis<DominatorTree>(),
+ DeadInsts,
+ &getAnalysis<TargetTransformInfo>());
if (numFolded) {
Changed = true;
DeleteTriviallyDeadInstructions(DeadInsts);