/// Maximum SIMD width.
const unsigned VectorizerParams::MaxVectorWidth = 64;
-/// \brief We collect interesting dependences up to this threshold.
-static cl::opt<unsigned> MaxInterestingDependence(
- "max-interesting-dependences", cl::Hidden,
- cl::desc("Maximum number of interesting dependences collected by "
- "loop-access analysis (default = 100)"),
- cl::init(100));
+/// \brief We collect dependences up to this threshold.
+static cl::opt<unsigned>
+ MaxDependences("max-dependences", cl::Hidden,
+ cl::desc("Maximum number of dependences collected by "
+ "loop-access analysis (default = 100)"),
+ cl::init(100));
bool VectorizerParams::isInterleaveForced() {
return ::VectorizationInterleave.getNumOccurrences() > 0;
const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE,
const ValueToValueMap &PtrToStride,
+ SCEVUnionPredicate &Preds,
Value *Ptr, Value *OrigPtr) {
-
const SCEV *OrigSCEV = SE->getSCEV(Ptr);
// If there is an entry in the map return the SCEV of the pointer with the
ValueToValueMap RewriteMap;
RewriteMap[StrideVal] = One;
- const SCEV *ByOne =
- SCEVParameterRewriter::rewrite(OrigSCEV, *SE, RewriteMap, true);
+ const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
+ const auto *CT =
+ static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));
+
+ Preds.add(SE->getEqualPredicate(U, CT));
+
+ const SCEV *ByOne = SE->rewriteUsingPredicate(OrigSCEV, Preds);
DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne
<< "\n");
return ByOne;
}
// Otherwise, just return the SCEV of the original pointer.
- return SE->getSCEV(Ptr);
+ return OrigSCEV;
}
void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
unsigned DepSetId, unsigned ASId,
- const ValueToValueMap &Strides) {
+ const ValueToValueMap &Strides,
+ SCEVUnionPredicate &Preds) {
// Get the stride replaced scev.
- const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
+ const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Preds, Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
assert(AR && "Invalid addrec expression");
const SCEV *Ex = SE->getBackedgeTakenCount(Lp);
}
SmallVector<RuntimePointerChecking::PointerCheck, 4>
-RuntimePointerChecking::generateChecks(
- const SmallVectorImpl<int> *PtrPartition) const {
+RuntimePointerChecking::generateChecks() const {
SmallVector<PointerCheck, 4> Checks;
for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
const RuntimePointerChecking::CheckingPtrGroup &CGI = CheckingGroups[I];
const RuntimePointerChecking::CheckingPtrGroup &CGJ = CheckingGroups[J];
- if (needsChecking(CGI, CGJ, PtrPartition))
+ if (needsChecking(CGI, CGJ))
Checks.push_back(std::make_pair(&CGI, &CGJ));
}
}
return Checks;
}
-bool RuntimePointerChecking::needsChecking(
- const CheckingPtrGroup &M, const CheckingPtrGroup &N,
- const SmallVectorImpl<int> *PtrPartition) const {
+void RuntimePointerChecking::generateChecks(
+ MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
+ assert(Checks.empty() && "Checks is not empty");
+ groupChecks(DepCands, UseDependencies);
+ Checks = generateChecks();
+}
+
+bool RuntimePointerChecking::needsChecking(const CheckingPtrGroup &M,
+ const CheckingPtrGroup &N) const {
for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
- if (needsChecking(M.Members[I], N.Members[J], PtrPartition))
+ if (needsChecking(M.Members[I], N.Members[J]))
return true;
return false;
}
// ShouldRetryWithRuntimeCheck is set, and therefore UseDependencies
// is also false. In this case we will use the fallback path and create
// separate checking groups for all pointers.
-
+
// If we don't have the dependency partitions, construct a new
// checking pointer group for each pointer. This is also required
// for correctness, because in this case we can have checking between
PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
}
-bool RuntimePointerChecking::needsChecking(
- unsigned I, unsigned J, const SmallVectorImpl<int> *PtrPartition) const {
+bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
const PointerInfo &PointerI = Pointers[I];
const PointerInfo &PointerJ = Pointers[J];
if (PointerI.AliasSetId != PointerJ.AliasSetId)
return false;
- // If PtrPartition is set omit checks between pointers of the same partition.
- if (PtrPartition && arePointersInSamePartition(*PtrPartition, I, J))
- return false;
-
return true;
}
}
}
-void RuntimePointerChecking::print(
- raw_ostream &OS, unsigned Depth,
- const SmallVectorImpl<int> *PtrPartition) const {
+void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {
OS.indent(Depth) << "Run-time memory checks:\n";
- printChecks(OS, generateChecks(PtrPartition), Depth);
+ printChecks(OS, Checks, Depth);
OS.indent(Depth) << "Grouped accesses:\n";
for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
}
}
-unsigned RuntimePointerChecking::getNumberOfChecks(
- const SmallVectorImpl<int> *PtrPartition) const {
-
- unsigned NumPartitions = CheckingGroups.size();
- unsigned CheckCount = 0;
-
- for (unsigned I = 0; I < NumPartitions; ++I)
- for (unsigned J = I + 1; J < NumPartitions; ++J)
- if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition))
- CheckCount++;
- return CheckCount;
-}
-
-bool RuntimePointerChecking::needsAnyChecking(
- const SmallVectorImpl<int> *PtrPartition) const {
- unsigned NumPointers = Pointers.size();
-
- for (unsigned I = 0; I < NumPointers; ++I)
- for (unsigned J = I + 1; J < NumPointers; ++J)
- if (needsChecking(I, J, PtrPartition))
- return true;
- return false;
-}
-
namespace {
/// \brief Analyses memory accesses in a loop.
///
typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI,
- MemoryDepChecker::DepCandidates &DA)
- : DL(Dl), AST(*AA), LI(LI), DepCands(DA),
- IsRTCheckAnalysisNeeded(false) {}
+ MemoryDepChecker::DepCandidates &DA, SCEVUnionPredicate &Preds)
+ : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckAnalysisNeeded(false),
+ Preds(Preds) {}
/// \brief Register a load and whether it is only read from.
void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
/// We decided that no dependence analysis would be used. Reset the state.
void resetDepChecks(MemoryDepChecker &DepChecker) {
CheckDeps.clear();
- DepChecker.clearInterestingDependences();
+ DepChecker.clearDependences();
}
MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; }
/// (i.e. ShouldRetryWithRuntimeCheck), isDependencyCheckNeeded is cleared
/// while this remains set if we have potentially dependent accesses.
bool IsRTCheckAnalysisNeeded;
+
+ /// The SCEV predicate containing all the SCEV-related assumptions.
+ SCEVUnionPredicate &Preds;
};
} // end anonymous namespace
/// \brief Check whether a pointer can participate in a runtime bounds check.
static bool hasComputableBounds(ScalarEvolution *SE,
- const ValueToValueMap &Strides, Value *Ptr) {
- const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
+ const ValueToValueMap &Strides, Value *Ptr,
+ Loop *L, SCEVUnionPredicate &Preds) {
+ const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, Strides, Preds, Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
if (!AR)
return false;
else
++NumReadPtrChecks;
- if (hasComputableBounds(SE, StridesMap, Ptr) &&
+ if (hasComputableBounds(SE, StridesMap, Ptr, TheLoop, Preds) &&
// When we run after a failing dependency check we have to make sure
// we don't have wrapping pointers.
(!ShouldCheckStride ||
- isStridedPtr(SE, Ptr, TheLoop, StridesMap) == 1)) {
+ isStridedPtr(SE, Ptr, TheLoop, StridesMap, Preds) == 1)) {
// The id of the dependence set.
unsigned DepId;
// Each access has its own dependence set.
DepId = RunningDepId++;
- RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);
+ RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, Preds);
DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
} else {
}
if (NeedRTCheck && CanDoRT)
- RtCheck.groupChecks(DepCands, IsDepCheckNeeded);
+ RtCheck.generateChecks(DepCands, IsDepCheckNeeded);
- DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks(nullptr)
+ DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
<< " pointer comparisons.\n");
RtCheck.Need = NeedRTCheck;
GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
DEBUG(dbgs() << "Underlying objects for pointer " << *Ptr << "\n");
for (Value *UnderlyingObj : TempObjects) {
+ // nullptr never alias, don't join sets for pointer that have "null"
+ // in their UnderlyingObjects list.
+ if (isa<ConstantPointerNull>(UnderlyingObj))
+ continue;
+
UnderlyingObjToAccessMap::iterator Prev =
ObjToLastAccess.find(UnderlyingObj);
if (Prev != ObjToLastAccess.end())
/// \brief Check whether the access through \p Ptr has a constant stride.
int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
- const ValueToValueMap &StridesMap) {
- const Type *Ty = Ptr->getType();
+ const ValueToValueMap &StridesMap,
+ SCEVUnionPredicate &Preds) {
+ Type *Ty = Ptr->getType();
assert(Ty->isPointerTy() && "Unexpected non-ptr");
// Make sure that the pointer does not point to aggregate types.
- const PointerType *PtrTy = cast<PointerType>(Ty);
+ auto *PtrTy = cast<PointerType>(Ty);
if (PtrTy->getElementType()->isAggregateType()) {
DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
<< *Ptr << "\n");
return 0;
}
- const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, StridesMap, Ptr);
+ const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, StridesMap, Preds, Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
if (!AR) {
llvm_unreachable("unexpected DepType!");
}
-bool MemoryDepChecker::Dependence::isInterestingDependence(DepType Type) {
+bool MemoryDepChecker::Dependence::isBackward() const {
switch (Type) {
case NoDep:
case Forward:
+ case ForwardButPreventsForwarding:
+ case Unknown:
return false;
case BackwardVectorizable:
- case Unknown:
- case ForwardButPreventsForwarding:
case Backward:
case BackwardVectorizableButPreventsForwarding:
return true;
}
bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
+ return isBackward() || Type == Unknown;
+}
+
+bool MemoryDepChecker::Dependence::isForward() const {
switch (Type) {
- case NoDep:
case Forward:
case ForwardButPreventsForwarding:
- return false;
+ return true;
+ case NoDep:
case Unknown:
case BackwardVectorizable:
case Backward:
case BackwardVectorizableButPreventsForwarding:
- return true;
+ return false;
}
llvm_unreachable("unexpected DepType!");
}
BPtr->getType()->getPointerAddressSpace())
return Dependence::Unknown;
- const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, APtr);
- const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, BPtr);
+ const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, Preds, APtr);
+ const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, Preds, BPtr);
- int StrideAPtr = isStridedPtr(SE, APtr, InnermostLoop, Strides);
- int StrideBPtr = isStridedPtr(SE, BPtr, InnermostLoop, Strides);
+ int StrideAPtr = isStridedPtr(SE, APtr, InnermostLoop, Strides, Preds);
+ int StrideBPtr = isStridedPtr(SE, BPtr, InnermostLoop, Strides, Preds);
const SCEV *Src = AScev;
const SCEV *Sink = BScev;
// Could be improved to assert type sizes are the same (i32 == float, etc).
if (Val == 0) {
if (ATy == BTy)
- return Dependence::NoDep;
+ return Dependence::Forward;
DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n");
return Dependence::Unknown;
}
isDependent(*A.first, A.second, *B.first, B.second, Strides);
SafeForVectorization &= Dependence::isSafeForVectorization(Type);
- // Gather dependences unless we accumulated MaxInterestingDependence
+ // Gather dependences unless we accumulated MaxDependences
// dependences. In that case return as soon as we find the first
// unsafe dependence. This puts a limit on this quadratic
// algorithm.
- if (RecordInterestingDependences) {
- if (Dependence::isInterestingDependence(Type))
- InterestingDependences.push_back(
- Dependence(A.second, B.second, Type));
-
- if (InterestingDependences.size() >= MaxInterestingDependence) {
- RecordInterestingDependences = false;
- InterestingDependences.clear();
+ if (RecordDependences) {
+ if (Type != Dependence::NoDep)
+ Dependences.push_back(Dependence(A.second, B.second, Type));
+
+ if (Dependences.size() >= MaxDependences) {
+ RecordDependences = false;
+ Dependences.clear();
DEBUG(dbgs() << "Too many dependences, stopped recording\n");
}
}
- if (!RecordInterestingDependences && !SafeForVectorization)
+ if (!RecordDependences && !SafeForVectorization)
return false;
}
++OI;
}
}
- DEBUG(dbgs() << "Total Interesting Dependences: "
- << InterestingDependences.size() << "\n");
+ DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
return SafeForVectorization;
}
if (it->mayWriteToMemory()) {
StoreInst *St = dyn_cast<StoreInst>(it);
if (!St) {
- emitAnalysis(LoopAccessReport(it) <<
+ emitAnalysis(LoopAccessReport(&*it) <<
"instruction cannot be vectorized");
CanVecMem = false;
return;
MemoryDepChecker::DepCandidates DependentAccesses;
AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(),
- AA, LI, DependentAccesses);
+ AA, LI, DependentAccesses, Preds);
// Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
// multiple times on the same object. If the ptr is accessed twice, once
// read a few words, modify, and write a few words, and some of the
// words may be written to the same address.
bool IsReadOnlyPtr = false;
- if (Seen.insert(Ptr).second || !isStridedPtr(SE, Ptr, TheLoop, Strides)) {
+ if (Seen.insert(Ptr).second ||
+ !isStridedPtr(SE, Ptr, TheLoop, Strides, Preds)) {
++NumReads;
IsReadOnlyPtr = true;
}
return nullptr;
}
-/// \brief IR Values for the lower and upper bounds of a pointer evolution.
+namespace {
+/// \brief IR Values for the lower and upper bounds of a pointer evolution. We
+/// need to use value-handles because SCEV expansion can invalidate previously
+/// expanded values. Thus expansion of a pointer can invalidate the bounds for
+/// a previous one.
struct PointerBounds {
- Value *Start;
- Value *End;
+ TrackingVH<Value> Start;
+ TrackingVH<Value> End;
};
+} // end anonymous namespace
/// \brief Expand code for the lower and upper bound of the pointer group \p CG
/// in \p TheLoop. \return the values for the bounds.
return ChecksWithBounds;
}
-std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
+std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeChecks(
Instruction *Loc,
const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks)
const {
for (const auto &Check : ExpandedChecks) {
const PointerBounds &A = Check.first, &B = Check.second;
+ // Check if two pointers (A and B) conflict where conflict is computed as:
+ // start(A) <= end(B) && start(B) <= end(A)
unsigned AS0 = A.Start->getType()->getPointerAddressSpace();
unsigned AS1 = B.Start->getType()->getPointerAddressSpace();
return std::make_pair(FirstInst, Check);
}
-std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
- Instruction *Loc, const SmallVectorImpl<int> *PtrPartition) const {
+std::pair<Instruction *, Instruction *>
+LoopAccessInfo::addRuntimeChecks(Instruction *Loc) const {
if (!PtrRtChecking.Need)
return std::make_pair(nullptr, nullptr);
- return addRuntimeCheck(Loc, PtrRtChecking.generateChecks(PtrPartition));
+ return addRuntimeChecks(Loc, PtrRtChecking.getChecks());
}
LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
const TargetLibraryInfo *TLI, AliasAnalysis *AA,
DominatorTree *DT, LoopInfo *LI,
const ValueToValueMap &Strides)
- : PtrRtChecking(SE), DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL),
+ : PtrRtChecking(SE), DepChecker(SE, L, Preds), TheLoop(L), SE(SE), DL(DL),
TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
MaxSafeDepDistBytes(-1U), CanVecMem(false),
StoreToLoopInvariantAddress(false) {
if (Report)
OS.indent(Depth) << "Report: " << Report->str() << "\n";
- if (auto *InterestingDependences = DepChecker.getInterestingDependences()) {
- OS.indent(Depth) << "Interesting Dependences:\n";
- for (auto &Dep : *InterestingDependences) {
+ if (auto *Dependences = DepChecker.getDependences()) {
+ OS.indent(Depth) << "Dependences:\n";
+ for (auto &Dep : *Dependences) {
Dep.print(OS, Depth + 2, DepChecker.getMemoryInstructions());
OS << "\n";
}
} else
- OS.indent(Depth) << "Too many interesting dependences, not recorded\n";
+ OS.indent(Depth) << "Too many dependences, not recorded\n";
// List the pair of accesses need run-time checks to prove independence.
PtrRtChecking.print(OS, Depth);
OS.indent(Depth) << "Store to invariant address was "
<< (StoreToLoopInvariantAddress ? "" : "not ")
<< "found in loop.\n";
+
+ OS.indent(Depth) << "SCEV assumptions:\n";
+ Preds.print(OS, Depth);
}
const LoopAccessInfo &
if (!LAI) {
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
- LAI = llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI,
- Strides);
+ LAI =
+ llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI, Strides);
#ifndef NDEBUG
LAI->NumSymbolicStrides = Strides.size();
#endif
}
bool LoopAccessAnalysis::runOnFunction(Function &F) {
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
TLI = TLIP ? &TLIP->getTLI() : nullptr;
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
}
void LoopAccessAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<ScalarEvolution>();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
#define LAA_NAME "loop-accesses"
INITIALIZE_PASS_BEGIN(LoopAccessAnalysis, LAA_NAME, laa_name, false, true)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(LoopAccessAnalysis, LAA_NAME, laa_name, false, true)