LoopAccessInfo(Loop *L, ScalarEvolution *SE, const DataLayout &DL,
const TargetLibraryInfo *TLI, AliasAnalysis *AA,
- DominatorTree *DT, const ValueToValueMap &Strides);
+ DominatorTree *DT, LoopInfo *LI,
+ const ValueToValueMap &Strides);
/// Return true if we can analyze the memory accesses in the loop and there are
/// no memory dependence cycles.
const TargetLibraryInfo *TLI;
AliasAnalysis *AA;
DominatorTree *DT;
+ LoopInfo *LI;
unsigned NumLoads;
unsigned NumStores;
const TargetLibraryInfo *TLI;
AliasAnalysis *AA;
DominatorTree *DT;
+ LoopInfo *LI;
};
} // End llvm namespace
class AssumptionCache;
class DominatorTree;
class TargetLibraryInfo;
+ class LoopInfo;
/// Determine which bits of V are known to be either zero or one and return
/// them in the KnownZero/KnownOne bit sets.
return GetUnderlyingObject(const_cast<Value *>(V), DL, MaxLookup);
}
- /// GetUnderlyingObjects - This method is similar to GetUnderlyingObject
- /// except that it can look through phi and select instructions and return
- /// multiple objects.
+ /// \brief This method is similar to GetUnderlyingObject except that it can
+ /// look through phi and select instructions and return multiple objects.
+ ///
+ /// If LoopInfo is passed, loop phis are further analyzed. If a pointer
+ /// accesses different objects in each iteration, we don't look through the
+ /// phi node. E.g. consider this loop nest:
+ ///
+ /// int **A;
+ /// for (i)
+ /// for (j) {
+ /// A[i][j] = A[i-1][j] * B[j]
+ /// }
+ ///
+ /// This is transformed by Load-PRE to stash away A[i] for the next iteration
+ /// of the outer loop:
+ ///
+ /// Curr = A[0]; // Prev_0
+ /// for (i: 1..N) {
+ /// Prev = Curr; // Prev = PHI (Prev_0, Curr)
+ /// Curr = A[i];
+ /// for (j: 0..N) {
+ /// Curr[j] = Prev[j] * B[j]
+ /// }
+ /// }
+ ///
+ /// Since A[i] and A[i-1] are independent pointers, GetUnderlyingObjects
+ /// should not assume that Curr and Prev share the same underlying object thus
+ /// it shouldn't look through the phi above.
void GetUnderlyingObjects(Value *V, SmallVectorImpl<Value *> &Objects,
- const DataLayout &DL, unsigned MaxLookup = 6);
+ const DataLayout &DL, LoopInfo *LI = nullptr,
+ unsigned MaxLookup = 6);
/// onlyUsedByLifetimeMarkers - Return true if the only users of this pointer
/// are lifetime markers.
typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
- AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA,
+ AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI,
MemoryDepChecker::DepCandidates &DA)
- : DL(Dl), AST(*AA), DepCands(DA), IsRTCheckNeeded(false) {}
+ : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckNeeded(false) {}
/// \brief Register a load and whether it is only read from.
void addLoad(AliasAnalysis::Location &Loc, bool IsReadOnly) {
//intrinsic property (such as TBAA metadata).
AliasSetTracker AST;
+ LoopInfo *LI;
+
/// Sets of potentially dependent accesses - members of one set share an
/// underlying pointer. The set "CheckDeps" identifies which sets really need a
/// dependence check.
// underlying object.
typedef SmallVector<Value *, 16> ValueVector;
ValueVector TempObjects;
- GetUnderlyingObjects(Ptr, TempObjects, DL);
+
+ GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
+ DEBUG(dbgs() << "Underlying objects for pointer " << *Ptr << "\n");
for (Value *UnderlyingObj : TempObjects) {
UnderlyingObjToAccessMap::iterator Prev =
ObjToLastAccess.find(UnderlyingObj);
DepCands.unionSets(Access, Prev->second);
ObjToLastAccess[UnderlyingObj] = Access;
+ DEBUG(dbgs() << " " << *UnderlyingObj << "\n");
}
}
}
MemoryDepChecker::DepCandidates DependentAccesses;
AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(),
- AA, DependentAccesses);
+ AA, LI, DependentAccesses);
// Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
// multiple times on the same object. If the ptr is accessed twice, once
LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
const DataLayout &DL,
const TargetLibraryInfo *TLI, AliasAnalysis *AA,
- DominatorTree *DT,
+ DominatorTree *DT, LoopInfo *LI,
const ValueToValueMap &Strides)
: DepChecker(SE, L), NumComparisons(0), TheLoop(L), SE(SE), DL(DL),
- TLI(TLI), AA(AA), DT(DT), NumLoads(0), NumStores(0),
+ TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
MaxSafeDepDistBytes(-1U), CanVecMem(false),
StoreToLoopInvariantAddress(false) {
if (canAnalyzeLoop())
if (!LAI) {
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
- LAI = llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, Strides);
+ LAI = llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI,
+ Strides);
#ifndef NDEBUG
LAI->NumSymbolicStrides = Strides.size();
#endif
void LoopAccessAnalysis::print(raw_ostream &OS, const Module *M) const {
LoopAccessAnalysis &LAA = *const_cast<LoopAccessAnalysis *>(this);
- LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
ValueToValueMap NoSymbolicStrides;
for (Loop *TopLevelLoop : *LI)
TLI = TLIP ? &TLIP->getTLI() : nullptr;
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
return false;
}
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
return Len == ~0ULL ? 1 : Len;
}
+/// \brief \p PN defines a loop-variant pointer to an object. Check if the
+/// previous iteration of the loop was referring to the same object as \p PN.
+static bool isSameUnderlyingObjectInLoop(PHINode *PN, LoopInfo *LI) {
+ // Find the loop-defined value.
+ Loop *L = LI->getLoopFor(PN->getParent());
+ if (PN->getNumIncomingValues() != 2)
+ return true;
+
+ // Find the value from previous iteration.
+ auto *PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(0));
+ if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
+ PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(1));
+ if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
+ return true;
+
+ // If a new pointer is loaded in the loop, the pointer references a different
+ // object in every iteration. E.g.:
+ // for (i)
+ // int *p = a[i];
+ // ...
+ if (auto *Load = dyn_cast<LoadInst>(PrevValue))
+ if (!L->isLoopInvariant(Load->getPointerOperand()))
+ return false;
+ return true;
+}
+
Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL,
unsigned MaxLookup) {
if (!V->getType()->isPointerTy())
}
void llvm::GetUnderlyingObjects(Value *V, SmallVectorImpl<Value *> &Objects,
- const DataLayout &DL, unsigned MaxLookup) {
+ const DataLayout &DL, LoopInfo *LI,
+ unsigned MaxLookup) {
SmallPtrSet<Value *, 4> Visited;
SmallVector<Value *, 4> Worklist;
Worklist.push_back(V);
}
if (PHINode *PN = dyn_cast<PHINode>(P)) {
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- Worklist.push_back(PN->getIncomingValue(i));
+ // If this PHI changes the underlying object in every iteration of the
+ // loop, don't look through it. Consider:
+ // int **A;
+ // for (i) {
+ // Prev = Curr; // Prev = PHI (Prev_0, Curr)
+ // Curr = A[i];
+ // *Prev, *Curr;
+ //
+ // Prev is tracking Curr one iteration behind so they refer to different
+ // underlying objects.
+ if (!LI || !LI->isLoopHeader(PN->getParent()) ||
+ isSameUnderlyingObjectInLoop(PN, LI))
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ Worklist.push_back(PN->getIncomingValue(i));
continue;
}
--- /dev/null
+; RUN: opt -basicaa -loop-accesses -analyze < %s | FileCheck %s
+
+; In:
+;
+; store_ptr = A;
+; load_ptr = &A[2];
+; for (i = 0; i < n; i++)
+; *store_ptr++ = *load_ptr++ *10; // A[i] = A[i+2] * 10
+;
+; make sure we look through the PHI to conclude that store_ptr and load_ptr
+; both have A as their underlying object. The dependence is safe for
+; vectorization requiring no memchecks.
+;
+; Otherwise we would try to prove independence with a memcheck that is going
+; to always fail.
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+; CHECK: Memory dependences are safe{{$}}
+
+define void @f(i8* noalias %A, i64 %width) {
+for.body.preheader:
+ %A_ahead = getelementptr inbounds i8, i8* %A, i64 2
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ %i.1, %for.body ], [ 0, %for.body.preheader ]
+ %load_ptr = phi i8* [ %load_ptr.1, %for.body ], [ %A_ahead, %for.body.preheader ]
+ %store_ptr = phi i8* [ %store_ptr.1, %for.body ], [ %A, %for.body.preheader ]
+
+ %loadA = load i8, i8* %load_ptr, align 1
+
+ %mul = mul i8 %loadA, 10
+
+ store i8 %mul, i8* %store_ptr, align 1
+
+ %load_ptr.1 = getelementptr inbounds i8, i8* %load_ptr, i64 1
+ %store_ptr.1 = getelementptr inbounds i8, i8* %store_ptr, i64 1
+ %i.1 = add nuw i64 %i, 1
+
+ %exitcond = icmp eq i64 %i.1, %width
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
--- /dev/null
+; RUN: opt -basicaa -loop-accesses -analyze < %s | FileCheck %s
+
+; This loop:
+;
+; int **A;
+; for (i)
+; for (j) {
+; A[i][j] = A[i-1][j] * B[j]
+; B[j+1] = 2 // backward dep between this and the previous
+; }
+;
+; is transformed by Load-PRE to stash away A[i] for the next iteration of the
+; outer loop:
+;
+; Curr = A[0]; // Prev_0
+; for (i: 1..N) {
+; Prev = Curr; // Prev = PHI (Prev_0, Curr)
+; Curr = A[i];
+; for (j: 0..N) {
+; Curr[j] = Prev[j] * B[j]
+; B[j+1] = 2 // backward dep between this and the previous
+; }
+; }
+;
+; Since A[i] and A[i-1] are likely to be independent, GetUnderlyingObjects
+; should not assume that Curr and Prev share the same underlying object.
+;
+; If it did we would try to dependence-analyze Curr and Prev and the analysis
+; would fail with non-constant distance.
+;
+; To illustrate one of the negative consequences of this, if the loop has a
+; backward dependence we won't detect this but instead fully fall back on
+; memchecks (that is what LAA does after encountering a case of non-constant
+; distance).
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+; CHECK: for_j.body:
+; CHECK-NEXT: Store to invariant address was not found in loop
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop
+; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: Backward:
+; CHECK-NEXT: %loadB = load i8, i8* %gepB, align 1 ->
+; CHECK-NEXT: store i8 2, i8* %gepB_plus_one, align 1
+
+define void @f(i8** noalias %A, i8* noalias %B, i64 %N) {
+for_i.preheader:
+ %prev_0 = load i8*, i8** %A, align 8
+ br label %for_i.body
+
+for_i.body:
+ %i = phi i64 [1, %for_i.preheader], [%i.1, %for_j.end]
+ %prev = phi i8* [%prev_0, %for_i.preheader], [%curr, %for_j.end]
+ %gep = getelementptr inbounds i8*, i8** %A, i64 %i
+ %curr = load i8*, i8** %gep, align 8
+ br label %for_j.preheader
+
+for_j.preheader:
+ br label %for_j.body
+
+for_j.body:
+ %j = phi i64 [0, %for_j.preheader], [%j.1, %for_j.body]
+
+ %gepPrev = getelementptr inbounds i8, i8* %prev, i64 %j
+ %gepCurr = getelementptr inbounds i8, i8* %curr, i64 %j
+ %gepB = getelementptr inbounds i8, i8* %B, i64 %j
+
+ %loadPrev = load i8, i8* %gepPrev, align 1
+ %loadB = load i8, i8* %gepB, align 1
+
+ %mul = mul i8 %loadPrev, %loadB
+
+ store i8 %mul, i8* %gepCurr, align 1
+
+ %gepB_plus_one = getelementptr inbounds i8, i8* %gepB, i64 1
+ store i8 2, i8* %gepB_plus_one, align 1
+
+ %j.1 = add nuw i64 %j, 1
+ %exitcondj = icmp eq i64 %j.1, %N
+ br i1 %exitcondj, label %for_j.end, label %for_j.body
+
+for_j.end:
+
+ %i.1 = add nuw i64 %i, 1
+ %exitcond = icmp eq i64 %i.1, %N
+ br i1 %exitcond, label %for_i.end, label %for_i.body
+
+for_i.end:
+ ret void
+}