#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/PHITransAddr.h"
"Number of block queries that were completely cached");
// Limit for the number of instructions to scan in a block.
-static const int BlockScanLimit = 100;
+static const unsigned int BlockScanLimit = 100;
+
+// Limit on the number of memdep results to process.
+static const unsigned int NumResultsLimit = 100;
char MemoryDependenceAnalysis::ID = 0;
// Register this pass...
INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep",
"Memory Dependence Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep",
"Memory Dependence Analysis", false, true)
///
void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
+ AU.addRequired<AssumptionTracker>();
AU.addRequiredTransitive<AliasAnalysis>();
}
bool MemoryDependenceAnalysis::runOnFunction(Function &) {
AA = &getAnalysis<AliasAnalysis>();
+ AT = &getAnalysis<AssumptionTracker>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
DL = DLP ? &DLP->getDataLayout() : nullptr;
DominatorTreeWrapperPass *DTWP =
int64_t MemLocOffset = 0;
unsigned Limit = BlockScanLimit;
bool isInvariantLoad = false;
+
+ // We must be careful with atomic accesses, as they may allow another thread
+ // to touch this location, cloberring it. We are conservative: if the
+ // QueryInst is not a simple (non-atomic) memory access, we automatically
+ // return getClobber.
+ // If it is simple, we know based on the results of
+ // "Compiler testing via a theory of sound optimisations in the C11/C++11
+ // memory model" in PLDI 2013, that a non-atomic location can only be
+ // clobbered between a pair of a release and an acquire action, with no
+ // access to the location in between.
+ // Here is an example for giving the general intuition behind this rule.
+ // In the following code:
+ // store x 0;
+ // release action; [1]
+ // acquire action; [4]
+ // %val = load x;
+ // It is unsafe to replace %val by 0 because another thread may be running:
+ // acquire action; [2]
+ // store x 42;
+ // release action; [3]
+ // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
+ // being 42. A key property of this program however is that if either
+ // 1 or 4 were missing, there would be a race between the store of 42
+ // either the store of 0 or the load (making the whole progam racy).
+ // The paper mentionned above shows that the same property is respected
+ // by every program that can detect any optimisation of that kind: either
+ // it is racy (undefined) or there is a release followed by an acquire
+ // between the pair of accesses under consideration.
+ bool HasSeenAcquire = false;
+
if (isLoad && QueryInst) {
LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
// be accessing the location.
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
// Atomic loads have complications involved.
- // A monotonic load is OK if the query inst is itself not atomic.
+ // A Monotonic (or higher) load is OK if the query inst is itself not atomic.
+ // An Acquire (or higher) load sets the HasSeenAcquire flag, so that any
+ // release store will know to return getClobber.
// FIXME: This is overly conservative.
if (!LI->isUnordered()) {
if (!QueryInst)
return MemDepResult::getClobber(LI);
- if (LI->getOrdering() != Monotonic)
- return MemDepResult::getClobber(LI);
- if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst))
+ if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst)) {
if (!QueryLI->isSimple())
return MemDepResult::getClobber(LI);
- if (auto *QuerySI = dyn_cast<StoreInst>(QueryInst))
+ } else if (auto *QuerySI = dyn_cast<StoreInst>(QueryInst)) {
if (!QuerySI->isSimple())
return MemDepResult::getClobber(LI);
+ } else if (QueryInst->mayReadOrWriteMemory()) {
+ return MemDepResult::getClobber(LI);
+ }
+
+ if (isAtLeastAcquire(LI->getOrdering()))
+ HasSeenAcquire = true;
}
// FIXME: this is overly conservative.
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// Atomic stores have complications involved.
- // A monotonic store is OK if the query inst is itself not atomic.
+ // A Monotonic store is OK if the query inst is itself not atomic.
+ // A Release (or higher) store further requires that no acquire load
+ // has been seen.
// FIXME: This is overly conservative.
if (!SI->isUnordered()) {
if (!QueryInst)
return MemDepResult::getClobber(SI);
- if (SI->getOrdering() != Monotonic)
- return MemDepResult::getClobber(SI);
- if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst))
+ if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst)) {
if (!QueryLI->isSimple())
return MemDepResult::getClobber(SI);
- if (auto *QuerySI = dyn_cast<StoreInst>(QueryInst))
+ } else if (auto *QuerySI = dyn_cast<StoreInst>(QueryInst)) {
if (!QuerySI->isSimple())
return MemDepResult::getClobber(SI);
+ } else if (QueryInst->mayReadOrWriteMemory()) {
+ return MemDepResult::getClobber(SI);
+ }
+
+ if (HasSeenAcquire && isAtLeastRelease(SI->getOrdering()))
+ return MemDepResult::getClobber(SI);
}
// FIXME: this is overly conservative.
DirtyBlocks.pop_back();
// Already processed this block?
- if (!Visited.insert(DirtyBB))
+ if (!Visited.insert(DirtyBB).second)
continue;
// Do a binary search to see if we already have an entry for this block in
"Can't get pointer deps of a non-pointer!");
Result.clear();
- PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL);
+ PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, AT);
// This is the set of blocks we've inspected, and the pointer we consider in
// each block. Because of critical edges, we currently bail out if querying
return Dep;
}
-/// SortNonLocalDepInfoCache - Sort the a NonLocalDepInfo cache, given a certain
+/// SortNonLocalDepInfoCache - Sort the NonLocalDepInfo cache, given a certain
/// number of elements in the array that are already properly ordered. This is
/// optimized for the case when only a few entries are added.
static void
while (!Worklist.empty()) {
BasicBlock *BB = Worklist.pop_back_val();
+ // If we do process a large number of blocks it becomes very expensive and
+ // likely it isn't worth worrying about
+ if (Result.size() > NumResultsLimit) {
+ Worklist.clear();
+ // Sort it now (if needed) so that recursive invocations of
+ // getNonLocalPointerDepFromBB and other routines that could reuse the
+ // cache value will only see properly sorted cache arrays.
+ if (Cache && NumSortedEntries != Cache->size()) {
+ SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
+ }
+ // Since we bail out, the "Cache" set won't contain all of the
+ // results for the query. This is ok (we can still use it to accelerate
+ // specific block queries) but we can't do the fastpath "return all
+ // results from the set". Clear out the indicator for this.
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+ return true;
+ }
+
// Skip the first block if we have it.
if (!SkipFirstBlock) {
// Analyze the dependency of *Pointer in FromBB. See if we already have
if (I->getBB() != BB)
continue;
- assert(I->getResult().isNonLocal() &&
+ assert((I->getResult().isNonLocal() || !DT->isReachableFromEntry(BB)) &&
"Should only be here with transparent block");
I->setResult(MemDepResult::getUnknown());
Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(),
ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
if (ReverseDepIt != ReverseLocalDeps.end()) {
- SmallPtrSet<Instruction*, 4> &ReverseDeps = ReverseDepIt->second;
// RemInst can't be the terminator if it has local stuff depending on it.
- assert(!ReverseDeps.empty() && !isa<TerminatorInst>(RemInst) &&
+ assert(!ReverseDepIt->second.empty() && !isa<TerminatorInst>(RemInst) &&
"Nothing can locally depend on a terminator");
- for (SmallPtrSet<Instruction*, 4>::iterator I = ReverseDeps.begin(),
- E = ReverseDeps.end(); I != E; ++I) {
- Instruction *InstDependingOnRemInst = *I;
+ for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
assert(InstDependingOnRemInst != RemInst &&
"Already removed our local dep info");
ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
if (ReverseDepIt != ReverseNonLocalDeps.end()) {
- SmallPtrSet<Instruction*, 4> &Set = ReverseDepIt->second;
- for (SmallPtrSet<Instruction*, 4>::iterator I = Set.begin(), E = Set.end();
- I != E; ++I) {
- assert(*I != RemInst && "Already removed NonLocalDep info for RemInst");
+ for (Instruction *I : ReverseDepIt->second) {
+ assert(I != RemInst && "Already removed NonLocalDep info for RemInst");
- PerInstNLInfo &INLD = NonLocalDeps[*I];
+ PerInstNLInfo &INLD = NonLocalDeps[I];
// The information is now dirty!
INLD.second = true;
DI->setResult(NewDirtyVal);
if (Instruction *NextI = NewDirtyVal.getInst())
- ReverseDepsToAdd.push_back(std::make_pair(NextI, *I));
+ ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
}
}
ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
ReverseNonLocalPtrDeps.find(RemInst);
if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
- SmallPtrSet<ValueIsLoadPair, 4> &Set = ReversePtrDepIt->second;
SmallVector<std::pair<Instruction*, ValueIsLoadPair>,8> ReversePtrDepsToAdd;
- for (SmallPtrSet<ValueIsLoadPair, 4>::iterator I = Set.begin(),
- E = Set.end(); I != E; ++I) {
- ValueIsLoadPair P = *I;
+ for (ValueIsLoadPair P : ReversePtrDepIt->second) {
assert(P.getPointer() != RemInst &&
"Already removed NonLocalPointerDeps info for RemInst");
DEBUG(verifyRemoved(RemInst));
}
/// verifyRemoved - Verify that the specified instruction does not occur
-/// in our internal data structures.
+/// in our internal data structures. This function verifies by asserting in
+/// debug builds.
void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
+#ifndef NDEBUG
for (LocalDepMapType::const_iterator I = LocalDeps.begin(),
E = LocalDeps.end(); I != E; ++I) {
assert(I->first != D && "Inst occurs in data structures");
for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(),
E = ReverseLocalDeps.end(); I != E; ++I) {
assert(I->first != D && "Inst occurs in data structures");
- for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(),
- EE = I->second.end(); II != EE; ++II)
- assert(*II != D && "Inst occurs in data structures");
+ for (Instruction *Inst : I->second)
+ assert(Inst != D && "Inst occurs in data structures");
}
for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(),
E = ReverseNonLocalDeps.end();
I != E; ++I) {
assert(I->first != D && "Inst occurs in data structures");
- for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(),
- EE = I->second.end(); II != EE; ++II)
- assert(*II != D && "Inst occurs in data structures");
+ for (Instruction *Inst : I->second)
+ assert(Inst != D && "Inst occurs in data structures");
}
for (ReverseNonLocalPtrDepTy::const_iterator
E = ReverseNonLocalPtrDeps.end(); I != E; ++I) {
assert(I->first != D && "Inst occurs in rev NLPD map");
- for (SmallPtrSet<ValueIsLoadPair, 4>::const_iterator II = I->second.begin(),
- E = I->second.end(); II != E; ++II)
- assert(*II != ValueIsLoadPair(D, false) &&
- *II != ValueIsLoadPair(D, true) &&
+ for (ValueIsLoadPair P : I->second)
+ assert(P != ValueIsLoadPair(D, false) &&
+ P != ValueIsLoadPair(D, true) &&
"Inst occurs in ReverseNonLocalPtrDeps map");
}
-
+#endif
}