Thumb1 frame lowering: Mark CFI instructions with the FrameSetup flag.

[oota-llvm.git] / lib / Analysis / MemoryDependenceAnalysis.cpp
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp

index 8f22b0ed3de3dd6dfd99ee49430ed9ea8d698715..881a25655406cbf2a431b2d88c6ec417e3525f7f 100644 (file)
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -18,6 +18,7 @@
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/Statistic.h"
  #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionTracker.h"
  #include "llvm/Analysis/InstructionSimplify.h"
  #include "llvm/Analysis/MemoryBuiltins.h"
  #include "llvm/Analysis/PHITransAddr.h"
@@ -48,13 +49,17 @@ STATISTIC(NumCacheCompleteNonLocalPtr,
            "Number of block queries that were completely cached");
  
  // Limit for the number of instructions to scan in a block.
-static const int BlockScanLimit = 100;
+static const unsigned int BlockScanLimit = 100;
+
+// Limit on the number of memdep results to process.
+static const unsigned int NumResultsLimit = 100;
  
  char MemoryDependenceAnalysis::ID = 0;
  
  // Register this pass...
  INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep",
                  "Memory Dependence Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
  INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
  INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep",
                        "Memory Dependence Analysis", false, true)
@@ -83,11 +88,13 @@ void MemoryDependenceAnalysis::releaseMemory() {
  ///
  void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
    AU.setPreservesAll();
+  AU.addRequired<AssumptionTracker>();
    AU.addRequiredTransitive<AliasAnalysis>();
  }
  
  bool MemoryDependenceAnalysis::runOnFunction(Function &) {
    AA = &getAnalysis<AliasAnalysis>();
+  AT = &getAnalysis<AssumptionTracker>();
    DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
    DL = DLP ? &DLP->getDataLayout() : nullptr;
    DominatorTreeWrapperPass *DTWP =
@@ -370,6 +377,36 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
    int64_t MemLocOffset = 0;
    unsigned Limit = BlockScanLimit;
    bool isInvariantLoad = false;
+
+  // We must be careful with atomic accesses, as they may allow another thread
+  //   to touch this location, clobbering it. We are conservative: if the
+  //   QueryInst is not a simple (non-atomic) memory access, we automatically
+  //   return getClobber.
+  // If it is simple, we know based on the results of
+  // "Compiler testing via a theory of sound optimisations in the C11/C++11
+  //   memory model" in PLDI 2013, that a non-atomic location can only be
+  //   clobbered between a pair of a release and an acquire action, with no
+  //   access to the location in between.
+  // Here is an example for giving the general intuition behind this rule.
+  // In the following code:
+  //   store x 0;
+  //   release action; [1]
+  //   acquire action; [4]
+  //   %val = load x;
+  // It is unsafe to replace %val by 0 because another thread may be running:
+  //   acquire action; [2]
+  //   store x 42;
+  //   release action; [3]
+  // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
+  // being 42. A key property of this program however is that if either
+  // 1 or 4 were missing, there would be a race between the store of 42
+  // and either the store of 0 or the load (making the whole program racy).
+  // The paper mentioned above shows that the same property is respected
+  // by every program that can detect any optimisation of that kind: either
+  // it is racy (undefined) or there is a release followed by an acquire
+  // between the pair of accesses under consideration.
+  bool HasSeenAcquire = false;
+
    if (isLoad && QueryInst) {
      LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
      if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
@@ -412,19 +449,25 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
      // be accessing the location.
      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        // Atomic loads have complications involved.
-      // A monotonic load is OK if the query inst is itself not atomic.
+      // A Monotonic (or higher) load is OK if the query inst is itself not atomic.
+      // An Acquire (or higher) load sets the HasSeenAcquire flag, so that any
+      //   release store will know to return getClobber.
        // FIXME: This is overly conservative.
        if (!LI->isUnordered()) {
          if (!QueryInst)
            return MemDepResult::getClobber(LI);
-        if (LI->getOrdering() != Monotonic)
-          return MemDepResult::getClobber(LI);
-        if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst))
+        if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst)) {
            if (!QueryLI->isSimple())
              return MemDepResult::getClobber(LI);
-        if (auto *QuerySI = dyn_cast<StoreInst>(QueryInst))
+        } else if (auto *QuerySI = dyn_cast<StoreInst>(QueryInst)) {
            if (!QuerySI->isSimple())
              return MemDepResult::getClobber(LI);
+        } else if (QueryInst->mayReadOrWriteMemory()) {
+          return MemDepResult::getClobber(LI);
+        }
+
+        if (isAtLeastAcquire(LI->getOrdering()))
+          HasSeenAcquire = true;
        }
  
        // FIXME: this is overly conservative.
@@ -490,19 +533,25 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
  
      if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        // Atomic stores have complications involved.
-      // A monotonic store is OK if the query inst is itself not atomic.
+      // A Monotonic store is OK if the query inst is itself not atomic.
+      // A Release (or higher) store further requires that no acquire load
+      //   has been seen.
        // FIXME: This is overly conservative.
        if (!SI->isUnordered()) {
          if (!QueryInst)
            return MemDepResult::getClobber(SI);
-        if (SI->getOrdering() != Monotonic)
-          return MemDepResult::getClobber(SI);
-        if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst))
+        if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst)) {
            if (!QueryLI->isSimple())
              return MemDepResult::getClobber(SI);
-        if (auto *QuerySI = dyn_cast<StoreInst>(QueryInst))
+        } else if (auto *QuerySI = dyn_cast<StoreInst>(QueryInst)) {
            if (!QuerySI->isSimple())
              return MemDepResult::getClobber(SI);
+        } else if (QueryInst->mayReadOrWriteMemory()) {
+          return MemDepResult::getClobber(SI);
+        }
+
+        if (HasSeenAcquire && isAtLeastRelease(SI->getOrdering()))
+          return MemDepResult::getClobber(SI);
        }
  
        // FIXME: this is overly conservative.
@@ -727,7 +776,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
      DirtyBlocks.pop_back();
  
      // Already processed this block?
-    if (!Visited.insert(DirtyBB))
+    if (!Visited.insert(DirtyBB).second)
        continue;
  
      // Do a binary search to see if we already have an entry for this block in
@@ -817,7 +866,7 @@ getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad,
           "Can't get pointer deps of a non-pointer!");
    Result.clear();
  
-  PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL);
+  PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, AT);
  
    // This is the set of blocks we've inspected, and the pointer we consider in
    // each block.  Because of critical edges, we currently bail out if querying
@@ -903,7 +952,7 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
    return Dep;
  }
  
-/// SortNonLocalDepInfoCache - Sort the a NonLocalDepInfo cache, given a certain
+/// SortNonLocalDepInfoCache - Sort the NonLocalDepInfo cache, given a certain
  /// number of elements in the array that are already properly ordered.  This is
  /// optimized for the case when only a few entries are added.
  static void
@@ -1087,6 +1136,24 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
    while (!Worklist.empty()) {
      BasicBlock *BB = Worklist.pop_back_val();
  
+    // If we do process a large number of blocks it becomes very expensive and
+    // likely it isn't worth worrying about.
+    if (Result.size() > NumResultsLimit) {
+      Worklist.clear();
+      // Sort it now (if needed) so that recursive invocations of
+      // getNonLocalPointerDepFromBB and other routines that could reuse the
+      // cache value will only see properly sorted cache arrays.
+      if (Cache && NumSortedEntries != Cache->size()) {
+        SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
+      }
+      // Since we bail out, the "Cache" set won't contain all of the
+      // results for the query.  This is ok (we can still use it to accelerate
+      // specific block queries) but we can't do the fastpath "return all
+      // results from the set".  Clear out the indicator for this.
+      CacheInfo->Pair = BBSkipFirstBlockPair();
+      return true;
+    }
+
      // Skip the first block if we have it.
      if (!SkipFirstBlock) {
        // Analyze the dependency of *Pointer in FromBB.  See if we already have
@@ -1293,7 +1360,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
        if (I->getBB() != BB)
          continue;
  
-      assert(I->getResult().isNonLocal() &&
+      assert((I->getResult().isNonLocal() || !DT->isReachableFromEntry(BB)) &&
               "Should only be here with transparent block");
        I->setResult(MemDepResult::getUnknown());
        Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(),
@@ -1411,14 +1478,11 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
  
    ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
    if (ReverseDepIt != ReverseLocalDeps.end()) {
-    SmallPtrSet<Instruction*, 4> &ReverseDeps = ReverseDepIt->second;
      // RemInst can't be the terminator if it has local stuff depending on it.
-    assert(!ReverseDeps.empty() && !isa<TerminatorInst>(RemInst) &&
+    assert(!ReverseDepIt->second.empty() && !isa<TerminatorInst>(RemInst) &&
             "Nothing can locally depend on a terminator");
  
-    for (SmallPtrSet<Instruction*, 4>::iterator I = ReverseDeps.begin(),
-         E = ReverseDeps.end(); I != E; ++I) {
-      Instruction *InstDependingOnRemInst = *I;
+    for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
        assert(InstDependingOnRemInst != RemInst &&
               "Already removed our local dep info");
  
@@ -1444,12 +1508,10 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
  
    ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
    if (ReverseDepIt != ReverseNonLocalDeps.end()) {
-    SmallPtrSet<Instruction*, 4> &Set = ReverseDepIt->second;
-    for (SmallPtrSet<Instruction*, 4>::iterator I = Set.begin(), E = Set.end();
-         I != E; ++I) {
-      assert(*I != RemInst && "Already removed NonLocalDep info for RemInst");
+    for (Instruction *I : ReverseDepIt->second) {
+      assert(I != RemInst && "Already removed NonLocalDep info for RemInst");
  
-      PerInstNLInfo &INLD = NonLocalDeps[*I];
+      PerInstNLInfo &INLD = NonLocalDeps[I];
        // The information is now dirty!
        INLD.second = true;
  
@@ -1461,7 +1523,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
          DI->setResult(NewDirtyVal);
  
          if (Instruction *NextI = NewDirtyVal.getInst())
-          ReverseDepsToAdd.push_back(std::make_pair(NextI, *I));
+          ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
        }
      }
  
@@ -1480,12 +1542,9 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
    ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
      ReverseNonLocalPtrDeps.find(RemInst);
    if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
-    SmallPtrSet<ValueIsLoadPair, 4> &Set = ReversePtrDepIt->second;
      SmallVector<std::pair<Instruction*, ValueIsLoadPair>,8> ReversePtrDepsToAdd;
  
-    for (SmallPtrSet<ValueIsLoadPair, 4>::iterator I = Set.begin(),
-         E = Set.end(); I != E; ++I) {
-      ValueIsLoadPair P = *I;
+    for (ValueIsLoadPair P : ReversePtrDepIt->second) {
        assert(P.getPointer() != RemInst &&
               "Already removed NonLocalPointerDeps info for RemInst");
  
@@ -1526,8 +1585,10 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
    DEBUG(verifyRemoved(RemInst));
  }
  /// verifyRemoved - Verify that the specified instruction does not occur
-/// in our internal data structures.
+/// in our internal data structures. This function verifies by asserting in
+/// debug builds.
  void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
+#ifndef NDEBUG
    for (LocalDepMapType::const_iterator I = LocalDeps.begin(),
         E = LocalDeps.end(); I != E; ++I) {
      assert(I->first != D && "Inst occurs in data structures");
@@ -1556,18 +1617,16 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
    for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(),
         E = ReverseLocalDeps.end(); I != E; ++I) {
      assert(I->first != D && "Inst occurs in data structures");
-    for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(),
-         EE = I->second.end(); II != EE; ++II)
-      assert(*II != D && "Inst occurs in data structures");
+    for (Instruction *Inst : I->second)
+      assert(Inst != D && "Inst occurs in data structures");
    }
  
    for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(),
         E = ReverseNonLocalDeps.end();
         I != E; ++I) {
      assert(I->first != D && "Inst occurs in data structures");
-    for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(),
-         EE = I->second.end(); II != EE; ++II)
-      assert(*II != D && "Inst occurs in data structures");
+    for (Instruction *Inst : I->second)
+      assert(Inst != D && "Inst occurs in data structures");
    }
  
    for (ReverseNonLocalPtrDepTy::const_iterator
@@ -1575,11 +1634,10 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
         E = ReverseNonLocalPtrDeps.end(); I != E; ++I) {
      assert(I->first != D && "Inst occurs in rev NLPD map");
  
-    for (SmallPtrSet<ValueIsLoadPair, 4>::const_iterator II = I->second.begin(),
-         E = I->second.end(); II != E; ++II)
-      assert(*II != ValueIsLoadPair(D, false) &&
-             *II != ValueIsLoadPair(D, true) &&
+    for (ValueIsLoadPair P : I->second)
+      assert(P != ValueIsLoadPair(D, false) &&
+             P != ValueIsLoadPair(D, true) &&
               "Inst occurs in ReverseNonLocalPtrDeps map");
    }
-
+#endif
  }