LoopAccessInfo(Loop *L, ScalarEvolution *SE, const DataLayout &DL,
const TargetLibraryInfo *TLI, AliasAnalysis *AA,
- DominatorTree *DT, const ValueToValueMap &Strides);
+ DominatorTree *DT, LoopInfo *LI,
+ const ValueToValueMap &Strides);
/// Return true if we can analyze the memory accesses in the loop and there are
/// no memory dependence cycles.
const TargetLibraryInfo *TLI;
AliasAnalysis *AA;
DominatorTree *DT;
+ LoopInfo *LI;
unsigned NumLoads;
unsigned NumStores;
const TargetLibraryInfo *TLI;
AliasAnalysis *AA;
DominatorTree *DT;
+ LoopInfo *LI;
};
} // End llvm namespace
class AssumptionCache;
class DominatorTree;
class TargetLibraryInfo;
+ class LoopInfo;
/// Determine which bits of V are known to be either zero or one and return
/// them in the KnownZero/KnownOne bit sets.
return GetUnderlyingObject(const_cast<Value *>(V), DL, MaxLookup);
}
- /// GetUnderlyingObjects - This method is similar to GetUnderlyingObject
- /// except that it can look through phi and select instructions and return
- /// multiple objects.
+ /// \brief This method is similar to GetUnderlyingObject except that it can
+ /// look through phi and select instructions and return multiple objects.
+ ///
+ /// If LoopInfo is passed, loop phis are further analyzed. If a pointer
+ /// accesses different objects in each iteration, we don't look through the
+ /// phi node. E.g. consider this loop nest:
+ ///
+ /// int **A;
+ /// for (i)
+ /// for (j) {
+ /// A[i][j] = A[i-1][j] * B[j]
+ /// }
+ ///
+ /// This is transformed by Load-PRE to stash away A[i] for the next iteration
+ /// of the outer loop:
+ ///
+ /// Curr = A[0]; // Prev_0
+ /// for (i: 1..N) {
+ /// Prev = Curr; // Prev = PHI (Prev_0, Curr)
+ /// Curr = A[i];
+ /// for (j: 0..N) {
+ /// Curr[j] = Prev[j] * B[j]
+ /// }
+ /// }
+ ///
+ /// Since A[i] and A[i-1] are independent pointers, GetUnderlyingObjects
+ /// should not assume that Curr and Prev share the same underlying object thus
+ /// it shouldn't look through the phi above.
void GetUnderlyingObjects(Value *V, SmallVectorImpl<Value *> &Objects,
- const DataLayout &DL, unsigned MaxLookup = 6);
+ const DataLayout &DL, LoopInfo *LI = nullptr,
+ unsigned MaxLookup = 6);
/// onlyUsedByLifetimeMarkers - Return true if the only users of this pointer
/// are lifetime markers.
typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
- AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA,
+ AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI,
MemoryDepChecker::DepCandidates &DA)
- : DL(Dl), AST(*AA), DepCands(DA), IsRTCheckNeeded(false) {}
+ : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckNeeded(false) {}
/// \brief Register a load and whether it is only read from.
void addLoad(AliasAnalysis::Location &Loc, bool IsReadOnly) {
//intrinsic property (such as TBAA metadata).
AliasSetTracker AST;
+ LoopInfo *LI;
+
/// Sets of potentially dependent accesses - members of one set share an
/// underlying pointer. The set "CheckDeps" identifies which sets really need a
/// dependence check.
// underlying object.
typedef SmallVector<Value *, 16> ValueVector;
ValueVector TempObjects;
- GetUnderlyingObjects(Ptr, TempObjects, DL);
+
+ GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
+ DEBUG(dbgs() << "Underlying objects for pointer " << *Ptr << "\n");
for (Value *UnderlyingObj : TempObjects) {
UnderlyingObjToAccessMap::iterator Prev =
ObjToLastAccess.find(UnderlyingObj);
DepCands.unionSets(Access, Prev->second);
ObjToLastAccess[UnderlyingObj] = Access;
+ DEBUG(dbgs() << " " << *UnderlyingObj << "\n");
}
}
}
MemoryDepChecker::DepCandidates DependentAccesses;
AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(),
- AA, DependentAccesses);
+ AA, LI, DependentAccesses);
// Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
// multiple times on the same object. If the ptr is accessed twice, once
LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
const DataLayout &DL,
const TargetLibraryInfo *TLI, AliasAnalysis *AA,
- DominatorTree *DT,
+ DominatorTree *DT, LoopInfo *LI,
const ValueToValueMap &Strides)
: DepChecker(SE, L), NumComparisons(0), TheLoop(L), SE(SE), DL(DL),
- TLI(TLI), AA(AA), DT(DT), NumLoads(0), NumStores(0),
+ TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
MaxSafeDepDistBytes(-1U), CanVecMem(false),
StoreToLoopInvariantAddress(false) {
if (canAnalyzeLoop())
if (!LAI) {
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
- LAI = llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, Strides);
+ LAI = llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI,
+ Strides);
#ifndef NDEBUG
LAI->NumSymbolicStrides = Strides.size();
#endif
void LoopAccessAnalysis::print(raw_ostream &OS, const Module *M) const {
LoopAccessAnalysis &LAA = *const_cast<LoopAccessAnalysis *>(this);
- LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
ValueToValueMap NoSymbolicStrides;
for (Loop *TopLevelLoop : *LI)
TLI = TLIP ? &TLIP->getTLI() : nullptr;
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
return false;
}
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
return Len == ~0ULL ? 1 : Len;
}
+/// \brief \p PN defines a loop-variant pointer to an object. Check if the
+/// previous iteration of the loop was referring to the same object as \p PN.
+static bool isSameUnderlyingObjectInLoop(PHINode *PN, LoopInfo *LI) {
+ // Find the loop-defined value.
+ Loop *L = LI->getLoopFor(PN->getParent());
+ if (PN->getNumIncomingValues() != 2)
+ return true;
+
+ // Find the value from previous iteration.
+ auto *PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(0));
+ if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
+ PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(1));
+ if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
+ return true;
+
+ // If a new pointer is loaded in the loop, the pointer references a different
+ // object in every iteration. E.g.:
+ // for (i)
+ // int *p = a[i];
+ // ...
+ if (auto *Load = dyn_cast<LoadInst>(PrevValue))
+ if (!L->isLoopInvariant(Load->getPointerOperand()))
+ return false;
+ return true;
+}
+
Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL,
unsigned MaxLookup) {
if (!V->getType()->isPointerTy())
}
void llvm::GetUnderlyingObjects(Value *V, SmallVectorImpl<Value *> &Objects,
- const DataLayout &DL, unsigned MaxLookup) {
+ const DataLayout &DL, LoopInfo *LI,
+ unsigned MaxLookup) {
SmallPtrSet<Value *, 4> Visited;
SmallVector<Value *, 4> Worklist;
Worklist.push_back(V);
}
if (PHINode *PN = dyn_cast<PHINode>(P)) {
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- Worklist.push_back(PN->getIncomingValue(i));
+ // If this PHI changes the underlying object in every iteration of the
+ // loop, don't look through it. Consider:
+ // int **A;
+ // for (i) {
+ // Prev = Curr; // Prev = PHI (Prev_0, Curr)
+ // Curr = A[i];
+ // *Prev, *Curr;
+ //
+ // Prev is tracking Curr one iteration behind so they refer to different
+ // underlying objects.
+ if (!LI || !LI->isLoopHeader(PN->getParent()) ||
+ isSameUnderlyingObjectInLoop(PN, LI))
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ Worklist.push_back(PN->getIncomingValue(i));
continue;
}
--- /dev/null
+; RUN: opt -basicaa -loop-accesses -analyze < %s | FileCheck %s
+
+; In:
+;
+; store_ptr = A;
+; load_ptr = &A[2];
+; for (i = 0; i < n; i++)
+; *store_ptr++ = *load_ptr++ *10; // A[i] = A[i+2] * 10
+;
+; make sure we look through the PHI to conclude that store_ptr and load_ptr
+; both have A as their underlying object. The dependence is safe for
+; vectorization requiring no memchecks.
+;
+; Otherwise we would try to prove independence with a memcheck that is going
+; to always fail.
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+; CHECK: Memory dependences are safe{{$}}
+
+define void @f(i8* noalias %A, i64 %width) {
+for.body.preheader:
+ %A_ahead = getelementptr inbounds i8, i8* %A, i64 2
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ %i.1, %for.body ], [ 0, %for.body.preheader ]
+ %load_ptr = phi i8* [ %load_ptr.1, %for.body ], [ %A_ahead, %for.body.preheader ]
+ %store_ptr = phi i8* [ %store_ptr.1, %for.body ], [ %A, %for.body.preheader ]
+
+ %loadA = load i8, i8* %load_ptr, align 1
+
+ %mul = mul i8 %loadA, 10
+
+ store i8 %mul, i8* %store_ptr, align 1
+
+ %load_ptr.1 = getelementptr inbounds i8, i8* %load_ptr, i64 1
+ %store_ptr.1 = getelementptr inbounds i8, i8* %store_ptr, i64 1
+ %i.1 = add nuw i64 %i, 1
+
+ %exitcond = icmp eq i64 %i.1, %width
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
--- /dev/null
+; RUN: opt -basicaa -loop-accesses -analyze < %s | FileCheck %s
+
+; This loop:
+;
+; int **A;
+; for (i)
+; for (j) {
+; A[i][j] = A[i-1][j] * B[j]
+; B[j+1] = 2 // backward dep between this and the previous
+; }
+;
+; is transformed by Load-PRE to stash away A[i] for the next iteration of the
+; outer loop:
+;
+; Curr = A[0]; // Prev_0
+; for (i: 1..N) {
+; Prev = Curr; // Prev = PHI (Prev_0, Curr)
+; Curr = A[i];
+; for (j: 0..N) {
+; Curr[j] = Prev[j] * B[j]
+; B[j+1] = 2 // backward dep between this and the previous
+; }
+; }
+;
+; Since A[i] and A[i-1] are likely to be independent, GetUnderlyingObjects
+; should not assume that Curr and Prev share the same underlying object.
+;
+; If it did we would try to dependence-analyze Curr and Prev and the analysis
+; would fail with non-constant distance.
+;
+; To illustrate one of the negative consequences of this, if the loop has a
+; backward dependence we won't detect this but instead fully fall back on
+; memchecks (that is what LAA does after encountering a case of non-constant
+; distance).
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+; CHECK: for_j.body:
+; CHECK-NEXT: Store to invariant address was not found in loop
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop
+; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: Backward:
+; CHECK-NEXT: %loadB = load i8, i8* %gepB, align 1 ->
+; CHECK-NEXT: store i8 2, i8* %gepB_plus_one, align 1
+
+define void @f(i8** noalias %A, i8* noalias %B, i64 %N) {
+for_i.preheader:
+ %prev_0 = load i8*, i8** %A, align 8
+ br label %for_i.body
+
+for_i.body:
+ %i = phi i64 [1, %for_i.preheader], [%i.1, %for_j.end]
+ %prev = phi i8* [%prev_0, %for_i.preheader], [%curr, %for_j.end]
+ %gep = getelementptr inbounds i8*, i8** %A, i64 %i
+ %curr = load i8*, i8** %gep, align 8
+ br label %for_j.preheader
+
+for_j.preheader:
+ br label %for_j.body
+
+for_j.body:
+ %j = phi i64 [0, %for_j.preheader], [%j.1, %for_j.body]
+
+ %gepPrev = getelementptr inbounds i8, i8* %prev, i64 %j
+ %gepCurr = getelementptr inbounds i8, i8* %curr, i64 %j
+ %gepB = getelementptr inbounds i8, i8* %B, i64 %j
+
+ %loadPrev = load i8, i8* %gepPrev, align 1
+ %loadB = load i8, i8* %gepB, align 1
+
+ %mul = mul i8 %loadPrev, %loadB
+
+ store i8 %mul, i8* %gepCurr, align 1
+
+ %gepB_plus_one = getelementptr inbounds i8, i8* %gepB, i64 1
+ store i8 2, i8* %gepB_plus_one, align 1
+
+ %j.1 = add nuw i64 %j, 1
+ %exitcondj = icmp eq i64 %j.1, %N
+ br i1 %exitcondj, label %for_j.end, label %for_j.body
+
+for_j.end:
+
+ %i.1 = add nuw i64 %i, 1
+ %exitcond = icmp eq i64 %i.1, %N
+ br i1 %exitcond, label %for_i.end, label %for_i.body
+
+for_i.end:
+ ret void
+}