Infer known bits from dominating conditions

author Philip Reames <listmail@philipreames.com>

Tue, 10 Mar 2015 22:43:20 +0000 (22:43 +0000)

committer Philip Reames <listmail@philipreames.com>

Tue, 10 Mar 2015 22:43:20 +0000 (22:43 +0000)
author Philip Reames <listmail@philipreames.com>
Tue, 10 Mar 2015 22:43:20 +0000 (22:43 +0000)
committer Philip Reames <listmail@philipreames.com>
Tue, 10 Mar 2015 22:43:20 +0000 (22:43 +0000)
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp

index cf6b92d74f93914280ace572716393ee2f53161d..edccd8c0914c3885c02da13e1347e047c0ba3695 100644 (file)
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -39,6 +39,34 @@ using namespace llvm::PatternMatch;
  
  const unsigned MaxDepth = 6;
  
+/// Enable an experimental feature to leverage information about dominating
+/// conditions to compute known bits.  The individual options below control how
+/// hard we search.  The defaults are choosen to be fairly aggressive.  If you
+/// run into compile time problems when testing, scale them back and report
+/// your findings.
+static cl::opt<bool> EnableDomConditions("value-tracking-dom-conditions",
+                                         cl::Hidden, cl::init(false));
+
+// This is expensive, so we only do it for the top level query value.
+// (TODO: evaluate cost vs profit, consider higher thresholds)
+static cl::opt<unsigned> DomConditionsMaxDepth("dom-conditions-max-depth",
+                                               cl::Hidden, cl::init(1));
+
+/// How many dominating blocks should be scanned looking for dominating
+/// conditions?
+static cl::opt<unsigned> DomConditionsMaxDomBlocks("dom-conditions-dom-blocks",
+                                                   cl::Hidden,
+                                                   cl::init(20000));
+
+// Controls the number of uses of the value searched for possible
+// dominating comparisons.
+static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
+                                              cl::Hidden, cl::init(2000));
+
+// If true, don't consider only compares whose only use is a branch.
+static cl::opt<bool> DomConditionsSingleCmpUse("dom-conditions-single-cmp-use",
+                                               cl::Hidden, cl::init(false));
+
  /// Returns the bitwidth of the given scalar or pointer type (if unknown returns
  /// 0). For vector types, returns the element type's bitwidth.
  static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
@@ -470,6 +498,179 @@ m_c_Xor(const LHS &L, const RHS &R) {
    return m_CombineOr(m_Xor(L, R), m_Xor(R, L));
  }
  
+/// Compute known bits in 'V' under the assumption that the condition 'Cmp' is
+/// true (at the context instruction.)  This is mostly a utility function for
+/// the prototype dominating conditions reasoning below.
+static void computeKnownBitsFromTrueCondition(Value *V, ICmpInst *Cmp,
+                                              APInt &KnownZero,
+                                              APInt &KnownOne,
+                                              const DataLayout &DL,
+                                              unsigned Depth, const Query &Q) {
+  Value *LHS = Cmp->getOperand(0);
+  Value *RHS = Cmp->getOperand(1);
+  // TODO: We could potentially be more aggressive here.  This would be worth
+  // evaluating.  If we can, explore commoning this code with the assume
+  // handling logic.
+  if (LHS != V && RHS != V)
+    return;
+
+  const unsigned BitWidth = KnownZero.getBitWidth();
+
+  switch (Cmp->getPredicate()) {
+  default:
+    // We know nothing from this condition
+    break;
+  // TODO: implement unsigned bound from below (known one bits)
+  // TODO: common condition check implementations with assumes
+  // TODO: implement other patterns from assume (e.g. V & B == A)
+  case ICmpInst::ICMP_SGT:
+    if (LHS == V) {
+      APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
+      computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q);
+      if (KnownOneTemp.isAllOnesValue() || KnownZeroTemp.isNegative()) {
+        // We know that the sign bit is zero.
+        KnownZero |= APInt::getSignBit(BitWidth);
+      }
+    }
+    break;
+  case ICmpInst::ICMP_EQ:
+    if (LHS == V)
+      computeKnownBits(RHS, KnownZero, KnownOne, DL, Depth + 1, Q);
+    else if (RHS == V)
+      computeKnownBits(LHS, KnownZero, KnownOne, DL, Depth + 1, Q);
+    else
+      llvm_unreachable("missing use?");
+    break;
+  case ICmpInst::ICMP_ULE:
+    if (LHS == V) {
+      APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
+      computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q);
+      // The known zero bits carry over
+      unsigned SignBits = KnownZeroTemp.countLeadingOnes();
+      KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits);
+    }
+    break;
+  case ICmpInst::ICMP_ULT:
+    if (LHS == V) {
+      APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
+      computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q);
+      // Whatever high bits in rhs are zero are known to be zero (if rhs is a
+      // power of 2, then one more).
+      unsigned SignBits = KnownZeroTemp.countLeadingOnes();
+      if (isKnownToBeAPowerOfTwo(RHS, false, Depth + 1, Query(Q, Cmp), DL))
+        SignBits++;
+      KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits);
+    }
+    break;
+  };
+}
+
+/// Compute known bits in 'V' from conditions which are known to be true along
+/// all paths leading to the context instruction.  In particular, look for
+/// cases where one branch of an interesting condition dominates the context
+/// instruction.  This does not do general dataflow.
+/// NOTE: This code is EXPERIMENTAL and currently off by default.
+static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero,
+                                                    APInt &KnownOne,
+                                                    const DataLayout &DL,
+                                                    unsigned Depth,
+                                                    const Query &Q) {
+  // Need both the dominator tree and the query location to do anything useful
+  if (!Q.DT || !Q.CxtI)
+    return;
+  Instruction *Cxt = const_cast<Instruction *>(Q.CxtI);
+
+  // Avoid useless work
+  if (auto VI = dyn_cast<Instruction>(V))
+    if (VI->getParent() == Cxt->getParent())
+      return;
+
+  // Note: We currently implement two options.  It's not clear which of these
+  // will survive long term, we need data for that.
+  // Option 1 - Try walking the dominator tree looking for conditions which
+  // might apply.  This works well for local conditions (loop guards, etc..),
+  // but not as well for things far from the context instruction (presuming a
+  // low max blocks explored).  If we can set an high enough limit, this would
+  // be all we need.
+  // Option 2 - We restrict out search to those conditions which are uses of
+  // the value we're interested in.  This is independent of dom structure,
+  // but is slightly less powerful without looking through lots of use chains.
+  // It does handle conditions far from the context instruction (e.g. early
+  // function exits on entry) really well though.
+
+  // Option 1 - Search the dom tree
+  unsigned NumBlocksExplored = 0;
+  BasicBlock *Current = Cxt->getParent();
+  while (true) {
+    // Stop searching if we've gone too far up the chain
+    if (NumBlocksExplored >= DomConditionsMaxDomBlocks)
+      break;
+    NumBlocksExplored++;
+
+    if (!Q.DT->getNode(Current)->getIDom())
+      break;
+    Current = Q.DT->getNode(Current)->getIDom()->getBlock();
+    if (!Current)
+      // found function entry
+      break;
+
+    BranchInst *BI = dyn_cast<BranchInst>(Current->getTerminator());
+    if (!BI || BI->isUnconditional())
+      continue;
+    ICmpInst *Cmp = dyn_cast<ICmpInst>(BI->getCondition());
+    if (!Cmp)
+      continue;
+
+    // We're looking for conditions that are guaranteed to hold at the context
+    // instruction.  Finding a condition where one path dominates the context
+    // isn't enough because both the true and false cases could merge before
+    // the context instruction we're actually interested in.  Instead, we need
+    // to ensure that the taken *edge* dominates the context instruction.
+    BasicBlock *BB0 = BI->getSuccessor(0);
+    BasicBlockEdge Edge(BI->getParent(), BB0);
+    if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent()))
+      continue;
+
+    computeKnownBitsFromTrueCondition(V, Cmp, KnownZero, KnownOne, DL, Depth,
+                                      Q);
+  }
+
+  // Option 2 - Search the other uses of V
+  unsigned NumUsesExplored = 0;
+  for (auto U : V->users()) {
+    // Avoid massive lists
+    if (NumUsesExplored >= DomConditionsMaxUses)
+      break;
+    NumUsesExplored++;
+    // Consider only compare instructions uniquely controlling a branch
+    ICmpInst *Cmp = dyn_cast<ICmpInst>(U);
+    if (!Cmp)
+      continue;
+
+    if (DomConditionsSingleCmpUse && !Cmp->hasOneUse())
+      continue;
+
+    for (auto *CmpU : Cmp->users()) {
+      BranchInst *BI = dyn_cast<BranchInst>(CmpU);
+      if (!BI || BI->isUnconditional())
+        continue;
+      // We're looking for conditions that are guaranteed to hold at the
+      // context instruction.  Finding a condition where one path dominates
+      // the context isn't enough because both the true and false cases could
+      // merge before the context instruction we're actually interested in.
+      // Instead, we need to ensure that the taken *edge* dominates the context
+      // instruction. 
+      BasicBlock *BB0 = BI->getSuccessor(0);
+      BasicBlockEdge Edge(BI->getParent(), BB0);
+      if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent()))
+        continue;
+
+      computeKnownBitsFromTrueCondition(V, Cmp, KnownZero, KnownOne, DL, Depth,
+                                        Q);
+    }
+  }
+}
+
  static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
                                         APInt &KnownOne, const DataLayout &DL,
                                         unsigned Depth, const Query &Q) {
@@ -824,6 +1025,11 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
      // Don't give up yet... there might be an assumption that provides more
      // information...
      computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q);
+
+    // Or a dominating condition for that matter
+    if (EnableDomConditions && Depth <= DomConditionsMaxDepth)
+      computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL,
+                                              Depth, Q);
      return;
    }
  
@@ -846,6 +1052,12 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
    // Check whether a nearby assume intrinsic can determine some known bits.
    computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q);
  
+  // Check whether there's a dominating condition which implies something about
+  // this value at the given context.
+  if (EnableDomConditions && Depth <= DomConditionsMaxDepth)
+    computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL, Depth,
+                                            Q);
+
    Operator *I = dyn_cast<Operator>(V);
    if (!I) return;
  
diff --git a/test/Transforms/InstCombine/dom-conditions.ll b/test/Transforms/InstCombine/dom-conditions.ll

new file mode 100644 (file)

index 0000000..4264043
--- /dev/null
+++ b/test/Transforms/InstCombine/dom-conditions.ll
@@ -0,0 +1,152 @@
+; RUN: opt -instcombine -value-tracking-dom-conditions=1 -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-p1:16:16:16-p2:32:32:32-p3:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+define i1 @test_cmp_ult(i64 %A) {
+; CHECK-LABEL: @test_cmp_ult
+entry:
+  %cmp = icmp ult i64 %A, 64
+  br i1 %cmp, label %taken, label %untaken
+
+taken:
+; CHECK-LABEL: taken:
+; CHECK-NEXT: ret i1 false
+  %cmp2 = icmp ugt i64 %A, 64
+  ret i1 %cmp2
+untaken:
+  ret i1 true
+}
+
+define i1 @test_cmp_ule(i64 %A) {
+; CHECK-LABEL: @test_cmp_ule
+entry:
+  %cmp = icmp ule i64 %A, 64
+  br i1 %cmp, label %taken, label %untaken
+
+taken:
+; CHECK-LABEL: taken:
+; CHECK-NEXT: ret i1 false
+  %cmp2 = icmp ugt i64 %A, 128
+  ret i1 %cmp2
+untaken:
+  ret i1 true
+}
+
+define i1 @test_cmp_sgt(i32 %A) {
+; CHECK-LABEL: @test_cmp_sgt
+entry:
+  %cmp = icmp sgt i32 %A, 10
+  br i1 %cmp, label %taken, label %untaken
+
+taken:
+; CHECK-LABEL: taken:
+; CHECK-NEXT: ret i1 true
+  %cmp2 = icmp sgt i32 %A, -1
+  ret i1 %cmp2
+untaken:
+  ret i1 true
+}
+
+define i64 @test_add_zero_bits(i64 %A) {
+; CHECK-LABEL: @test_add_zero_bits
+entry:
+  %cmp = icmp eq i64 %A, 2
+  br i1 %cmp, label %taken, label %untaken
+
+taken:
+; CHECK-LABEL: taken:
+; CHECK-NEXT: ret i64 3
+  %add = add i64 %A, 1
+  ret i64 %add
+untaken:
+  ret i64 %A
+}
+
+define i64 @test_add_nsw(i64 %A) {
+; CHECK-LABEL: @test_add_nsw
+entry:
+  %cmp = icmp ult i64 %A, 20
+  br i1 %cmp, label %taken, label %untaken
+
+taken:
+; CHECK-LABEL: taken:
+; CHECK-NEXT: %add = add nuw nsw i64 %A, 1
+; CHECK-NEXT: ret i64 %add
+  %add = add i64 %A, 1
+  ret i64 %add
+untaken:
+  ret i64 %A
+}
+
+; After sinking the instructions into the if block, check that we 
+; can simplify some of them using dominating conditions.
+define i32 @test_add_zero_bits_sink(i32 %x) nounwind ssp {
+; CHECK-LABEL: @test_add_zero_bits_sink(
+; CHECK-NOT: sdiv i32
+entry:
+  %a = add nsw i32 %x, 16
+  %b = sdiv i32 %a, %x
+  %cmp = icmp ult i32 %x, 7
+  br i1 %cmp, label %bb1, label %bb2
+
+bb1:
+; CHECK-LABEL: bb1:
+; CHECK-NEXT: or i32 %x, 16
+; CHECK-NEXT: udiv i32
+  ret i32 %b
+
+bb2:
+  ret i32 %x
+}
+
+; A condition in the same block gives no information
+define i32 @test_neg1(i32 %x) nounwind ssp {
+; CHECK-LABEL: @test_neg1
+; CHECK: add
+; CHECK: sdiv
+; CHECK: icmp
+; CHECK: select
+entry:
+  %a = add nsw i32 %x, 16
+  %b = sdiv i32 %a, %x
+  %cmp = icmp ult i32 %x, 7
+  %ret = select i1 %cmp, i32 %a, i32 %b
+  ret i32 %ret
+}
+
+; A non-dominating edge gives no information 
+define i32 @test_neg2(i32 %x) {
+; CHECK-LABEL: @test_neg2
+entry:
+  %cmp = icmp ult i32 %x, 7
+  br i1 %cmp, label %bb1, label %merge
+
+bb1:
+  br label %merge
+
+merge:
+; CHECK-LABEL: merge:
+; CHECK: icmp
+; CHECK: select
+  %cmp2 = icmp ult i32 %x, 7
+  %ret = select i1 %cmp2, i32 %x, i32 0
+  ret i32 %ret
+}
+
+; A unconditional branch expressed as a condition one gives no
+; information (and shouldn't trip any asserts.)
+define i32 @test_neg3(i32 %x) {
+; CHECK-LABEL: @test_neg3
+entry:
+  %cmp = icmp ult i32 %x, 7
+  br i1 %cmp, label %merge, label %merge
+merge:
+; CHECK-LABEL: merge:
+; CHECK: icmp
+; CHECK: select
+  %cmp2 = icmp ult i32 %x, 7
+  %ret = select i1 %cmp2, i32 %x, i32 0
+  ret i32 %ret
+}
+
+declare i32 @bar()
author	Philip Reames <listmail@philipreames.com>
	Tue, 10 Mar 2015 22:43:20 +0000 (22:43 +0000)
committer	Philip Reames <listmail@philipreames.com>
	Tue, 10 Mar 2015 22:43:20 +0000 (22:43 +0000)
lib/Analysis/ValueTracking.cpp		patch \| blob \| history
test/Transforms/InstCombine/dom-conditions.ll	[new file with mode: 0644]	patch \| blob