const unsigned MaxDepth = 6;
+/// Enable an experimental feature to leverage information about dominating
+/// conditions to compute known bits. The individual options below control how
+/// hard we search. The defaults are choosen to be fairly aggressive. If you
+/// run into compile time problems when testing, scale them back and report
+/// your findings.
+static cl::opt<bool> EnableDomConditions("value-tracking-dom-conditions",
+ cl::Hidden, cl::init(false));
+
+// This is expensive, so we only do it for the top level query value.
+// (TODO: evaluate cost vs profit, consider higher thresholds)
+static cl::opt<unsigned> DomConditionsMaxDepth("dom-conditions-max-depth",
+ cl::Hidden, cl::init(1));
+
+/// How many dominating blocks should be scanned looking for dominating
+/// conditions?
+static cl::opt<unsigned> DomConditionsMaxDomBlocks("dom-conditions-dom-blocks",
+ cl::Hidden,
+ cl::init(20000));
+
+// Controls the number of uses of the value searched for possible
+// dominating comparisons.
+static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
+ cl::Hidden, cl::init(2000));
+
+// If true, don't consider only compares whose only use is a branch.
+static cl::opt<bool> DomConditionsSingleCmpUse("dom-conditions-single-cmp-use",
+ cl::Hidden, cl::init(false));
+
/// Returns the bitwidth of the given scalar or pointer type (if unknown returns
/// 0). For vector types, returns the element type's bitwidth.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
return m_CombineOr(m_Xor(L, R), m_Xor(R, L));
}
+/// Compute known bits in 'V' under the assumption that the condition 'Cmp' is
+/// true (at the context instruction.) This is mostly a utility function for
+/// the prototype dominating conditions reasoning below.
+static void computeKnownBitsFromTrueCondition(Value *V, ICmpInst *Cmp,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const DataLayout &DL,
+ unsigned Depth, const Query &Q) {
+ Value *LHS = Cmp->getOperand(0);
+ Value *RHS = Cmp->getOperand(1);
+ // TODO: We could potentially be more aggressive here. This would be worth
+ // evaluating. If we can, explore commoning this code with the assume
+ // handling logic.
+ if (LHS != V && RHS != V)
+ return;
+
+ const unsigned BitWidth = KnownZero.getBitWidth();
+
+ switch (Cmp->getPredicate()) {
+ default:
+ // We know nothing from this condition
+ break;
+ // TODO: implement unsigned bound from below (known one bits)
+ // TODO: common condition check implementations with assumes
+ // TODO: implement other patterns from assume (e.g. V & B == A)
+ case ICmpInst::ICMP_SGT:
+ if (LHS == V) {
+ APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
+ computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q);
+ if (KnownOneTemp.isAllOnesValue() || KnownZeroTemp.isNegative()) {
+ // We know that the sign bit is zero.
+ KnownZero |= APInt::getSignBit(BitWidth);
+ }
+ }
+ break;
+ case ICmpInst::ICMP_EQ:
+ if (LHS == V)
+ computeKnownBits(RHS, KnownZero, KnownOne, DL, Depth + 1, Q);
+ else if (RHS == V)
+ computeKnownBits(LHS, KnownZero, KnownOne, DL, Depth + 1, Q);
+ else
+ llvm_unreachable("missing use?");
+ break;
+ case ICmpInst::ICMP_ULE:
+ if (LHS == V) {
+ APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
+ computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q);
+ // The known zero bits carry over
+ unsigned SignBits = KnownZeroTemp.countLeadingOnes();
+ KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits);
+ }
+ break;
+ case ICmpInst::ICMP_ULT:
+ if (LHS == V) {
+ APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
+ computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q);
+ // Whatever high bits in rhs are zero are known to be zero (if rhs is a
+ // power of 2, then one more).
+ unsigned SignBits = KnownZeroTemp.countLeadingOnes();
+ if (isKnownToBeAPowerOfTwo(RHS, false, Depth + 1, Query(Q, Cmp), DL))
+ SignBits++;
+ KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits);
+ }
+ break;
+ };
+}
+
+/// Compute known bits in 'V' from conditions which are known to be true along
+/// all paths leading to the context instruction. In particular, look for
+/// cases where one branch of an interesting condition dominates the context
+/// instruction. This does not do general dataflow.
+/// NOTE: This code is EXPERIMENTAL and currently off by default.
+static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero,
+ APInt &KnownOne,
+ const DataLayout &DL,
+ unsigned Depth,
+ const Query &Q) {
+ // Need both the dominator tree and the query location to do anything useful
+ if (!Q.DT || !Q.CxtI)
+ return;
+ Instruction *Cxt = const_cast<Instruction *>(Q.CxtI);
+
+ // Avoid useless work
+ if (auto VI = dyn_cast<Instruction>(V))
+ if (VI->getParent() == Cxt->getParent())
+ return;
+
+ // Note: We currently implement two options. It's not clear which of these
+ // will survive long term, we need data for that.
+ // Option 1 - Try walking the dominator tree looking for conditions which
+ // might apply. This works well for local conditions (loop guards, etc..),
+ // but not as well for things far from the context instruction (presuming a
+ // low max blocks explored). If we can set an high enough limit, this would
+ // be all we need.
+ // Option 2 - We restrict out search to those conditions which are uses of
+ // the value we're interested in. This is independent of dom structure,
+ // but is slightly less powerful without looking through lots of use chains.
+ // It does handle conditions far from the context instruction (e.g. early
+ // function exits on entry) really well though.
+
+ // Option 1 - Search the dom tree
+ unsigned NumBlocksExplored = 0;
+ BasicBlock *Current = Cxt->getParent();
+ while (true) {
+ // Stop searching if we've gone too far up the chain
+ if (NumBlocksExplored >= DomConditionsMaxDomBlocks)
+ break;
+ NumBlocksExplored++;
+
+ if (!Q.DT->getNode(Current)->getIDom())
+ break;
+ Current = Q.DT->getNode(Current)->getIDom()->getBlock();
+ if (!Current)
+ // found function entry
+ break;
+
+ BranchInst *BI = dyn_cast<BranchInst>(Current->getTerminator());
+ if (!BI || BI->isUnconditional())
+ continue;
+ ICmpInst *Cmp = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!Cmp)
+ continue;
+
+ // We're looking for conditions that are guaranteed to hold at the context
+ // instruction. Finding a condition where one path dominates the context
+ // isn't enough because both the true and false cases could merge before
+ // the context instruction we're actually interested in. Instead, we need
+ // to ensure that the taken *edge* dominates the context instruction.
+ BasicBlock *BB0 = BI->getSuccessor(0);
+ BasicBlockEdge Edge(BI->getParent(), BB0);
+ if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent()))
+ continue;
+
+ computeKnownBitsFromTrueCondition(V, Cmp, KnownZero, KnownOne, DL, Depth,
+ Q);
+ }
+
+ // Option 2 - Search the other uses of V
+ unsigned NumUsesExplored = 0;
+ for (auto U : V->users()) {
+ // Avoid massive lists
+ if (NumUsesExplored >= DomConditionsMaxUses)
+ break;
+ NumUsesExplored++;
+ // Consider only compare instructions uniquely controlling a branch
+ ICmpInst *Cmp = dyn_cast<ICmpInst>(U);
+ if (!Cmp)
+ continue;
+
+ if (DomConditionsSingleCmpUse && !Cmp->hasOneUse())
+ continue;
+
+ for (auto *CmpU : Cmp->users()) {
+ BranchInst *BI = dyn_cast<BranchInst>(CmpU);
+ if (!BI || BI->isUnconditional())
+ continue;
+ // We're looking for conditions that are guaranteed to hold at the
+ // context instruction. Finding a condition where one path dominates
+ // the context isn't enough because both the true and false cases could
+ // merge before the context instruction we're actually interested in.
+ // Instead, we need to ensure that the taken *edge* dominates the context
+ // instruction.
+ BasicBlock *BB0 = BI->getSuccessor(0);
+ BasicBlockEdge Edge(BI->getParent(), BB0);
+ if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent()))
+ continue;
+
+ computeKnownBitsFromTrueCondition(V, Cmp, KnownZero, KnownOne, DL, Depth,
+ Q);
+ }
+ }
+}
+
static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
APInt &KnownOne, const DataLayout &DL,
unsigned Depth, const Query &Q) {
// Don't give up yet... there might be an assumption that provides more
// information...
computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q);
+
+ // Or a dominating condition for that matter
+ if (EnableDomConditions && Depth <= DomConditionsMaxDepth)
+ computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL,
+ Depth, Q);
return;
}
// Check whether a nearby assume intrinsic can determine some known bits.
computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q);
+ // Check whether there's a dominating condition which implies something about
+ // this value at the given context.
+ if (EnableDomConditions && Depth <= DomConditionsMaxDepth)
+ computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL, Depth,
+ Q);
+
Operator *I = dyn_cast<Operator>(V);
if (!I) return;
--- /dev/null
+; RUN: opt -instcombine -value-tracking-dom-conditions=1 -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-p1:16:16:16-p2:32:32:32-p3:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+define i1 @test_cmp_ult(i64 %A) {
+; CHECK-LABEL: @test_cmp_ult
+entry:
+ %cmp = icmp ult i64 %A, 64
+ br i1 %cmp, label %taken, label %untaken
+
+taken:
+; CHECK-LABEL: taken:
+; CHECK-NEXT: ret i1 false
+ %cmp2 = icmp ugt i64 %A, 64
+ ret i1 %cmp2
+untaken:
+ ret i1 true
+}
+
+define i1 @test_cmp_ule(i64 %A) {
+; CHECK-LABEL: @test_cmp_ule
+entry:
+ %cmp = icmp ule i64 %A, 64
+ br i1 %cmp, label %taken, label %untaken
+
+taken:
+; CHECK-LABEL: taken:
+; CHECK-NEXT: ret i1 false
+ %cmp2 = icmp ugt i64 %A, 128
+ ret i1 %cmp2
+untaken:
+ ret i1 true
+}
+
+define i1 @test_cmp_sgt(i32 %A) {
+; CHECK-LABEL: @test_cmp_sgt
+entry:
+ %cmp = icmp sgt i32 %A, 10
+ br i1 %cmp, label %taken, label %untaken
+
+taken:
+; CHECK-LABEL: taken:
+; CHECK-NEXT: ret i1 true
+ %cmp2 = icmp sgt i32 %A, -1
+ ret i1 %cmp2
+untaken:
+ ret i1 true
+}
+
+define i64 @test_add_zero_bits(i64 %A) {
+; CHECK-LABEL: @test_add_zero_bits
+entry:
+ %cmp = icmp eq i64 %A, 2
+ br i1 %cmp, label %taken, label %untaken
+
+taken:
+; CHECK-LABEL: taken:
+; CHECK-NEXT: ret i64 3
+ %add = add i64 %A, 1
+ ret i64 %add
+untaken:
+ ret i64 %A
+}
+
+define i64 @test_add_nsw(i64 %A) {
+; CHECK-LABEL: @test_add_nsw
+entry:
+ %cmp = icmp ult i64 %A, 20
+ br i1 %cmp, label %taken, label %untaken
+
+taken:
+; CHECK-LABEL: taken:
+; CHECK-NEXT: %add = add nuw nsw i64 %A, 1
+; CHECK-NEXT: ret i64 %add
+ %add = add i64 %A, 1
+ ret i64 %add
+untaken:
+ ret i64 %A
+}
+
+; After sinking the instructions into the if block, check that we
+; can simplify some of them using dominating conditions.
+define i32 @test_add_zero_bits_sink(i32 %x) nounwind ssp {
+; CHECK-LABEL: @test_add_zero_bits_sink(
+; CHECK-NOT: sdiv i32
+entry:
+ %a = add nsw i32 %x, 16
+ %b = sdiv i32 %a, %x
+ %cmp = icmp ult i32 %x, 7
+ br i1 %cmp, label %bb1, label %bb2
+
+bb1:
+; CHECK-LABEL: bb1:
+; CHECK-NEXT: or i32 %x, 16
+; CHECK-NEXT: udiv i32
+ ret i32 %b
+
+bb2:
+ ret i32 %x
+}
+
+; A condition in the same block gives no information
+define i32 @test_neg1(i32 %x) nounwind ssp {
+; CHECK-LABEL: @test_neg1
+; CHECK: add
+; CHECK: sdiv
+; CHECK: icmp
+; CHECK: select
+entry:
+ %a = add nsw i32 %x, 16
+ %b = sdiv i32 %a, %x
+ %cmp = icmp ult i32 %x, 7
+ %ret = select i1 %cmp, i32 %a, i32 %b
+ ret i32 %ret
+}
+
+; A non-dominating edge gives no information
+define i32 @test_neg2(i32 %x) {
+; CHECK-LABEL: @test_neg2
+entry:
+ %cmp = icmp ult i32 %x, 7
+ br i1 %cmp, label %bb1, label %merge
+
+bb1:
+ br label %merge
+
+merge:
+; CHECK-LABEL: merge:
+; CHECK: icmp
+; CHECK: select
+ %cmp2 = icmp ult i32 %x, 7
+ %ret = select i1 %cmp2, i32 %x, i32 0
+ ret i32 %ret
+}
+
+; A unconditional branch expressed as a condition one gives no
+; information (and shouldn't trip any asserts.)
+define i32 @test_neg3(i32 %x) {
+; CHECK-LABEL: @test_neg3
+entry:
+ %cmp = icmp ult i32 %x, 7
+ br i1 %cmp, label %merge, label %merge
+merge:
+; CHECK-LABEL: merge:
+; CHECK: icmp
+; CHECK: select
+ %cmp2 = icmp ult i32 %x, 7
+ %ret = select i1 %cmp2, i32 %x, i32 0
+ ret i32 %ret
+}
+
+declare i32 @bar()