From b0a2d8c5a51ebf0944fe271dc742550979e6877c Mon Sep 17 00:00:00 2001
From: Fiona Glaser <escha@apple.com>
Date: Fri, 10 Jul 2015 18:29:02 +0000
Subject: [PATCH] ComputeKnownBits: be a bit smarter about ADDs

If our two inputs have known top-zero bit counts M and N, we trivially
know that the output cannot have any bits set in the top (min(M, N)-1)
bits, since nothing could carry past that point.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@241927 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/SelectionDAG.cpp     | 19 ++++++++++++++-----
 .../AArch64/aarch64-dynamic-stack-layout.ll   | 10 +++++-----
 test/CodeGen/X86/win64_frame.ll               |  5 +++--
 3 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 32a0bf6b2d1..f6a12d270c0 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2356,15 +2356,24 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
     // Output known-0 bits are known if clear or set in both the low clear bits
     // common to both LHS & RHS.  For example, 8+(X<<3) is known to have the
     // low 3 bits clear.
+    // Output known-0 bits are also known if the top bits of each input are
+    // known to be clear. For example, if one input has the top 10 bits clear
+    // and the other has the top 8 bits clear, we know the top 7 bits of the
+    // output must be clear.
     computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
-    unsigned KnownZeroOut = KnownZero2.countTrailingOnes();
+    unsigned KnownZeroHigh = KnownZero2.countLeadingOnes();
+    unsigned KnownZeroLow = KnownZero2.countTrailingOnes();
 
     computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
-    KnownZeroOut = std::min(KnownZeroOut,
+    KnownZeroHigh = std::min(KnownZeroHigh,
+                             KnownZero2.countLeadingOnes());
+    KnownZeroLow = std::min(KnownZeroLow,
                             KnownZero2.countTrailingOnes());
 
     if (Op.getOpcode() == ISD::ADD) {
-      KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut);
+      KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroLow);
+      if (KnownZeroHigh > 1)
+        KnownZero |= APInt::getHighBitsSet(BitWidth, KnownZeroHigh - 1);
       break;
     }
 
@@ -2372,8 +2381,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
     // information if we know (at least) that the low two bits are clear.  We
     // then return to the caller that the low bit is unknown but that other bits
     // are known zero.
-    if (KnownZeroOut >= 2) // ADDE
-      KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroOut);
+    if (KnownZeroLow >= 2) // ADDE
+      KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroLow);
     break;
   }
   case ISD::SREM:
diff --git a/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll b/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
index a31c66bad4b..f33211eadd2 100644
--- a/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
+++ b/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
@@ -255,7 +255,7 @@ entry:
 ; CHECK: ubfx	x9, x0, #0, #32
 ; CHECK: lsl	x9, x9, #2
 ; CHECK: add	x9, x9, #15
-; CHECK: and	x9, x9, #0xfffffffffffffff0
+; CHECK: and	x9, x9, #0x7fffffff0
 ; CHECK: mov	 x10, sp
 ; CHECK: sub	 x[[VLASPTMP:[0-9]+]], x10, x9
 ; CHECK: mov	 sp, x[[VLASPTMP]]
@@ -302,7 +302,7 @@ entry:
 ; CHECK: ubfx	x9, x0, #0, #32
 ; CHECK: lsl	x9, x9, #2
 ; CHECK: add	x9, x9, #15
-; CHECK: and	x9, x9, #0xfffffffffffffff0
+; CHECK: and	x9, x9, #0x7fffffff0
 ; CHECK: mov	 x10, sp
 ; CHECK: sub	 x[[VLASPTMP:[0-9]+]], x10, x9
 ; CHECK: mov	 sp, x[[VLASPTMP]]
@@ -364,7 +364,7 @@ entry:
 ; CHECK: ubfx	x9, x0, #0, #32
 ; CHECK: lsl	x9, x9, #2
 ; CHECK: add	x9, x9, #15
-; CHECK: and	x9, x9, #0xfffffffffffffff0
+; CHECK: and	x9, x9, #0x7fffffff0
 ; CHECK: mov	 x10, sp
 ; CHECK: sub	 x[[VLASPTMP:[0-9]+]], x10, x9
 ; CHECK: mov	 sp, x[[VLASPTMP]]
@@ -417,7 +417,7 @@ entry:
 ; CHECK: ubfx	x9, x0, #0, #32
 ; CHECK: lsl	x9, x9, #2
 ; CHECK: add	x9, x9, #15
-; CHECK: and	x9, x9, #0xfffffffffffffff0
+; CHECK: and	x9, x9, #0x7fffffff0
 ; CHECK: mov	 x10, sp
 ; CHECK: sub	 x[[VLASPTMP:[0-9]+]], x10, x9
 ; CHECK: mov	 sp, x[[VLASPTMP]]
@@ -468,7 +468,7 @@ entry:
 ; CHECK: ubfx	x9, x0, #0, #32
 ; CHECK: lsl	x9, x9, #2
 ; CHECK: add	x9, x9, #15
-; CHECK: and	x9, x9, #0xfffffffffffffff0
+; CHECK: and	x9, x9, #0x7fffffff0
 ; CHECK: mov	 x10, sp
 ; CHECK: sub	 x[[VLASPTMP:[0-9]+]], x10, x9
 ; CHECK: mov	 sp, x[[VLASPTMP]]
diff --git a/test/CodeGen/X86/win64_frame.ll b/test/CodeGen/X86/win64_frame.ll
index 2c62f4918a7..477b3144d9e 100644
--- a/test/CodeGen/X86/win64_frame.ll
+++ b/test/CodeGen/X86/win64_frame.ll
@@ -100,8 +100,9 @@ define i32 @f8(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) "no-frame-pointer-elim"="
 
   alloca i32, i32 %a
   ; CHECK:        movl    %ecx, %eax
-  ; CHECK:        leaq    15(,%rax,4), %rax
-  ; CHECK:        andq    $-16, %rax
+  ; CHECK:        leaq    15(,%rax,4), %rcx
+  ; CHECK:        movabsq $34359738352, %rax
+  ; CHECK:        andq    %rcx, %rax
   ; CHECK:        callq   __chkstk
   ; CHECK:        subq    %rax, %rsp
 
-- 
2.34.1