From 0e9a62e34fb4d4dca518ebfe88ff7695bc79516f Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Mon, 9 Nov 2015 21:16:49 +0000
Subject: [PATCH] [x86] try harder to match bitwise 'or' into an LEA

The motivation for this patch starts with the epic fail example in PR18007:
https://llvm.org/bugs/show_bug.cgi?id=18007

...unfortunately, this patch makes no difference for that case, but it solves some
simpler cases. We'll get there some day. :)

The current 'or' matching code was using computeKnownBits() via
isBaseWithConstantOffset() -> MaskedValueIsZero(), but that's an unnecessarily limited use.
We can do more by copying the logic in ValueTracking's haveNoCommonBitsSet(), so we can
treat the 'or' as if it was an 'add'.

There's a TODO comment here because we should lift the bit-checking logic into a helper
function, so it's not duplicated in DAGCombiner.

An example of the better LEA matching:

leal (%rdi,%rdi), %eax
andl $1, %esi
orl %esi, %eax

Becomes:

andl $1, %esi
leal (%rsi,%rdi,2), %eax

Differential Revision: http://reviews.llvm.org/D13956



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@252515 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelDAGToDAG.cpp            | 32 ++++++++++++-------
 test/CodeGen/X86/or-lea.ll                    | 18 ++++-------
 .../X86/x86-64-double-precision-shift-left.ll | 15 ++++-----
 .../x86-64-double-precision-shift-right.ll    |  7 ++--
 4 files changed, 36 insertions(+), 36 deletions(-)
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index c5a1093af12..0cbeda91ccc 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1338,19 +1338,29 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
       return false;
     break;
 
-  case ISD::OR:
-    // Handle "X | C" as "X + C" iff X is known to have C bits clear.
-    if (CurDAG->isBaseWithConstantOffset(N)) {
-      X86ISelAddressMode Backup = AM;
-      ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));
-
-      // Start with the LHS as an addr mode.
-      if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
-          !foldOffsetIntoAddress(CN->getSExtValue(), AM))
+  case ISD::OR: {
+    // TODO: The bit-checking logic should be put into a helper function and
+    // used by DAGCombiner.
+
+    // We want to look through a transform in InstCombine and DAGCombiner that
+    // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'.
+    APInt LHSZero, LHSOne;
+    APInt RHSZero, RHSOne;
+    CurDAG->computeKnownBits(N.getOperand(0), LHSZero, LHSOne);
+    CurDAG->computeKnownBits(N.getOperand(1), RHSZero, RHSOne);
+
+    // If we know that there are no common bits set by the operands of this
+    // 'or', it is equivalent to an 'add'. For example:
+    // (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3))
+    // An 'lea' can then be used to match the shift (multiply) and add:
+    // and $1, %esi
+    // lea (%rsi, %rdi, 8), %rax
+    if ((LHSZero | RHSZero).isAllOnesValue())
+      if (!matchAdd(N, AM, Depth))
         return false;
-      AM = Backup;
-    }
+
     break;
+  }
 
   case ISD::AND: {
     // Perform some heroic transforms on an and of a constant-count shift
diff --git a/test/CodeGen/X86/or-lea.ll b/test/CodeGen/X86/or-lea.ll
index bd117207e6c..f28cc8569cf 100644
--- a/test/CodeGen/X86/or-lea.ll
+++ b/test/CodeGen/X86/or-lea.ll
@@ -8,9 +8,8 @@
 define i32 @or_shift1_and1(i32 %x, i32 %y) {
 ; CHECK-LABEL: or_shift1_and1:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    addl %edi, %edi
 ; CHECK-NEXT:    andl $1, %esi
-; CHECK-NEXT:    leal (%rsi,%rdi), %eax
+; CHECK-NEXT:    leal (%rsi,%rdi,2), %eax
 ; CHECK-NEXT:    retq
 
   %shl = shl i32 %x, 1
@@ -22,9 +21,8 @@ define i32 @or_shift1_and1(i32 %x, i32 %y) {
 define i32 @or_shift1_and1_swapped(i32 %x, i32 %y) {
 ; CHECK-LABEL: or_shift1_and1_swapped:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    leal (%rdi,%rdi), %eax
 ; CHECK-NEXT:    andl $1, %esi
-; CHECK-NEXT:    orl %esi, %eax
+; CHECK-NEXT:    leal (%rsi,%rdi,2), %eax
 ; CHECK-NEXT:    retq
 
   %shl = shl i32 %x, 1
@@ -36,9 +34,8 @@ define i32 @or_shift1_and1_swapped(i32 %x, i32 %y) {
 define i32 @or_shift2_and1(i32 %x, i32 %y) {
 ; CHECK-LABEL: or_shift2_and1:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    leal (,%rdi,4), %eax
 ; CHECK-NEXT:    andl $1, %esi
-; CHECK-NEXT:    orl %esi, %eax
+; CHECK-NEXT:    leal (%rsi,%rdi,4), %eax
 ; CHECK-NEXT:    retq
 
   %shl = shl i32 %x, 2
@@ -50,9 +47,8 @@ define i32 @or_shift2_and1(i32 %x, i32 %y) {
 define i32 @or_shift3_and1(i32 %x, i32 %y) {
 ; CHECK-LABEL: or_shift3_and1:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    leal (,%rdi,8), %eax
 ; CHECK-NEXT:    andl $1, %esi
-; CHECK-NEXT:    orl %esi, %eax
+; CHECK-NEXT:    leal (%rsi,%rdi,8), %eax
 ; CHECK-NEXT:    retq
 
   %shl = shl i32 %x, 3
@@ -64,9 +60,8 @@ define i32 @or_shift3_and1(i32 %x, i32 %y) {
 define i32 @or_shift3_and7(i32 %x, i32 %y) {
 ; CHECK-LABEL: or_shift3_and7:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    leal (,%rdi,8), %eax
 ; CHECK-NEXT:    andl $7, %esi
-; CHECK-NEXT:    orl %esi, %eax
+; CHECK-NEXT:    leal (%rsi,%rdi,8), %eax
 ; CHECK-NEXT:    retq
 
   %shl = shl i32 %x, 3
@@ -112,9 +107,8 @@ define i32 @or_shift3_and8(i32 %x, i32 %y) {
 define i64 @or_shift1_and1_64(i64 %x, i64 %y) {
 ; CHECK-LABEL: or_shift1_and1_64:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    addq %rdi, %rdi
 ; CHECK-NEXT:    andl $1, %esi
-; CHECK-NEXT:    leaq (%rsi,%rdi), %rax
+; CHECK-NEXT:    leaq (%rsi,%rdi,2), %rax
 ; CHECK-NEXT:    retq
 
   %shl = shl i64 %x, 1
diff --git a/test/CodeGen/X86/x86-64-double-precision-shift-left.ll b/test/CodeGen/X86/x86-64-double-precision-shift-left.ll
index f2380f23b8e..75e9052c129 100644
--- a/test/CodeGen/X86/x86-64-double-precision-shift-left.ll
+++ b/test/CodeGen/X86/x86-64-double-precision-shift-left.ll
@@ -8,11 +8,9 @@
 ;    return (a << 1) | (b >> 63);
 ;}
 
-; CHECK:             lshift1:
-; CHECK:             addq    {{.*}},{{.*}}
-; CHECK-NEXT:        shrq    $63, {{.*}}
-; CHECK-NEXT:        leaq    ({{.*}},{{.*}}), {{.*}}
-
+; CHECK-LABEL:       lshift1:
+; CHECK:             shrq    $63, %rsi
+; CHECK-NEXT:        leaq    (%rsi,%rdi,2), %rax
 
 define i64 @lshift1(i64 %a, i64 %b) nounwind readnone uwtable {
 entry:
@@ -27,10 +25,9 @@ entry:
 ;    return (a << 2) | (b >> 62);
 ;}
 
-; CHECK:             lshift2:
-; CHECK:             shlq    $2, {{.*}}
-; CHECK-NEXT:        shrq    $62, {{.*}}
-; CHECK-NEXT:        leaq    ({{.*}},{{.*}}), {{.*}}
+; CHECK-LABEL:       lshift2:
+; CHECK:             shrq    $62, %rsi
+; CHECK-NEXT:        leaq    (%rsi,%rdi,4), %rax
 
 define i64 @lshift2(i64 %a, i64 %b) nounwind readnone uwtable {
 entry:
diff --git a/test/CodeGen/X86/x86-64-double-precision-shift-right.ll b/test/CodeGen/X86/x86-64-double-precision-shift-right.ll
index 5edaad89df4..bc2f39ee666 100644
--- a/test/CodeGen/X86/x86-64-double-precision-shift-right.ll
+++ b/test/CodeGen/X86/x86-64-double-precision-shift-right.ll
@@ -61,10 +61,9 @@ define i64 @rshift7(i64 %a, i64 %b) nounwind readnone uwtable {
 ;    return (a >> 63) | (b << 1);
 ;}
 
-; CHECK:             rshift63:
-; CHECK:             shrq    $63, {{.*}}
-; CHECK-NEXT:        leaq    ({{.*}},{{.*}}), {{.*}}
-; CHECK-NEXT:        orq     {{.*}}, {{.*}}
+; CHECK-LABEL:       rshift63:
+; CHECK:             shrq    $63, %rdi
+; CHECK-NEXT:        leaq    (%rdi,%rsi,2), %rax
 
 define i64 @rshift63(i64 %a, i64 %b) nounwind readnone uwtable {
   %1 = lshr i64 %a, 63
-- 
2.34.1