From b1ff87ec86284a1765ba70b8939f65155b1555af Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@gmail.com>
Date: Sat, 4 Apr 2015 02:08:20 +0000
Subject: [PATCH] [X86] Don't use GR64 register 'and with immediate'
 instructions if the immediate is zero in the upper 33-bits or upper 57-bits.
 Use GR32 instructions instead.

Previously the patterns didn't have high enough priority and we would only use the GR32 form if the only the upper 32 or 56 bits were zero.

Fixes PR23100.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@234075 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrCompiler.td            |  5 +++++
 test/CodeGen/X86/2009-02-26-MachineLICMBug.ll |  2 +-
 test/CodeGen/X86/and-or-fold.ll               |  2 +-
 test/CodeGen/X86/andimm8.ll                   | 12 ++++++++++++
 test/CodeGen/X86/atomic64.ll                  |  2 +-
 test/CodeGen/X86/bmi.ll                       |  2 +-
 test/CodeGen/X86/setcc.ll                     |  2 +-
 test/CodeGen/X86/shift-pair.ll                |  2 +-
 test/CodeGen/X86/uint64-to-float.ll           |  4 ++--
 test/CodeGen/X86/x86-64-tls-1.ll              |  2 +-
 10 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 18bbe5d73f1..e7de24bca41 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -1233,6 +1233,7 @@ def : Pat<(store (add (loadi64 addr:$dst), 0x00000000800000000), addr:$dst),
 // least 32 bits of leading zeros. If in addition the last 32 bits can be
 // represented with a sign extension of a 8 bit constant, use that.
 
+let AddedComplexity = 1 in // Give priority over i64immSExt8.
 def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm),
           (SUBREG_TO_REG
             (i64 0),
@@ -1241,6 +1242,7 @@ def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm),
               (i32 (GetLo8XForm imm:$imm))),
             sub_32bit)>;
 
+let AddedComplexity = 1 in // Give priority over i64immSExt32.
 def : Pat<(and GR64:$src, i64immZExt32:$imm),
           (SUBREG_TO_REG
             (i64 0),
@@ -1267,16 +1269,19 @@ def : Pat<(and GR16:$src1, 0xff),
       Requires<[Not64BitMode]>;
 
 // r & (2^32-1) ==> movz
+let AddedComplexity = 1 in // Give priority over i64immZExt32.
 def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
           (SUBREG_TO_REG (i64 0),
                          (MOV32rr (EXTRACT_SUBREG GR64:$src, sub_32bit)),
                          sub_32bit)>;
 // r & (2^16-1) ==> movz
+let AddedComplexity = 1 in // Give priority over i64immZExt32.
 def : Pat<(and GR64:$src, 0xffff),
           (SUBREG_TO_REG (i64 0),
                       (MOVZX32rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit))),
                       sub_32bit)>;
 // r & (2^8-1) ==> movz
+let AddedComplexity = 1 in // Give priority over i64immSExt32.
 def : Pat<(and GR64:$src, 0xff),
           (SUBREG_TO_REG (i64 0),
                          (MOVZX32rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit))),
diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
index db3133364e2..dc8f69e4ad1 100644
--- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
+++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
@@ -1,5 +1,5 @@
 ; REQUIRES: asserts
-; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse4.1 -mcpu=penryn -stats 2>&1 | grep "7 machine-licm"
+; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse4.1 -mcpu=penryn -stats 2>&1 | grep "9 machine-licm"
 ; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse4.1 -mcpu=penryn | FileCheck %s
 ; rdar://6627786
 ; rdar://7792037
diff --git a/test/CodeGen/X86/and-or-fold.ll b/test/CodeGen/X86/and-or-fold.ll
index 836b5f1551c..ec39522e6b1 100644
--- a/test/CodeGen/X86/and-or-fold.ll
+++ b/test/CodeGen/X86/and-or-fold.ll
@@ -21,6 +21,6 @@ entry:
   %tmp1 = and i64 %x, 123127
   %tmp2 = or i64 %tmp1, 3
   ret i64 %tmp2
-; DARWIN-OPT:       andq $123124
+; DARWIN-OPT:       andl $123124
 ; DARWIN-OPT-NEXT:  leaq 3
 }
diff --git a/test/CodeGen/X86/andimm8.ll b/test/CodeGen/X86/andimm8.ll
index 640237d0b50..d9e676aa66c 100644
--- a/test/CodeGen/X86/andimm8.ll
+++ b/test/CodeGen/X86/andimm8.ll
@@ -17,3 +17,15 @@ define void @foo(i64 %zed, i64* %x) nounwind {
   store i64 %t2, i64* %x, align 8
   ret void
 }
+
+define i64 @bar(i64 %zed) nounwind {
+; CHECK:  andl     $42, %edi               # encoding: [0x83,0xe7,0x2a]
+  %t1 = and i64 %zed, 42
+  ret i64 %t1
+}
+
+define i64 @baz(i64 %zed) nounwind {
+; CHECK:  andl $2147483647, %edi      # encoding: [0x81,0xe7,0xff,0xff,0xff,0x7f]
+  %t1 = and i64 %zed, 2147483647
+  ret i64 %t1
+}
diff --git a/test/CodeGen/X86/atomic64.ll b/test/CodeGen/X86/atomic64.ll
index 11b4e6864da..c6b1c39d35d 100644
--- a/test/CodeGen/X86/atomic64.ll
+++ b/test/CodeGen/X86/atomic64.ll
@@ -48,7 +48,7 @@ define void @atomic_fetch_and64() nounwind {
 ; X64:       lock
 ; X64:       andq $3
   %t2 = atomicrmw and  i64* @sc64, i64 5 acquire
-; X64:       andq
+; X64:       andl
 ; X64:       lock
 ; X64:       cmpxchgq
   %t3 = atomicrmw and  i64* @sc64, i64 %t2 acquire
diff --git a/test/CodeGen/X86/bmi.ll b/test/CodeGen/X86/bmi.ll
index f1ef9ef64e9..8b13e960cd8 100644
--- a/test/CodeGen/X86/bmi.ll
+++ b/test/CodeGen/X86/bmi.ll
@@ -260,7 +260,7 @@ entry:
   %and = and i64 %x, 2147483647
   ret i64 %and
 ; CHECK-LABEL: bzhi64_small_constant_mask:
-; CHECK: andq  $2147483647, %r[[ARG1]]
+; CHECK: andl  $2147483647, %e[[ARG1]]
 }
 
 define i32 @blsi32(i32 %x) nounwind readnone {
diff --git a/test/CodeGen/X86/setcc.ll b/test/CodeGen/X86/setcc.ll
index 2454af926aa..6f1ddbdc6ac 100644
--- a/test/CodeGen/X86/setcc.ll
+++ b/test/CodeGen/X86/setcc.ll
@@ -29,7 +29,7 @@ define i64 @t3(i64 %x) nounwind readnone ssp {
 entry:
 ; CHECK-LABEL: t3:
 ; CHECK: sbbq %rax, %rax
-; CHECK: andq $64, %rax
+; CHECK: andl $64, %eax
   %0 = icmp ult i64 %x, 18                        ; <i1> [#uses=1]
   %iftmp.2.0 = select i1 %0, i64 64, i64 0        ; <i64> [#uses=1]
   ret i64 %iftmp.2.0
diff --git a/test/CodeGen/X86/shift-pair.ll b/test/CodeGen/X86/shift-pair.ll
index 24ba1fc7707..62e51f002f7 100644
--- a/test/CodeGen/X86/shift-pair.ll
+++ b/test/CodeGen/X86/shift-pair.ll
@@ -3,7 +3,7 @@
 define i64 @test(i64 %A) {
 ; CHECK: @test
 ; CHECK: shrq $54
-; CHECK: andq $1020
+; CHECK: andl $1020
 ; CHECK: ret
     %B = lshr i64 %A, 56
     %C = shl i64 %B, 2
diff --git a/test/CodeGen/X86/uint64-to-float.ll b/test/CodeGen/X86/uint64-to-float.ll
index ca764e7568f..a1074a6d698 100644
--- a/test/CodeGen/X86/uint64-to-float.ll
+++ b/test/CodeGen/X86/uint64-to-float.ll
@@ -6,13 +6,13 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
 
-; CHECK: testq %rdi, %rdi
+; CHECK: andl
+; CHECK-NEXT: testq %rdi, %rdi
 ; CHECK-NEXT: js LBB0_1
 ; CHECK: cvtsi2ss
 ; CHECK-NEXT: ret
 ; CHECK: LBB0_1
 ; CHECK: shrq
-; CHECK-NEXT: andq
 ; CHECK-NEXT: orq
 ; CHECK-NEXT: cvtsi2ss
 define float @test(i64 %a) {
diff --git a/test/CodeGen/X86/x86-64-tls-1.ll b/test/CodeGen/X86/x86-64-tls-1.ll
index 2879fb4e1e7..2c954dbc9c9 100644
--- a/test/CodeGen/X86/x86-64-tls-1.ll
+++ b/test/CodeGen/X86/x86-64-tls-1.ll
@@ -3,7 +3,7 @@
 define i64 @z() nounwind {
 ; CHECK:      movq    $tm_nest_level@TPOFF, %r[[R0:[abcd]]]x
 ; CHECK-NEXT: addl    %fs:0, %e[[R0]]x
-; CHECK-NEXT: andq    $100, %r[[R0]]x
+; CHECK-NEXT: andl    $100, %e[[R0]]x
 
   ret i64 and (i64 ptrtoint (i32* @tm_nest_level to i64), i64 100)
 }
-- 
2.34.1