From e8a6608a7509f81223e9d9e286d90ce9ad4f5ed3 Mon Sep 17 00:00:00 2001
From: Michael Kuperstein <michael.m.kuperstein@intel.com>
Date: Wed, 19 Aug 2015 11:21:43 +0000
Subject: [PATCH] [X86] Do not lower scalar sdiv/udiv to a shifts + mul
 sequence when optimizing for minsize

There are some cases where the mul sequence is smaller, but for the most part,
using a div is preferable. This does not apply to vectors, since x86 doesn't
have vector idiv, and a vector mul/shifts sequence ought to be smaller than a
scalarized division.

Differential Revision: http://reviews.llvm.org/D12082

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@245431 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp     | 11 +++++++++
 lib/Target/X86/X86ISelLowering.h       |  2 ++
 test/CodeGen/X86/divide-by-constant.ll | 32 ++++++++++++++++++++++++++
 test/CodeGen/X86/vec_sdiv_to_shift.ll  | 13 +++++++++++
 4 files changed, 58 insertions(+)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 642fe904735..f3012d59f0c 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -26427,3 +26427,14 @@ int X86TargetLowering::getScalingFactorCost(const DataLayout &DL,
 bool X86TargetLowering::isTargetFTOL() const {
   return Subtarget->isTargetKnownWindowsMSVC() && !Subtarget->is64Bit();
 }
+
+bool X86TargetLowering::isIntDivCheap(EVT VT, bool OptSize) const {
+  // Integer division on x86 is expensive. However, when aggressively optimizing
+  // for code size, we prefer to use a div instruction, as it is usually smaller
+  // than the alternative sequence.
+  // The exception to this is vector division. Since x86 doesn't have vector
+  // integer division, leaving the division as-is is a loss even in terms of
+  // size, because it will have to be scalarized, while the alternative code
+  // sequence can be performed in vector form.
+  return OptSize && !VT.isVector();
+}
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 837fe8cfcf7..c0abf0beb0d 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -902,6 +902,8 @@ namespace llvm {
     /// \brief Customize the preferred legalization strategy for certain types.
     LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;
 
+    bool isIntDivCheap(EVT VT, bool OptSize) const override;
+
   protected:
     std::pair<const TargetRegisterClass *, uint8_t>
     findRepresentativeClass(const TargetRegisterInfo *TRI,
diff --git a/test/CodeGen/X86/divide-by-constant.ll b/test/CodeGen/X86/divide-by-constant.ll
index fd07a3f5510..9543d6c4d74 100644
--- a/test/CodeGen/X86/divide-by-constant.ll
+++ b/test/CodeGen/X86/divide-by-constant.ll
@@ -94,3 +94,35 @@ define i8 @test9(i8 %x) nounwind {
 ; CHECK: shrl $11
 ; CHECK: ret
 }
+
+define i32 @testsize1(i32 %x) minsize nounwind {
+entry:
+	%div = sdiv i32 %x, 32
+	ret i32 %div
+; CHECK-LABEL: testsize1:
+; CHECK: divl
+}
+
+define i32 @testsize2(i32 %x) minsize nounwind {
+entry:
+	%div = sdiv i32 %x, 33
+	ret i32 %div
+; CHECK-LABEL: testsize2:
+; CHECK: divl
+}
+
+define i32 @testsize3(i32 %x) minsize nounwind {
+entry:
+	%div = udiv i32 %x, 32
+	ret i32 %div
+; CHECK-LABEL: testsize3:
+; CHECK: shrl
+}
+
+define i32 @testsize4(i32 %x) minsize nounwind {
+entry:
+	%div = udiv i32 %x, 33
+	ret i32 %div
+; CHECK-LABEL: testsize4:
+; CHECK: divl
+}
diff --git a/test/CodeGen/X86/vec_sdiv_to_shift.ll b/test/CodeGen/X86/vec_sdiv_to_shift.ll
index 56855d3c44e..7f71a0c2ea5 100644
--- a/test/CodeGen/X86/vec_sdiv_to_shift.ll
+++ b/test/CodeGen/X86/vec_sdiv_to_shift.ll
@@ -13,6 +13,19 @@ entry:
   ret <8 x i16> %0
 }
 
+define <8 x i16> @sdiv_vec8x16_minsize(<8 x i16> %var) minsize {
+entry:
+; CHECK: sdiv_vec8x16_minsize
+; CHECK: psraw  $15
+; CHECK: vpsrlw  $11
+; CHECK: vpaddw
+; CHECK: vpsraw  $5
+; CHECK: ret
+  %0 = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
+  ret <8 x i16> %0
+}
+
+
 define <4 x i32> @sdiv_zero(<4 x i32> %var) {
 entry:
 ; CHECK: sdiv_zero
-- 
2.34.1