From 8a6197a968c91aca5b793718072ef3886c2cadd8 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Thu, 30 Jul 2015 21:41:50 +0000
Subject: [PATCH] fix memcpy/memset/memmove lowering when optimizing for size

Fixing the handling of the MinSize attribute was discussed in D11363.
This patch is a prerequisite for that work.

The handling of OptSize when lowering mem* functions was broken
on Darwin because it wants to ignore -Os for these cases, but the
existing logic also made it ignore -Oz (MinSize).

The Linux test change (test3_minsize in test/CodeGen/X86/memcpy.ll)
demonstrates a widespread problem. The backend doesn't usually
recognize the MinSize attribute by itself; it assumes that if the
MinSize attribute exists, then the OptSize attribute must also exist.

Fixing this more generally will be a follow-on patch or two.

Differential Revision: http://reviews.llvm.org/D11568



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@243693 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 18 ++++++--
 lib/Target/ARM/ARMISelLowering.cpp        |  6 +--
 lib/Target/X86/X86ISelLowering.cpp        |  8 ++--
 test/CodeGen/X86/memcpy.ll                | 53 ++---------------------
 4 files changed, 24 insertions(+), 61 deletions(-)
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index d41d46b7b14..4122ce06b04 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4151,6 +4151,18 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
   return true;
 }
 
+static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
+  const Function *F = MF.getFunction();
+  bool HasMinSize = F->hasFnAttribute(Attribute::MinSize);
+  bool HasOptSize = F->hasFnAttribute(Attribute::OptimizeForSize);
+  // Returns the OptSize flag used by the mem* load/store lowering helpers.
+  // On Darwin, -Os means optimize for size without hurting performance, so
+  // only -Oz (MinSize) counts there; elsewhere either attribute is enough.
+  if (MF.getTarget().getTargetTriple().isOSDarwin())
+    return HasMinSize; // Darwin: OptimizeForSize alone is ignored.
+  return HasOptSize || HasMinSize;
+}
+
 static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
                                        SDValue Chain, SDValue Dst,
                                        SDValue Src, uint64_t Size,
@@ -4171,7 +4183,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
   bool DstAlignCanChange = false;
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
-  bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
+  bool OptSize = shouldLowerMemFuncForSize(MF);
   FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
   if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
     DstAlignCanChange = true;
@@ -4284,7 +4296,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
   bool DstAlignCanChange = false;
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
-  bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
+  bool OptSize = shouldLowerMemFuncForSize(MF);
   FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
   if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
     DstAlignCanChange = true;
@@ -4378,7 +4390,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl,
   bool DstAlignCanChange = false;
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
-  bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
+  bool OptSize = shouldLowerMemFuncForSize(MF);
   FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
   if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
     DstAlignCanChange = true;
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index e01d26a67d8..04827df7ac4 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -964,11 +964,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
 
   //// temporary - rewrite interface to use type
   MaxStoresPerMemset = 8;
-  MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+  MaxStoresPerMemsetOptSize = 4;
   MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
-  MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
+  MaxStoresPerMemcpyOptSize = 2;
   MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
-  MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
+  MaxStoresPerMemmoveOptSize = 2;
 
   // On ARM arguments smaller than 4 bytes are extended, so all arguments
   // are at least 4 bytes aligned.
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 906f0464d84..1788edcaf78 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1723,14 +1723,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
 
   computeRegisterProperties(Subtarget->getRegisterInfo());
 
-  // On Darwin, -Os means optimize for size without hurting performance,
-  // do not reduce the limit.
   MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
-  MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
+  MaxStoresPerMemsetOptSize = 8;
   MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
-  MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+  MaxStoresPerMemcpyOptSize = 4;
   MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
-  MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+  MaxStoresPerMemmoveOptSize = 4;
   setPrefLoopAlignment(4); // 2^4 bytes.
 
   // Predictable cmov don't hurt on atom because it's in-order.
diff --git a/test/CodeGen/X86/memcpy.ll b/test/CodeGen/X86/memcpy.ll
index 78d250930c2..00669443d6d 100644
--- a/test/CodeGen/X86/memcpy.ll
+++ b/test/CodeGen/X86/memcpy.ll
@@ -59,48 +59,16 @@ entry:
 ; DARWIN: movq
 }
 
-; FIXME: Both Linux and Darwin should lower to a memcpy call; minsize is on.
 define void @test3_minsize(i8* nocapture %A, i8* nocapture %B) nounwind minsize noredzone {
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false)
   ret void
 ; LINUX-LABEL: test3_minsize:
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
+; LINUX: memcpy
 
 ; DARWIN-LABEL: test3_minsize:
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
+; DARWIN: memcpy
 }
 
-; FIXME: Darwin should lower to a memcpy call; minsize is on.
 define void @test3_minsize_optsize(i8* nocapture %A, i8* nocapture %B) nounwind optsize minsize noredzone {
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false)
   ret void
@@ -108,22 +76,7 @@ define void @test3_minsize_optsize(i8* nocapture %A, i8* nocapture %B) nounwind
 ; LINUX: memcpy
 
 ; DARWIN-LABEL: test3_minsize_optsize:
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
+; DARWIN: memcpy
 }
 
 ; Large constant memcpy's should be inlined when not optimizing for size.
-- 
2.34.1