From 8a6197a968c91aca5b793718072ef3886c2cadd8 Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Thu, 30 Jul 2015 21:41:50 +0000
Subject: [PATCH] fix memcpy/memset/memmove lowering when optimizing for size

Fixing MinSize attribute handling was discussed in D11363.
This is a prerequisite patch to doing that.

The handling of OptSize when lowering mem* functions was broken
on Darwin because it wants to ignore -Os for these cases, but the
existing logic also made it ignore -Oz (MinSize).

The Linux change demonstrates a widespread problem. The backend
doesn't usually recognize the MinSize attribute by itself; it
assumes that if the MinSize attribute exists, then the OptSize
attribute must also exist.

Fixing this more generally will be a follow-on patch or two.

Differential Revision: http://reviews.llvm.org/D11568

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@243693 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 18 ++++++--
 lib/Target/ARM/ARMISelLowering.cpp        |  6 +--
 lib/Target/X86/X86ISelLowering.cpp        |  8 ++--
 test/CodeGen/X86/memcpy.ll                | 53 ++---------------------
 4 files changed, 24 insertions(+), 61 deletions(-)

diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index d41d46b7b14..4122ce06b04 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4151,6 +4151,18 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
   return true;
 }
 
+static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
+  const Function *F = MF.getFunction();
+  bool HasMinSize = F->hasFnAttribute(Attribute::MinSize);
+  bool HasOptSize = F->hasFnAttribute(Attribute::OptimizeForSize);
+
+  // On Darwin, -Os means optimize for size without hurting performance, so
+  // only really optimize for size when -Oz (MinSize) is used.
+  if (MF.getTarget().getTargetTriple().isOSDarwin())
+    return HasMinSize;
+  return HasOptSize || HasMinSize;
+}
+
 static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
                                        SDValue Chain, SDValue Dst,
                                        SDValue Src, uint64_t Size,
@@ -4171,7 +4183,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
   bool DstAlignCanChange = false;
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
-  bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
+  bool OptSize = shouldLowerMemFuncForSize(MF);
   FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
   if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
     DstAlignCanChange = true;
@@ -4284,7 +4296,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
   bool DstAlignCanChange = false;
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
-  bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
+  bool OptSize = shouldLowerMemFuncForSize(MF);
   FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
   if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
     DstAlignCanChange = true;
@@ -4378,7 +4390,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl,
   bool DstAlignCanChange = false;
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
-  bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
+  bool OptSize = shouldLowerMemFuncForSize(MF);
   FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
   if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
     DstAlignCanChange = true;
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index e01d26a67d8..04827df7ac4 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -964,11 +964,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
 
   //// temporary - rewrite interface to use type
   MaxStoresPerMemset = 8;
-  MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+  MaxStoresPerMemsetOptSize = 4;
   MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
-  MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
+  MaxStoresPerMemcpyOptSize = 2;
   MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
-  MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
+  MaxStoresPerMemmoveOptSize = 2;
 
   // On ARM arguments smaller than 4 bytes are extended, so all arguments
   // are at least 4 bytes aligned.
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 906f0464d84..1788edcaf78 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1723,14 +1723,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
 
   computeRegisterProperties(Subtarget->getRegisterInfo());
 
-  // On Darwin, -Os means optimize for size without hurting performance,
-  // do not reduce the limit.
   MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
-  MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
+  MaxStoresPerMemsetOptSize = 8;
   MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
-  MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+  MaxStoresPerMemcpyOptSize = 4;
   MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
-  MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+  MaxStoresPerMemmoveOptSize = 4;
   setPrefLoopAlignment(4); // 2^4 bytes.
 
   // Predictable cmov don't hurt on atom because it's in-order.
diff --git a/test/CodeGen/X86/memcpy.ll b/test/CodeGen/X86/memcpy.ll
index 78d250930c2..00669443d6d 100644
--- a/test/CodeGen/X86/memcpy.ll
+++ b/test/CodeGen/X86/memcpy.ll
@@ -59,48 +59,16 @@ entry:
 ; DARWIN: movq
 }
 
-; FIXME: Both Linux and Darwin should lower to a memcpy call; minsize is on.
 define void @test3_minsize(i8* nocapture %A, i8* nocapture %B) nounwind minsize noredzone {
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false)
   ret void
 ; LINUX-LABEL: test3_minsize:
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
+; LINUX: memcpy
 
 ; DARWIN-LABEL: test3_minsize:
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
+; DARWIN: memcpy
 }
 
-; FIXME: Darwin should lower to a memcpy call; minsize is on.
 define void @test3_minsize_optsize(i8* nocapture %A, i8* nocapture %B) nounwind optsize minsize noredzone {
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false)
   ret void
@@ -108,22 +76,7 @@ define void @test3_minsize_optsize(i8* nocapture %A, i8* nocapture %B) nounwind
 ; LINUX: memcpy
 
 ; DARWIN-LABEL: test3_minsize_optsize:
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
+; DARWIN: memcpy
 }
 
 ; Large constant memcpy's should be inlined when not optimizing for size.
-- 
2.34.1