Implement load to store => memcpy in MemCpyOpt for aggregates

author Amaury Sechet <deadalnix@gmail.com>

Tue, 5 Jan 2016 20:17:48 +0000 (20:17 +0000)

committer Amaury Sechet <deadalnix@gmail.com>

Tue, 5 Jan 2016 20:17:48 +0000 (20:17 +0000)
author Amaury Sechet <deadalnix@gmail.com>
Tue, 5 Jan 2016 20:17:48 +0000 (20:17 +0000)
committer Amaury Sechet <deadalnix@gmail.com>
Tue, 5 Jan 2016 20:17:48 +0000 (20:17 +0000)
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp

index 0333bf2284e1e6e7393ed38ad1a8ceeaad95e21b..94725db56b82519daf440fbe7c414a5bd7d9996a 100644 (file)
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -481,6 +481,17 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
    return AMemSet;
  }
  
+/// Min of store/load alignment; a 0 is replaced by the type's ABI alignment.
+static unsigned findCommonAlignment(const DataLayout &DL, const StoreInst *SI,
+                                     const LoadInst *LI) {
+  unsigned DstAlign = SI->getAlignment();
+  if (DstAlign == 0) // store reports 0: use the stored value type's ABI align
+    DstAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType());
+  unsigned SrcAlign = LI->getAlignment();
+  if (SrcAlign == 0) // load reports 0: use the loaded type's ABI alignment
+    SrcAlign = DL.getABITypeAlignment(LI->getType());
+  return SrcAlign < DstAlign ? SrcAlign : DstAlign;
+}
  
  bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
    if (!SI->isSimple()) return false;
@@ -496,12 +507,70 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
  
    const DataLayout &DL = SI->getModule()->getDataLayout();
  
-  // Detect cases where we're performing call slot forwarding, but
-  // happen to be using a load-store pair to implement it, rather than
-  // a memcpy.
+  // Load to store forwarding can be interpreted as memcpy.
    if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
      if (LI->isSimple() && LI->hasOneUse() &&
          LI->getParent() == SI->getParent()) {
+
+      auto *T = LI->getType();
+      if (T->isAggregateType()) {
+        AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+        MemoryLocation LoadLoc = MemoryLocation::get(LI);
+
+        // We use alias analysis to check if an instruction may store to
+        // the memory we load from in between the load and the store. If
+        // such an instruction is found, we store it in AI.
+        Instruction *AI = nullptr;
+        for (BasicBlock::iterator I = ++LI->getIterator(), E = SI->getIterator();
+             I != E; ++I) {
+          if (AA.getModRefInfo(&*I, LoadLoc) & MRI_Mod) {
+            AI = &*I;
+            break;
+          }
+        }
+
+        // If no aliasing instruction is found, then we can promote the
+        // load/store pair to a memcpy at the store location.
+        if (!AI) {
+          // If we load from memory that may alias the memory we store to,
+          // memmove must be used to preserve semantics. If not, memcpy can
+          // be used.
+          bool UseMemMove = false;
+          if (!AA.isNoAlias(MemoryLocation::get(SI), LoadLoc))
+            UseMemMove = true;
+
+          unsigned Align = findCommonAlignment(DL, SI, LI);
+          uint64_t Size = DL.getTypeStoreSize(T);
+
+          IRBuilder<> Builder(SI);
+          Instruction *M;
+          if (UseMemMove)
+            M = Builder.CreateMemMove(SI->getPointerOperand(),
+                                      LI->getPointerOperand(), Size,
+                                      Align, SI->isVolatile());
+          else
+            M = Builder.CreateMemCpy(SI->getPointerOperand(),
+                                     LI->getPointerOperand(), Size,
+                                     Align, SI->isVolatile());
+
+          DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI
+                       << " => " << *M << "\n");
+
+          MD->removeInstruction(SI);
+          SI->eraseFromParent();
+          MD->removeInstruction(LI);
+          LI->eraseFromParent();
+          ++NumMemCpyInstr;
+
+          // Make sure we do not invalidate the iterator.
+          BBI = M->getIterator();
+          return true;
+        }
+      }
+
+      // Detect cases where we're performing call slot forwarding, but
+      // happen to be using a load-store pair to implement it, rather than
+      // a memcpy.
        MemDepResult ldep = MD->getDependency(LI);
        CallInst *C = nullptr;
        if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
@@ -522,18 +591,11 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
        }
  
        if (C) {
-        unsigned storeAlign = SI->getAlignment();
-        if (!storeAlign)
-          storeAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType());
-        unsigned loadAlign = LI->getAlignment();
-        if (!loadAlign)
-          loadAlign = DL.getABITypeAlignment(LI->getType());
-
          bool changed = performCallSlotOptzn(
              LI, SI->getPointerOperand()->stripPointerCasts(),
              LI->getPointerOperand()->stripPointerCasts(),
              DL.getTypeStoreSize(SI->getOperand(0)->getType()),
-            std::min(storeAlign, loadAlign), C);
+            findCommonAlignment(DL, SI, LI), C);
          if (changed) {
            MD->removeInstruction(SI);
            SI->eraseFromParent();
diff --git a/test/Transforms/MemCpyOpt/fca2memcpy.ll b/test/Transforms/MemCpyOpt/fca2memcpy.ll

new file mode 100644 (file)

index 0000000..61ec7f1
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/fca2memcpy.ll
@@ -0,0 +1,47 @@
+; RUN: opt -memcpyopt -S < %s | FileCheck %s
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+%S = type { i8*, i32 }
+
+define void @copy(%S* %src, %S* %dst) { ; aggregate copy with no aliasing information on either pointer
+; CHECK-LABEL: copy
+; CHECK: call void @llvm.memmove.p0i8.p0i8.i64
+; CHECK-NEXT: ret void
+  %1 = load %S, %S* %src ; whole-aggregate load whose only use is the store below
+  store %S %1, %S* %dst ; neither pointer is noalias, so a memmove is the expected replacement
+  ret void
+}
+
+define void @noaliassrc(%S* noalias %src, %S* %dst) { ; same copy, but the source pointer is marked noalias
+; CHECK-LABEL: noaliassrc
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
+; CHECK-NEXT: ret void
+  %1 = load %S, %S* %src ; whole-aggregate load whose only use is the store below
+  store %S %1, %S* %dst ; with a noalias source a memcpy (not memmove) is expected
+  ret void
+}
+
+define void @noaliasdst(%S* %src, %S* noalias %dst) { ; same copy, but the destination pointer is marked noalias
+; CHECK-LABEL: noaliasdst
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
+; CHECK-NEXT: ret void
+  %1 = load %S, %S* %src ; whole-aggregate load whose only use is the store below
+  store %S %1, %S* %dst ; with a noalias destination a memcpy (not memmove) is expected
+  ret void
+}
+
+define void @copyalias(%S* %src, %S* %dst) { ; two load/store pairs over possibly-aliasing pointers
+; CHECK-LABEL: copyalias
+; CHECK-NEXT: [[LOAD:%[a-z0-9\.]+]] = load %S, %S* %src
+; CHECK-NOT: load
+; CHECK: call void @llvm.memmove.p0i8.p0i8.i64
+; CHECK-NEXT: store %S [[LOAD]], %S* %dst
+; CHECK-NEXT: ret void
+  %1 = load %S, %S* %src ; presumably the load folded into the memmove (its store comes first) - TODO confirm
+  %2 = load %S, %S* %src ; presumably the single load expected to remain in the output - TODO confirm
+  store %S %1, %S* %dst ; one pair is expected to turn into the memmove
+  store %S %2, %S* %dst ; the other pair is expected to stay a plain store of the remaining load
+  ret void
+}
author	Amaury Sechet <deadalnix@gmail.com>
	Tue, 5 Jan 2016 20:17:48 +0000 (20:17 +0000)
committer	Amaury Sechet <deadalnix@gmail.com>
	Tue, 5 Jan 2016 20:17:48 +0000 (20:17 +0000)
lib/Transforms/Scalar/MemCpyOptimizer.cpp		patch \| blob \| history
test/Transforms/MemCpyOpt/fca2memcpy.ll	[new file with mode: 0644]	patch \| blob