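remove a pointless restriction from memcpyopt. It was refusing to forward a memcpy of a memcpy whenever the dest of the second might alias the source of the first; it now performs the transformation anyway and emits a memmove instead of a memcpy in that case.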

author Chris Lattner <sabre@nondot.org>

Thu, 18 Nov 2010 08:00:57 +0000 (08:00 +0000)

committer Chris Lattner <sabre@nondot.org>

Thu, 18 Nov 2010 08:00:57 +0000 (08:00 +0000)
author Chris Lattner <sabre@nondot.org>
Thu, 18 Nov 2010 08:00:57 +0000 (08:00 +0000)
committer Chris Lattner <sabre@nondot.org>
Thu, 18 Nov 2010 08:00:57 +0000 (08:00 +0000)
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp

index ea29fca346a10894fd657c6b3d1a1fefde2203cf..9c16ae417c40af0247d7b2dfb4c15d3f857dbbd9 100644 (file)
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -688,11 +688,14 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
    if (DepSize < MSize)
      return false;
    
-  // Finally, we have to make sure that the dest of the second does not
-  // alias the source of the first.
+  Intrinsic::ID ResultFn = Intrinsic::memcpy;
+  
+  // If the dest of the second might alias the source of the first, then the
+  // source and dest might overlap.  We still want to eliminate the intermediate
+  // value, but we have to generate a memmove instead of memcpy.
    AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
    if (!AA.isNoAlias(M->getRawDest(), MSize, MDep->getRawSource(), DepSize))
-    return false;
+    ResultFn = Intrinsic::memmove;
    
    // If all checks passed, then we can transform these memcpy's
    const Type *ArgTys[3] = {
@@ -702,7 +705,7 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
    };
    Function *MemCpyFun =
      Intrinsic::getDeclaration(M->getParent()->getParent()->getParent(),
-                              M->getIntrinsicID(), ArgTys, 3);
+                              ResultFn, ArgTys, 3);
    
    // Make sure to use the lesser of the alignment of the source and the dest
    // since we're changing where we're reading from, but don't want to increase
diff --git a/test/Transforms/MemCpyOpt/2008-04-29-SRetRemoval.ll b/test/Transforms/MemCpyOpt/2008-04-29-SRetRemoval.ll

deleted file mode 100644 (file)

index 4fec169..0000000
--- a/test/Transforms/MemCpyOpt/2008-04-29-SRetRemoval.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: opt < %s -memcpyopt -S | grep {call.*memcpy.*agg.result}
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin8"
-@x = external global { x86_fp80, x86_fp80 }            ; <{ x86_fp80, x86_fp80 }*> [#uses=1]
-
-define void @foo({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind  {
-entry:
-       %x.0 = alloca { x86_fp80, x86_fp80 }            ; <{ x86_fp80, x86_fp80 }*> [#uses=1]
-       %x.01 = bitcast { x86_fp80, x86_fp80 }* %x.0 to i8*             ; <i8*> [#uses=2]
-       call void @llvm.memcpy.i32( i8* %x.01, i8* bitcast ({ x86_fp80, x86_fp80 }* @x to i8*), i32 32, i32 16 )
-       %agg.result2 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8*               ; <i8*> [#uses=1]
-       call void @llvm.memcpy.i32( i8* %agg.result2, i8* %x.01, i32 32, i32 16 )
-       ret void
-}
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind 
diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll

index 7d69287280b0a51d2c573ee526bdae3923a19785..303c2fcceef324bbe4e49f88f3a8e4ba83b2e30e 100644 (file)
--- a/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/test/Transforms/MemCpyOpt/memcpy.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -memcpyopt -dse -S | grep {call.*memcpy} | count 1
+; RUN: opt < %s -basicaa -memcpyopt -dse -S | FileCheck %s
  
  target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
  target triple = "i686-apple-darwin9"
@@ -20,7 +20,7 @@ entry:
  
  ; CHECK: @test1
  ; CHECK: call void @ccoshl
-; CHECK: call @llvm.memcpy
+; CHECK: call void @llvm.memcpy
  ; CHECK-NOT: llvm.memcpy
  ; CHECK: ret void
         ret void
@@ -29,3 +29,36 @@ entry:
  declare void @ccoshl({ x86_fp80, x86_fp80 }* sret , x86_fp80, x86_fp80) nounwind 
  
  declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind 
+
+
+; The intermediate alloca and one of the memcpy's should be eliminated, the
+; other should be replaced with a memmove.
+define void @test2(i8* %P, i8* %Q) nounwind  {
+       %memtmp = alloca { x86_fp80, x86_fp80 }, align 16
+       %R = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8*
+       call void @llvm.memcpy.i32( i8* %R, i8* %P, i32 32, i32 16 )
+       call void @llvm.memcpy.i32( i8* %Q, i8* %R, i32 32, i32 16 )
+        ret void
+        
+; CHECK: @test2
+; CHECK-NEXT: call void @llvm.memmove{{.*}}(i8* %Q, i8* %P
+; CHECK-NEXT: ret void
+}
+
+
+
+
+@x = external global { x86_fp80, x86_fp80 }
+
+define void @test3({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind  {
+       %x.0 = alloca { x86_fp80, x86_fp80 }
+       %x.01 = bitcast { x86_fp80, x86_fp80 }* %x.0 to i8*
+       call void @llvm.memcpy.i32( i8* %x.01, i8* bitcast ({ x86_fp80, x86_fp80 }* @x to i8*), i32 32, i32 16 )
+       %agg.result2 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8*
+       call void @llvm.memcpy.i32( i8* %agg.result2, i8* %x.01, i32 32, i32 16 )
+       ret void
+; CHECK: @test3
+; CHECK-NEXT: %agg.result2 = bitcast 
+; CHECK-NEXT: call void @llvm.memcpy
+; CHECK-NEXT: ret void
+}
author	Chris Lattner <sabre@nondot.org>
	Thu, 18 Nov 2010 08:00:57 +0000 (08:00 +0000)
committer	Chris Lattner <sabre@nondot.org>
	Thu, 18 Nov 2010 08:00:57 +0000 (08:00 +0000)
lib/Transforms/Scalar/MemCpyOptimizer.cpp		patch \| blob \| history
test/Transforms/MemCpyOpt/2008-04-29-SRetRemoval.ll	[deleted file]	patch \| blob \| history
test/Transforms/MemCpyOpt/memcpy.ll		patch \| blob \| history