Turn a memcpy from a double* into a load/store of double instead of a load/store of i64.

author Chris Lattner <sabre@nondot.org>

Mon, 14 Jan 2008 00:28:35 +0000 (00:28 +0000)

committer Chris Lattner <sabre@nondot.org>

Mon, 14 Jan 2008 00:28:35 +0000 (00:28 +0000)
author Chris Lattner <sabre@nondot.org>
Mon, 14 Jan 2008 00:28:35 +0000 (00:28 +0000)
committer Chris Lattner <sabre@nondot.org>
Mon, 14 Jan 2008 00:28:35 +0000 (00:28 +0000)
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp

index bdd3e512ec5137c5bee2d6513a645590684cdeab..63919a8d32cda8bc7eeae2c9b210b54dd1670ea5 100644 (file)
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -7826,16 +7826,49 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
    ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getOperand(3));
    if (MemOpLength == 0) return 0;
    
-  // Source and destination pointer types are always "i8*" for intrinsic.
-  //   If Size is 8 then use Int64Ty
-  //   If Size is 4 then use Int32Ty
-  //   If Size is 2 then use Int16Ty
-  //   If Size is 1 then use Int8Ty
+  // Source and destination pointer types are always "i8*" for intrinsic.  See
+  // if the size is something we can handle with a single primitive load/store.
+  // A single load+store correctly handles overlapping memory in the memmove
+  // case.
    unsigned Size = MemOpLength->getZExtValue();
    if (Size == 0 || Size > 8 || (Size&(Size-1)))
-    return 0;  // If not 1/2/4/8, exit.
+    return 0;  // If not 1/2/4/8 bytes, exit.
    
+  // Use an integer load+store unless we can find something better.
    Type *NewPtrTy = PointerType::getUnqual(IntegerType::get(Size<<3));
+  
+  // Memcpy forces the use of i8* for the source and destination.  That means
+  // that if you're using memcpy to move one double around, you'll get a cast
+  // from double* to i8*.  We'd much rather use a double load+store than
+  // an i64 load+store here, because this improves the odds that the source or
+  // dest address will be promotable.  See if we can find a better type than the
+  // integer datatype.
+  if (Value *Op = getBitCastOperand(MI->getOperand(1))) {
+    const Type *SrcETy = cast<PointerType>(Op->getType())->getElementType();
+    if (SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
+      // The SrcETy might be something like {{{double}}} or [1 x double].  Rip
+      // down through these levels if so.
+      while (!SrcETy->isFirstClassType()) {
+        if (const StructType *STy = dyn_cast<StructType>(SrcETy)) {
+          if (STy->getNumElements() == 1)
+            SrcETy = STy->getElementType(0);
+          else
+            break;
+        } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) {
+          if (ATy->getNumElements() == 1)
+            SrcETy = ATy->getElementType();
+          else
+            break;
+        } else
+          break;
+      }
+      
+      if (SrcETy->isFirstClassType())
+        NewPtrTy = PointerType::getUnqual(SrcETy);
+    }
+  }
+  
+  
    // If the memcpy/memmove provides better alignment info than we can
    // infer, use it.
    SrcAlign = std::max(SrcAlign, CopyAlign);
@@ -7843,9 +7876,13 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
    
    Value *Src = InsertBitCastBefore(MI->getOperand(2), NewPtrTy, *MI);
    Value *Dest = InsertBitCastBefore(MI->getOperand(1), NewPtrTy, *MI);
-  Value *L = new LoadInst(Src, "tmp", false, SrcAlign, MI);
-  new StoreInst(L, Dest, false, DstAlign, MI);
-  return EraseInstFromFunction(*MI);
+  Instruction *L = new LoadInst(Src, "tmp", false, SrcAlign);
+  InsertNewInstBefore(L, *MI);
+  InsertNewInstBefore(new StoreInst(L, Dest, false, DstAlign), *MI);
+
+  // Set the size of the copy to 0, it will be deleted on the next iteration.
+  MI->setOperand(3, Constant::getNullValue(MemOpLength->getType()));
+  return MI;
  }
  
  /// visitCallInst - CallInst simplification.  This mostly only handles folding 
diff --git a/test/Transforms/InstCombine/memcpy-to-load.ll b/test/Transforms/InstCombine/memcpy-to-load.ll

new file mode 100644 (file)

index 0000000..7a0027d
--- /dev/null
+++ b/test/Transforms/InstCombine/memcpy-to-load.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {load double}
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin8"
+
+define void @foo(double* %X, double* %Y) {
+entry:
+       %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+       %tmp2 = bitcast double* %X to i8*               ; <i8*> [#uses=1]
+       %tmp13 = bitcast double* %Y to i8*              ; <i8*> [#uses=1]
+       call void @llvm.memcpy.i32( i8* %tmp2, i8* %tmp13, i32 8, i32 1 )
+       ret void
+}
+
+declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
author	Chris Lattner <sabre@nondot.org>
	Mon, 14 Jan 2008 00:28:35 +0000 (00:28 +0000)
committer	Chris Lattner <sabre@nondot.org>
	Mon, 14 Jan 2008 00:28:35 +0000 (00:28 +0000)
lib/Transforms/Scalar/InstructionCombining.cpp		patch | blob | history
test/Transforms/InstCombine/memcpy-to-load.ll	[new file with mode: 0644]	patch | blob