From 0f32a037ef13c6b0f0a9509a294e6607776f65f0 Mon Sep 17 00:00:00 2001
From: Ahmed Bougacha <ahmed.bougacha@gmail.com>
Date: Tue, 21 Apr 2015 21:28:33 +0000
Subject: [PATCH] [MemCpyOpt] Use the raw i8* dest when optimizing
 memset+memcpy.

MemIntrinsic::getDest() looks through pointer casts, and using it
directly when building the new GEP+memset results in stuff like:

  %0 = getelementptr i64* %p, i32 16
  %1 = bitcast i64* %0 to i8*
  call ..memset(i8* %1, ...)

instead of the correct:

  %0 = bitcast i64* %p to i8*
  %1 = getelementptr i8* %0, i32 16
  call ..memset(i8* %1, ...)

Instead, use getRawDest, which just gives you the i8* value.
While there, use the memcpy's dest, as it's live anyway.

In most cases, when the optimization triggers, the memset and memcpy
sizes are the same, so the built memset is 0-sized and eliminated.
The problem occurs when they're different.

Fixes a regression caused by r235232: PR23300.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235419 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Scalar/MemCpyOptimizer.cpp        |  3 ++-
 .../MemCpyOpt/memset-memcpy-redundant-memset.ll  | 16 ++++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 563e49c0190..8465ffd5064 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -860,7 +860,8 @@ bool MemCpyOpt::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
   if (MemSet->getDest() != MemCpy->getDest())
     return false;
 
-  Value *Dest = MemSet->getDest();
+  // Use the same i8* dest as the memcpy, killing the memset dest if different.
+  Value *Dest = MemCpy->getRawDest();
   Value *DestSize = MemSet->getLength();
   Value *SrcSize = MemCpy->getLength();
 
diff --git a/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll b/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll
index 5d3ef6bfd00..0a21e71b21b 100644
--- a/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll
+++ b/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll
@@ -100,6 +100,22 @@ define void @test_align_memcpy(i8* %src, i8* %dst, i64 %dst_size) {
   ret void
 }
 
+; CHECK-LABEL: define void @test_non_i8_dst_type
+; CHECK-NEXT: %dst = bitcast i64* %dst_pi64 to i8*
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false)
+; CHECK-DAG: [[DST:%[0-9]+]] = getelementptr i8, i8* %dst, i64 %src_size
+; CHECK-DAG: [[ULE:%[0-9]+]] = icmp ule i64 %dst_size, %src_size
+; CHECK-DAG: [[SIZEDIFF:%[0-9]+]] = sub i64 %dst_size, %src_size
+; CHECK-DAG: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i64 0, i64 [[SIZEDIFF]]
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST]], i8 %c, i64 [[SIZE]], i32 1, i1 false)
+; CHECK-NEXT: ret void
+define void @test_non_i8_dst_type(i8* %src, i64 %src_size, i64* %dst_pi64, i64 %dst_size, i8 %c) {
+  %dst = bitcast i64* %dst_pi64 to i8*
+  call void @llvm.memset.p0i8.i64(i8* %dst, i8 %c, i64 %dst_size, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false)
+  ret void
+}
+
 ; CHECK-LABEL: define void @test_different_dst
 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 1, i1 false)
 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %src, i64 %src_size, i32 1, i1 false)
-- 
2.34.1