From 1faa4454761c5aee2854ab127ffc0dde877b3982 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Mon, 1 Sep 2014 10:09:18 +0000 Subject: [PATCH] Fix a really bad miscompile introduced in r216865 - the else-if logic chain became completely broken here as *all* intrinsic users ended up being skipped, and the ones that seemed to be singled out were actually the exact wrong set. This is a great example of why long else-if chains can be easily confusing. Switch the entire code to use early exits and early continues to have simpler (and more importantly, correct) logic here, as well as fixing the reversed logic for detecting and continuing on lifetime intrinsics. I've also significantly cleaned up the test case and added another test case demonstrating an example where the optimization is not (trivially) safe to perform. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216871 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/MemCpyOptimizer.cpp | 24 +++++----- .../memcpy-to-memset-with-lifetimes.ll | 45 +++++++++++++++---- 2 files changed, 50 insertions(+), 19 deletions(-) diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index b1a2ed39f24..c750ece41b4 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -673,19 +673,23 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy, if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) { for (User *UU : U->users()) srcUseList.push_back(UU); - } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(U)) { - if (G->hasAllZeroIndices()) - for (User *UU : U->users()) - srcUseList.push_back(UU); - else + continue; + } + if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(U)) { + if (!G->hasAllZeroIndices()) return false; - } else if (const IntrinsicInst *IT = dyn_cast<IntrinsicInst>(U)) { - if (IT->getIntrinsicID() != Intrinsic::lifetime_start && - IT->getIntrinsicID() != Intrinsic::lifetime_end) + + for (User *UU : U->users()) + srcUseList.push_back(UU); + continue; + } + if (const 
IntrinsicInst *IT = dyn_cast<IntrinsicInst>(U)) + if (IT->getIntrinsicID() == Intrinsic::lifetime_start || + IT->getIntrinsicID() == Intrinsic::lifetime_end) continue; - } else if (U != C && U != cpy) { + + if (U != C && U != cpy) return false; - } } // Check that src isn't captured by the called function since the diff --git a/test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll b/test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll index d702f3ff852..626317614c9 100644 --- a/test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll +++ b/test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll @@ -3,20 +3,47 @@ target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -define void @foo([8 x i64]* noalias nocapture sret dereferenceable(64)) { +define void @foo([8 x i64]* noalias nocapture sret dereferenceable(64) %sret) { entry-block: %a = alloca [8 x i64], align 8 - %1 = bitcast [8 x i64]* %a to i8* - call void @llvm.lifetime.start(i64 64, i8* %1) - call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 64, i32 8, i1 false) - %2 = bitcast [8 x i64]* %0 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %1, i64 64, i32 8, i1 false) - call void @llvm.lifetime.end(i64 64, i8* %1) + %a.cast = bitcast [8 x i64]* %a to i8* + call void @llvm.lifetime.start(i64 64, i8* %a.cast) + call void @llvm.memset.p0i8.i64(i8* %a.cast, i8 0, i64 64, i32 8, i1 false) + %sret.cast = bitcast [8 x i64]* %sret to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %sret.cast, i8* %a.cast, i64 64, i32 8, i1 false) + call void @llvm.lifetime.end(i64 64, i8* %a.cast) ret void ; CHECK-LABEL: @foo( -; CHECK: %1 = bitcast -; CHECK: call void @llvm.memset +; CHECK: %[[sret_cast:[^=]+]] = bitcast [8 x i64]* %sret to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %[[sret_cast]], i8 0, i64 64 +; CHECK-NOT: call void @llvm.memcpy +; CHECK: ret void +} + +define void @bar([8 x i64]* noalias nocapture sret dereferenceable(64) %sret, [8 x i64]* 
noalias nocapture dereferenceable(64) %out) { +entry-block: + %a = alloca [8 x i64], align 8 + %a.cast = bitcast [8 x i64]* %a to i8* + call void @llvm.lifetime.start(i64 64, i8* %a.cast) + call void @llvm.memset.p0i8.i64(i8* %a.cast, i8 0, i64 64, i32 8, i1 false) + %sret.cast = bitcast [8 x i64]* %sret to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %sret.cast, i8* %a.cast, i64 64, i32 8, i1 false) + call void @llvm.memset.p0i8.i64(i8* %a.cast, i8 42, i64 32, i32 8, i1 false) + %out.cast = bitcast [8 x i64]* %out to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out.cast, i8* %a.cast, i64 64, i32 8, i1 false) + call void @llvm.lifetime.end(i64 64, i8* %a.cast) + ret void + +; CHECK-LABEL: @bar( +; CHECK: %[[a:[^=]+]] = alloca [8 x i64] +; CHECK: %[[a_cast:[^=]+]] = bitcast [8 x i64]* %[[a]] to i8* +; CHECK: call void @llvm.memset.p0i8.i64(i8* %[[a_cast]], i8 0, i64 64 +; CHECK: %[[sret_cast:[^=]+]] = bitcast [8 x i64]* %sret to i8* +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[sret_cast]], i8* %[[a_cast]], i64 64 +; CHECK: call void @llvm.memset.p0i8.i64(i8* %[[a_cast]], i8 42, i64 32 +; CHECK: %[[out_cast:[^=]+]] = bitcast [8 x i64]* %out to i8* +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[out_cast]], i8* %[[a_cast]], i64 64 ; CHECK-NOT: call void @llvm.memcpy ; CHECK: ret void } -- 2.34.1