From: Eli Friedman Date: Thu, 2 Jun 2011 21:24:42 +0000 (+0000) Subject: PR10067: Add missing safety check to call return transformation in MemCpyOpt::process... X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=70d893e84b0de21205ad042c6c00148d0e3cd74e;p=oota-llvm.git PR10067: Add missing safety check to call return transformation in MemCpyOpt::processStore. If something accesses the dest of the "copy" between the call and the copy, the performCallSlotOptzn transformation is not valid. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@132485 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 360639ec95b..be5aa2ea583 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -488,11 +488,28 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // a memcpy. if (LoadInst *LI = dyn_cast(SI->getOperand(0))) { if (!LI->isVolatile() && LI->hasOneUse()) { - MemDepResult dep = MD->getDependency(LI); + MemDepResult ldep = MD->getDependency(LI); CallInst *C = 0; - if (dep.isClobber() && !isa(dep.getInst())) - C = dyn_cast(dep.getInst()); - + if (ldep.isClobber() && !isa(ldep.getInst())) + C = dyn_cast(ldep.getInst()); + + if (C) { + // Check that nothing touches the dest of the "copy" between + // the call and the store. + MemDepResult sdep = MD->getDependency(SI); + if (!sdep.isNonLocal()) { + bool FoundCall = false; + for (BasicBlock::iterator I = SI, E = sdep.getInst(); I != E; --I) { + if (&*I == C) { + FoundCall = true; + break; + } + } + if (!FoundCall) + C = 0; + } + } + if (C) { bool changed = performCallSlotOptzn(LI, SI->getPointerOperand()->stripPointerCasts(), diff --git a/test/Transforms/MemCpyOpt/2011-06-02-CallSlotOverwritten.ll b/test/Transforms/MemCpyOpt/2011-06-02-CallSlotOverwritten.ll new file mode 100644 index 00000000000..132966ef6b9 --- /dev/null +++ b/test/Transforms/MemCpyOpt/2011-06-02-CallSlotOverwritten.ll @@ -0,0 +1,36 @@ +; RUN: opt < %s -basicaa -memcpyopt -S | FileCheck %s +; PR10067 +; Make sure the call+copy isn't optimized in such a way that +; %ret ends up with the wrong value. + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" +target triple = "i386-apple-darwin10" + +%struct1 = type { i32, i32 } +%struct2 = type { %struct1, i8* } + +declare void @bar(%struct1* nocapture sret %agg.result) nounwind + +define i32 @foo() nounwind { + %x = alloca %struct1, align 8 + %y = alloca %struct2, align 8 + call void @bar(%struct1* sret %x) nounwind +; CHECK: call void @bar(%struct1* sret %x) + + %gepn1 = getelementptr inbounds %struct2* %y, i32 0, i32 0, i32 0 + store i32 0, i32* %gepn1, align 8 + %gepn2 = getelementptr inbounds %struct2* %y, i32 0, i32 0, i32 1 + store i32 0, i32* %gepn2, align 4 + + %bit1 = bitcast %struct1* %x to i64* + %bit2 = bitcast %struct2* %y to i64* + %load = load i64* %bit1, align 8 + store i64 %load, i64* %bit2, align 8 + +; CHECK: %load = load i64* %bit1, align 8 +; CHECK: store i64 %load, i64* %bit2, align 8 + + %gep1 = getelementptr %struct2* %y, i32 0, i32 0, i32 0 + %ret = load i32* %gep1 + ret i32 %ret +}