Fix an issue where GVN was performing the return slot optimization when it was

author Owen Anderson <resistor@mac.com>

Mon, 25 Feb 2008 04:08:09 +0000 (04:08 +0000)

committer Owen Anderson <resistor@mac.com>

Mon, 25 Feb 2008 04:08:09 +0000 (04:08 +0000)
author Owen Anderson <resistor@mac.com>
Mon, 25 Feb 2008 04:08:09 +0000 (04:08 +0000)
committer Owen Anderson <resistor@mac.com>
Mon, 25 Feb 2008 04:08:09 +0000 (04:08 +0000)
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp

index 650612144adc4285cabab9e6ed19753271ab84a2..4b7e82c927e6290ce3dc26f1f66203ed74a0632c 100644 (file)
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -752,6 +752,8 @@ namespace {
      bool iterateOnFunction(Function &F);
      Value* CollapsePhi(PHINode* p);
      bool isSafeReplacement(PHINode* p, Instruction* inst);
+    bool valueHasOnlyOneUseAfter(Value* val, MemCpyInst* use,
+                                 Instruction* cutoff);
    };
    
    char GVN::ID = 0;
@@ -1055,22 +1057,32 @@ bool GVN::processLoad(LoadInst* L,
    return deletedLoad;
  }
  
-/// isReturnSlotOptznProfitable - Determine if performing a return slot 
-/// fusion with the slot dest is profitable
-static bool isReturnSlotOptznProfitable(Value* dest, MemCpyInst* cpy) {
-  // We currently consider it profitable if dest is otherwise dead.
-  SmallVector<User*, 8> useList(dest->use_begin(), dest->use_end());
+/// valueHasOnlyOneUse - Returns true if a value has only one use after the
+/// cutoff that is in the current same block and is the same as the use
+/// parameter.
+bool GVN::valueHasOnlyOneUseAfter(Value* val, MemCpyInst* use,
+                                  Instruction* cutoff) {
+  DominatorTree& DT = getAnalysis<DominatorTree>();
+  
+  SmallVector<User*, 8> useList(val->use_begin(), val->use_end());
    while (!useList.empty()) {
      User* UI = useList.back();
      
+    
      if (isa<GetElementPtrInst>(UI) || isa<BitCastInst>(UI)) {
        useList.pop_back();
        for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
             I != E; ++I)
          useList.push_back(*I);
-    } else if (UI == cpy)
+    } else if (UI == use) {
        useList.pop_back();
-    else
+    } else if (Instruction* inst = dyn_cast<Instruction>(UI)) {
+      if (inst->getParent() == use->getParent() &&
+          (inst == cutoff || !DT.dominates(cutoff, inst))) {
+        useList.pop_back();
+      } else
+        return false;
+    } else
        return false;
    }
    
@@ -1123,8 +1135,14 @@ bool GVN::performReturnSlotOptzn(MemCpyInst* cpy, CallInst* C,
    if (TD.getTypeStoreSize(PT->getElementType()) != cpyLength->getZExtValue())
      return false;
    
+  // For safety, we must ensure that the output parameter of the call only has
+  // a single use, the memcpy.  Otherwise this can introduce an invalid
+  // transformation.
+  if (!valueHasOnlyOneUseAfter(CS.getArgument(0), cpy, C))
+    return false;
+  
    // We only perform the transformation if it will be profitable. 
-  if (!isReturnSlotOptznProfitable(cpyDest, cpy))
+  if (!valueHasOnlyOneUseAfter(cpyDest, cpy, C))
      return false;
    
    // In addition to knowing that the call does not access the return slot
diff --git a/test/Transforms/GVN/2008-02-24-MultipleUseofSRet.ll b/test/Transforms/GVN/2008-02-24-MultipleUseofSRet.ll

new file mode 100644 (file)

index 0000000..797dba2
--- /dev/null
+++ b/test/Transforms/GVN/2008-02-24-MultipleUseofSRet.ll
@@ -0,0 +1,32 @@
+; RUN: llvm-as < %s | opt -gvn -dse | llvm-dis | grep {call.*initialize} | grep memtmp | count 1
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+
+define internal fastcc void @initialize({ x86_fp80, x86_fp80 }* noalias sret  %agg.result) nounwind  {
+entry:
+       %agg.result.03 = getelementptr { x86_fp80, x86_fp80 }* %agg.result, i32 0, i32 0                ; <x86_fp80*> [#uses=1]
+       store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.03
+       %agg.result.15 = getelementptr { x86_fp80, x86_fp80 }* %agg.result, i32 0, i32 1                ; <x86_fp80*> [#uses=1]
+       store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.15
+       ret void
+}
+
+declare fastcc x86_fp80 @passed_uninitialized({ x86_fp80, x86_fp80 }* %x) nounwind
+
+define fastcc void @badly_optimized() nounwind  {
+entry:
+       %z = alloca { x86_fp80, x86_fp80 }              ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
+       %tmp = alloca { x86_fp80, x86_fp80 }            ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
+       %memtmp = alloca { x86_fp80, x86_fp80 }, align 8                ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
+       call fastcc void @initialize( { x86_fp80, x86_fp80 }* noalias sret  %memtmp )
+       %tmp1 = bitcast { x86_fp80, x86_fp80 }* %tmp to i8*             ; <i8*> [#uses=1]
+       %memtmp2 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8*               ; <i8*> [#uses=1]
+       call void @llvm.memcpy.i32( i8* %tmp1, i8* %memtmp2, i32 24, i32 8 )
+       %z3 = bitcast { x86_fp80, x86_fp80 }* %z to i8*         ; <i8*> [#uses=1]
+       %tmp4 = bitcast { x86_fp80, x86_fp80 }* %tmp to i8*             ; <i8*> [#uses=1]
+       call void @llvm.memcpy.i32( i8* %z3, i8* %tmp4, i32 24, i32 8 )
+       %tmp5 = call fastcc x86_fp80 @passed_uninitialized( { x86_fp80, x86_fp80 }* %z )                ; <x86_fp80> [#uses=0]
+       ret void
+}
+
+declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
+\ No newline at end of file
author	Owen Anderson <resistor@mac.com>
	Mon, 25 Feb 2008 04:08:09 +0000 (04:08 +0000)
committer	Owen Anderson <resistor@mac.com>
	Mon, 25 Feb 2008 04:08:09 +0000 (04:08 +0000)
lib/Transforms/Scalar/GVN.cpp		patch \| blob \| history
test/Transforms/GVN/2008-02-24-MultipleUseofSRet.ll	[new file with mode: 0644]	patch \| blob