  bool iterateOnFunction(Function &F);
  Value* CollapsePhi(PHINode* p);
  bool isSafeReplacement(PHINode* p, Instruction* inst);
+  bool valueHasOnlyOneUseAfter(Value* val, MemCpyInst* use,
+                               Instruction* cutoff);
};
char GVN::ID = 0;
  return deletedLoad;
}
-/// isReturnSlotOptznProfitable - Determine if performing a return slot
-/// fusion with the slot dest is profitable
-static bool isReturnSlotOptznProfitable(Value* dest, MemCpyInst* cpy) {
-  // We currently consider it profitable if dest is otherwise dead.
-  SmallVector<User*, 8> useList(dest->use_begin(), dest->use_end());
+/// valueHasOnlyOneUseAfter - Returns true if the only use of the value after
+/// the cutoff instruction is the given use.  GEPs and bitcasts are looked
+/// through, and uses in the same block at or before the cutoff are ignored.
+bool GVN::valueHasOnlyOneUseAfter(Value* val, MemCpyInst* use,
+                                  Instruction* cutoff) {
+  DominatorTree& DT = getAnalysis<DominatorTree>();
+
+  SmallVector<User*, 8> useList(val->use_begin(), val->use_end());
  while (!useList.empty()) {
    User* UI = useList.back();
+
    if (isa<GetElementPtrInst>(UI) || isa<BitCastInst>(UI)) {
      useList.pop_back();
      for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
           I != E; ++I)
        useList.push_back(*I);
-    } else if (UI == cpy)
+    } else if (UI == use) {
      useList.pop_back();
-    else
+    } else if (Instruction* inst = dyn_cast<Instruction>(UI)) {
+      // A use in the same block that is at or before the cutoff (i.e. one
+      // that the cutoff does not dominate) is acceptable; anything else is
+      // a use after the cutoff.
+      if (inst->getParent() == use->getParent() &&
+          (inst == cutoff || !DT.dominates(cutoff, inst))) {
+        useList.pop_back();
+      } else
+        return false;
+    } else
      return false;
  }
  if (TD.getTypeStoreSize(PT->getElementType()) != cpyLength->getZExtValue())
    return false;
+  // For safety, we must ensure that the output parameter of the call has only
+  // a single use after the call, the memcpy.  Otherwise this can introduce an
+  // invalid transformation.
+  if (!valueHasOnlyOneUseAfter(CS.getArgument(0), cpy, C))
+    return false;
+
  // We only perform the transformation if it will be profitable.
-  if (!isReturnSlotOptznProfitable(cpyDest, cpy))
+  if (!valueHasOnlyOneUseAfter(cpyDest, cpy, C))
    return false;
  // In addition to knowing that the call does not access the return slot
--- /dev/null
+; RUN: llvm-as < %s | opt -gvn -dse | llvm-dis | grep {call.*initialize} | grep memtmp | count 1
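+; The return slot optimization must not fire here: the destinations of the two
+; memcpys (%tmp and %z) have further uses after the call to @initialize, so the
+; single-use checks added above reject fusing the sret temporary %memtmp into
+; them.  The greps verify that the call to @initialize still writes to %memtmp
+; after -gvn -dse.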
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+
+define internal fastcc void @initialize({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind {
+entry:
+ %agg.result.03 = getelementptr { x86_fp80, x86_fp80 }* %agg.result, i32 0, i32 0 ; <x86_fp80*> [#uses=1]
+ store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.03
+ %agg.result.15 = getelementptr { x86_fp80, x86_fp80 }* %agg.result, i32 0, i32 1 ; <x86_fp80*> [#uses=1]
+ store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.15
+ ret void
+}
+
+declare fastcc x86_fp80 @passed_uninitialized({ x86_fp80, x86_fp80 }* %x) nounwind
+
+define fastcc void @badly_optimized() nounwind {
+entry:
+ %z = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
+ %tmp = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
+ %memtmp = alloca { x86_fp80, x86_fp80 }, align 8 ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
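+ ; %memtmp receives the sret result of @initialize; it is then copied into
+ ; %tmp, and %tmp into %z, before %z is passed to @passed_uninitialized.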
+ call fastcc void @initialize( { x86_fp80, x86_fp80 }* noalias sret %memtmp )
+ %tmp1 = bitcast { x86_fp80, x86_fp80 }* %tmp to i8* ; <i8*> [#uses=1]
+ %memtmp2 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8* ; <i8*> [#uses=1]
+ call void @llvm.memcpy.i32( i8* %tmp1, i8* %memtmp2, i32 24, i32 8 )
+ %z3 = bitcast { x86_fp80, x86_fp80 }* %z to i8* ; <i8*> [#uses=1]
+ %tmp4 = bitcast { x86_fp80, x86_fp80 }* %tmp to i8* ; <i8*> [#uses=1]
+ call void @llvm.memcpy.i32( i8* %z3, i8* %tmp4, i32 24, i32 8 )
+ %tmp5 = call fastcc x86_fp80 @passed_uninitialized( { x86_fp80, x86_fp80 }* %z ) ; <x86_fp80> [#uses=0]
+ ret void
+}
+
+declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
\ No newline at end of file