From: Chris Lattner Date: Mon, 20 Dec 2010 08:10:40 +0000 (+0000) Subject: when eliding a byval copy due to inlining a readonly function, we have X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=0b66f63a26387f5c0360a4324fc3c31e0599a6e0;p=oota-llvm.git when eliding a byval copy due to inlining a readonly function, we have to make sure that the reused alloca has sufficient alignment. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122236 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index bca9fc4549a..76fdd097c07 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -229,17 +229,56 @@ static void UpdateCallGraphAfterInlining(CallSite CS, CallerNode->removeCallEdgeFor(CS); } +/// HandleByValArgument - When inlining a call site that has a byval argument, +/// we have to make the implicit memcpy explicit by adding it. static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, const Function *CalledFunc, InlineFunctionInfo &IFI, unsigned ByValAlignment) { - if (CalledFunc->onlyReadsMemory()) - return Arg; + const Type *AggTy = cast(Arg->getType())->getElementType(); + + // If the called function is readonly, then it could not mutate the caller's + // copy of the byval'd memory. In this case, it is safe to elide the copy and + // temporary. + if (CalledFunc->onlyReadsMemory()) { + // If the byval argument has a specified alignment that is greater than the + // passed in pointer, then we either have to round up the input pointer or + // give up on this transformation. + if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment. + return Arg; + + // See if the argument is a (bitcasted) pointer to an alloca. If so, we can + // round up the alloca if needed. 
+ if (AllocaInst *AI = dyn_cast(Arg->stripPointerCasts())) { + unsigned AIAlign = AI->getAlignment(); + + // If the alloca is known to be at least as aligned as the byval, we can do + // this optimization. + if (AIAlign >= ByValAlignment) + return Arg; + + // If the alloca has a specified alignment that is less than the byval, + // then we can safely bump it up. + if (AIAlign) { + AI->setAlignment(ByValAlignment); + return Arg; + } + + // If the alloca has an unspecified alignment, then we can only modify + // it if we have TD information. Doing so without TD info could end up + // with us rounding the alignment *down* accidentally, which is badness. + if (IFI.TD) { + AIAlign = std::max(ByValAlignment, IFI.TD->getPrefTypeAlignment(AggTy)); + AI->setAlignment(AIAlign); + return Arg; + } + } + + // Otherwise, we have to make a memcpy to get a safe alignment, pretty lame. + } LLVMContext &Context = Arg->getContext(); - - const Type *AggTy = cast(Arg->getType())->getElementType(); const Type *VoidPtrTy = Type::getInt8PtrTy(Context); // Create the alloca. If we have TargetData, use nice alignment. diff --git a/test/Transforms/Inline/byval.ll b/test/Transforms/Inline/byval.ll index 08219fcec8d..e601faf2bb3 100644 --- a/test/Transforms/Inline/byval.ll +++ b/test/Transforms/Inline/byval.ll @@ -81,3 +81,26 @@ entry: ; CHECK: call void @g3(%struct.ss* %S1) ; CHECK: ret void } + + +; Inlining a byval struct should NOT cause an explicit copy +; into an alloca if the function is readonly, but should increase an alloca's +; alignment to satisfy an explicit alignment request. 
+ +define internal i32 @f4(%struct.ss* byval align 64 %b) nounwind readonly { + call void @g3(%struct.ss* %b) + ret i32 4 +} + +define i32 @test4() nounwind { +entry: + %S = alloca %struct.ss, align 2 ; <%struct.ss*> [#uses=4] + %X = call i32 @f4( %struct.ss* byval align 64 %S ) nounwind + ret i32 %X +; CHECK: @test4() +; CHECK: %S = alloca %struct.ss, align 64 +; CHECK-NOT: call void @llvm.memcpy +; CHECK: call void @g3 +; CHECK: ret i32 4 +} +