From 59bec0e3c0c39cf704c6eb7fb2a44e787dbab4a0 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Tue, 28 Jan 2014 02:38:36 +0000 Subject: [PATCH] Update optimization passes to handle inalloca arguments Summary: I searched Transforms/ and Analysis/ for 'ByVal' and updated those call sites to check for inalloca if appropriate. I added tests for any change that would allow an optimization to fire on inalloca. Reviewers: nlewycky Differential Revision: http://llvm-reviews.chandlerc.com/D2449 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@200281 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/MemoryBuiltins.cpp | 2 +- lib/Analysis/ValueTracking.cpp | 9 ++-- lib/Transforms/IPO/ArgumentPromotion.cpp | 15 ++++-- .../IPO/DeadArgumentElimination.cpp | 2 +- lib/Transforms/IPO/FunctionAttrs.cpp | 4 ++ lib/Transforms/IPO/IPConstantPropagation.cpp | 2 +- .../InstCombine/InstCombineCalls.cpp | 7 ++- .../InstCombineLoadStoreAlloca.cpp | 6 ++- lib/Transforms/ObjCARC/ObjCARC.h | 1 + .../Scalar/DeadStoreElimination.cpp | 10 ++-- test/Transforms/ArgumentPromotion/inalloca.ll | 49 +++++++++++++++++++ .../Transforms/DeadStoreElimination/simple.ll | 9 ++++ test/Transforms/FunctionAttrs/readattrs.ll | 6 +++ .../InstCombine/call-cast-target-inalloca.ll | 22 +++++++++ 14 files changed, 124 insertions(+), 20 deletions(-) create mode 100644 test/Transforms/ArgumentPromotion/inalloca.ll create mode 100644 test/Transforms/InstCombine/call-cast-target-inalloca.ll diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index 37e2e271cee..82a910fd376 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -458,7 +458,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) { SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) { // no interprocedural analysis is done at the moment - if (!A.hasByValAttr()) { + if (!A.hasByValOrInAllocaAttr()) { ++ObjectVisitorArgument; return unknown(); } diff 
--git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 803051d0bb2..1e423c45126 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -311,8 +311,9 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, if (Argument *A = dyn_cast<Argument>(V)) { unsigned Align = 0; - if (A->hasByValAttr()) { - // Get alignment information off byval arguments if specified in the IR. + if (A->hasByValOrInAllocaAttr()) { + // Get alignment information off byval/inalloca arguments if specified in + // the IR. Align = A->getParamAlignment(); } else if (TD && A->hasStructRetAttr()) { // An sret parameter has at least the ABI alignment of the return type. @@ -2070,9 +2071,9 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { // Alloca never returns null, malloc might. if (isa<AllocaInst>(V)) return true; - // A byval argument is never null. + // A byval or inalloca argument is never null. if (const Argument *A = dyn_cast<Argument>(V)) - return A->hasByValAttr(); + return A->hasByValOrInAllocaAttr(); // Global values are not null unless extern weak. if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index ea24d526942..36e7833ac16 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -155,7 +155,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType(); // If this is a byval argument, and if the aggregate type is small, just - // pass the elements, which is always safe. + // pass the elements, which is always safe. This does not apply to + // inalloca.
if (PtrArg->hasByValAttr()) { if (StructType *STy = dyn_cast<StructType>(AgTy)) { if (maxElements > 0 && STy->getNumElements() > maxElements) { @@ -201,7 +202,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { } // Otherwise, see if we can promote the pointer to its value. - if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValAttr())) + if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr())) ArgsToPromote.insert(PtrArg); } @@ -301,7 +302,8 @@ static void MarkIndicesSafe(const ArgPromotion::IndicesVector &ToMark, /// This method limits promotion of aggregates to only promote up to three /// elements of the aggregate in order to avoid exploding the number of /// arguments passed in. -bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { +bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, + bool isByValOrInAlloca) const { typedef std::set<IndicesVector> GEPIndicesSet; // Quick exit for unused arguments @@ -323,6 +325,9 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { // // This set will contain all sets of indices that are loaded in the entry // block, and thus are safe to unconditionally load in the caller. + // + // This optimization is also safe for InAlloca parameters, because it verifies + // that the address isn't captured. GEPIndicesSet SafeToUnconditionallyLoad; // This set contains all the sets of indices that we are planning to promote. @@ -330,7 +335,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { GEPIndicesSet ToPromote; // If the pointer is always valid, any load with first index 0 is valid.
- if (isByVal || AllCallersPassInValidPointerForArgument(Arg)) + if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg)) SafeToUnconditionallyLoad.insert(IndicesVector(1, 0)); // First, iterate the entry block and mark loads of (geps of) arguments as @@ -389,7 +394,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { // TODO: This runs the above loop over and over again for dead GEPs // Couldn't we just do increment the UI iterator earlier and erase the // use? - return isSafeToPromoteArgument(Arg, isByVal); + return isSafeToPromoteArgument(Arg, isByValOrInAlloca); } // Ensure that all of the indices are constants. diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 911c14e7e31..81dbd329c4b 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -378,7 +378,7 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn) I != E; ++I) { Argument *Arg = I; - if (Arg->use_empty() && !Arg->hasByValAttr()) + if (Arg->use_empty() && !Arg->hasByValOrInAllocaAttr()) UnusedArgs.push_back(Arg->getArgNo()); } diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index ea664506da2..92d2f79fa73 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -414,6 +414,10 @@ determinePointerReadAttrs(Argument *A, SmallSet Visited; int Count = 0; + // inalloca arguments are always clobbered by the call. + if (A->hasInAllocaAttr()) + return Attribute::None; + bool IsRead = false; // We don't need to track IsWritten. If A is written to, return immediately. 
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp index 8b816e556e3..16596bbfe98 100644 --- a/lib/Transforms/IPO/IPConstantPropagation.cpp +++ b/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -135,7 +135,7 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) { for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++AI) { // Do we have a constant argument? if (ArgumentConstants[i].second || AI->use_empty() || - (AI->hasByValAttr() && !F.onlyReadsMemory())) + AI->hasInAllocaAttr() || (AI->hasByValAttr() && !F.onlyReadsMemory())) continue; Value *V = ArgumentConstants[i].first; diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 7d2fc0a5289..8e308ecacf3 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -767,10 +767,10 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS, if (!CI->isLosslessCast()) return false; - // The size of ByVal arguments is derived from the type, so we + // The size of ByVal or InAlloca arguments is derived from the type, so we // can't change to a type with a different size. If the size were // passed explicitly we could avoid this check. - if (!CS.isByValArgument(ix)) + if (!CS.isByValOrInAllocaArgument(ix)) return true; Type* SrcTy = @@ -1049,6 +1049,9 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { typeIncompatible(ParamTy, i + 1), i + 1)) return false; // Attribute not compatible with transformed value. + if (CS.isInAllocaArgument(i)) + return false; // Cannot transform to and from inalloca. + // If the parameter is passed as a byval argument, then we have to have a // sized type and the sized type has to have the same size as the old type. 
if (ParamTy != ActTy && diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 4c861b3fd09..1f691768177 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -81,10 +81,14 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, if (CS.isCallee(UI)) continue; + // Inalloca arguments are clobbered by the call. + unsigned ArgNo = CS.getArgumentNo(UI); + if (CS.isInAllocaArgument(ArgNo)) + return false; + // If this is a readonly/readnone call site, then we know it is just a // load (but one that potentially returns the value itself), so we can // ignore it if we know that the value isn't captured. - unsigned ArgNo = CS.getArgumentNo(UI); if (CS.onlyReadsMemory() && (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo))) continue; diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h index 8044494f18e..8a6d67ef828 100644 --- a/lib/Transforms/ObjCARC/ObjCARC.h +++ b/lib/Transforms/ObjCARC/ObjCARC.h @@ -308,6 +308,7 @@ static inline bool IsPotentialRetainableObjPtr(const Value *Op) { // Special arguments can not be a valid retainable object pointer. if (const Argument *Arg = dyn_cast<Argument>(Op)) if (Arg->hasByValAttr() || + Arg->hasInAllocaAttr() || Arg->hasNestAttr() || Arg->hasStructRetAttr()) return false; diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 49f19bc60f2..453dca303d3 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -374,8 +374,8 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, return OverwriteUnknown; // Check to see if the later store is to the entire object (either a global, - // an alloca, or a byval argument). If so, then it clearly overwrites any - // other store to the same object.
+ // an alloca, or a byval/inalloca argument). If so, then it clearly + // overwrites any other store to the same object. const DataLayout *TD = AA.getDataLayout(); const Value *UO1 = GetUnderlyingObject(P1, TD), @@ -742,11 +742,11 @@ bool DSE::handleEndBlock(BasicBlock &BB) { DeadStackObjects.insert(I); } - // Treat byval arguments the same, stores to them are dead at the end of the - // function. + // Treat byval or inalloca arguments the same, stores to them are dead at the + // end of the function. for (Function::arg_iterator AI = BB.getParent()->arg_begin(), AE = BB.getParent()->arg_end(); AI != AE; ++AI) - if (AI->hasByValAttr()) + if (AI->hasByValOrInAllocaAttr()) DeadStackObjects.insert(AI); // Scan the basic block backwards diff --git a/test/Transforms/ArgumentPromotion/inalloca.ll b/test/Transforms/ArgumentPromotion/inalloca.ll new file mode 100644 index 00000000000..513a968255e --- /dev/null +++ b/test/Transforms/ArgumentPromotion/inalloca.ll @@ -0,0 +1,49 @@ +; RUN: opt %s -argpromotion -scalarrepl -S | FileCheck %s + +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + +%struct.ss = type { i32, i32 } + +; Argpromote + scalarrepl should change this to passing the two integers by value. 
+define internal i32 @f(%struct.ss* inalloca %s) { +entry: + %f0 = getelementptr %struct.ss* %s, i32 0, i32 0 + %f1 = getelementptr %struct.ss* %s, i32 0, i32 1 + %a = load i32* %f0, align 4 + %b = load i32* %f1, align 4 + %r = add i32 %a, %b + ret i32 %r +} +; CHECK-LABEL: define internal i32 @f +; CHECK-NOT: load +; CHECK: ret + +define i32 @main() { +entry: + %S = alloca %struct.ss + %f0 = getelementptr %struct.ss* %S, i32 0, i32 0 + %f1 = getelementptr %struct.ss* %S, i32 0, i32 1 + store i32 1, i32* %f0, align 4 + store i32 2, i32* %f1, align 4 + %r = call i32 @f(%struct.ss* inalloca %S) + ret i32 %r +} +; CHECK-LABEL: define i32 @main +; CHECK-NOT: load +; CHECK: ret + +; Argpromote can't promote %a because of the icmp use. +define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b) nounwind { +; CHECK: define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b) +entry: + %c = icmp eq %struct.ss* %a, %b + ret i1 %c +} + +define i32 @test() { +entry: + %S = alloca %struct.ss + %c = call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S) +; CHECK: call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S) + ret i32 0 +} diff --git a/test/Transforms/DeadStoreElimination/simple.ll b/test/Transforms/DeadStoreElimination/simple.ll index ec98466d33b..cdfe2268eff 100644 --- a/test/Transforms/DeadStoreElimination/simple.ll +++ b/test/Transforms/DeadStoreElimination/simple.ll @@ -105,6 +105,15 @@ define void @test9(%struct.x* byval %a) nounwind { ; CHECK-NEXT: ret void } +; Test for inalloca handling. +define void @test9_2(%struct.x* inalloca %a) nounwind { + %tmp2 = getelementptr %struct.x* %a, i32 0, i32 0 + store i32 1, i32* %tmp2, align 4 + ret void +; CHECK-LABEL: @test9_2( +; CHECK-NEXT: ret void +} + ; va_arg has fuzzy dependence, the store shouldn't be zapped. 
define double @test10(i8* %X) { %X_addr = alloca i8* diff --git a/test/Transforms/FunctionAttrs/readattrs.ll b/test/Transforms/FunctionAttrs/readattrs.ll index 0842f566d12..7ae38bbb102 100644 --- a/test/Transforms/FunctionAttrs/readattrs.ll +++ b/test/Transforms/FunctionAttrs/readattrs.ll @@ -45,3 +45,9 @@ define void @test6_2(i8** %p, i8* %q) { call void @test6_1() ret void } + +; CHECK: define void @test7_1(i32* inalloca nocapture %a) +; inalloca parameters are always considered written +define void @test7_1(i32* inalloca %a) { + ret void +} diff --git a/test/Transforms/InstCombine/call-cast-target-inalloca.ll b/test/Transforms/InstCombine/call-cast-target-inalloca.ll new file mode 100644 index 00000000000..baf97e0ce9a --- /dev/null +++ b/test/Transforms/InstCombine/call-cast-target-inalloca.ll @@ -0,0 +1,22 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:32:32" +target triple = "i686-pc-linux-gnu" + +declare void @takes_i32(i32) +declare void @takes_i32_inalloca(i32* inalloca) + +define void @f() { +; CHECK-LABEL: define void @f() + %args = alloca i32 + call void bitcast (void (i32)* @takes_i32 to void (i32*)*)(i32* inalloca %args) +; CHECK: call void bitcast + ret void +} + +define void @g() { +; CHECK-LABEL: define void @g() + call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0) +; CHECK: call void bitcast + ret void +} -- 2.34.1