X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FUtils%2FInlineFunction.cpp;h=c1faf2411331c5a56bbbf965107e354df34fcf70;hb=2bc2a08b1bf6b5dcbfa515acc85999d6f884ec1a;hp=91390bc7beca48edafe3281006842210cb1275f4;hpb=0ca2f28458ae9122f413a4092ddcee33a9dd21c6;p=oota-llvm.git

diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 91390bc7bec..c1faf241133 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -22,7 +22,9 @@
 #include "llvm/Attributes.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/CallSite.h"
@@ -63,7 +65,8 @@ static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
 
       // Next, create the new invoke instruction, inserting it at the end
       // of the old basic block.
-      SmallVector<Value*, 8> InvokeArgs(CI->op_begin()+1, CI->op_end());
+      ImmutableCallSite CS(CI);
+      SmallVector<Value*, 8> InvokeArgs(CS.arg_begin(), CS.arg_end());
       InvokeInst *II =
         InvokeInst::Create(CI->getCalledValue(), Split, InvokeDest,
                            InvokeArgs.begin(), InvokeArgs.end(),
@@ -169,7 +172,7 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
 /// some edges of the callgraph may remain.
 static void UpdateCallGraphAfterInlining(CallSite CS,
                                          Function::iterator FirstNewBlock,
-                                         DenseMap<const Value*, Value*> &ValueMap,
+                                         ValueToValueMapTy &VMap,
                                          InlineFunctionInfo &IFI) {
   CallGraph &CG = *IFI.CG;
   const Function *Caller = CS.getInstruction()->getParent()->getParent();
@@ -192,9 +195,9 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
   for (; I != E; ++I) {
     const Value *OrigCall = I->first;
 
-    DenseMap<const Value*, Value*>::iterator VMI = ValueMap.find(OrigCall);
+    ValueToValueMapTy::iterator VMI = VMap.find(OrigCall);
     // Only copy the edge if the call was inlined!
-    if (VMI == ValueMap.end() || VMI->second == 0)
+    if (VMI == VMap.end() || VMI->second == 0)
       continue;
 
     // If the call was inlined, but then constant folded, there is no edge to
@@ -214,12 +217,12 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
     if (I->second->getFunction() == 0)
       if (Function *F = CallSite(NewCall).getCalledFunction()) {
         // Indirect call site resolved to direct call.
-        CallerNode->addCalledFunction(CallSite::get(NewCall), CG[F]);
-
+        CallerNode->addCalledFunction(CallSite(NewCall), CG[F]);
+
         continue;
       }
-
-    CallerNode->addCalledFunction(CallSite::get(NewCall), I->second);
+
+    CallerNode->addCalledFunction(CallSite(NewCall), I->second);
   }
 
   // Update the call graph by deleting the edge from Callee to Caller.  We must
@@ -227,6 +230,90 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
   CallerNode->removeCallEdgeFor(CS);
 }
 
+/// HandleByValArgument - When inlining a call site that has a byval argument,
+/// we have to make the implicit memcpy explicit by adding it.
+static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
+                                  const Function *CalledFunc,
+                                  InlineFunctionInfo &IFI,
+                                  unsigned ByValAlignment) {
+  const Type *AggTy = cast<PointerType>(Arg->getType())->getElementType();
+
+  // If the called function is readonly, then it could not mutate the caller's
+  // copy of the byval'd memory.  In this case, it is safe to elide the copy and
+  // temporary.
+  if (CalledFunc->onlyReadsMemory()) {
+    // If the byval argument has a specified alignment that is greater than the
+    // passed in pointer, then we either have to round up the input pointer or
+    // give up on this transformation.
+    if (ByValAlignment <= 1)  // 0 = unspecified, 1 = no particular alignment.
+      return Arg;
+
+    // If the pointer is already known to be sufficiently aligned, or if we can
+    // round it up to a larger alignment, then we don't need a temporary.
+    if (getOrEnforceKnownAlignment(Arg, ByValAlignment,
+                                   IFI.TD) >= ByValAlignment)
+      return Arg;
+
+    // Otherwise, we have to make a memcpy to get a safe alignment.  This is bad
+    // for code quality, but rarely happens and is required for correctness.
+  }
+
+  LLVMContext &Context = Arg->getContext();
+
+  const Type *VoidPtrTy = Type::getInt8PtrTy(Context);
+
+  // Create the alloca.  If we have TargetData, use nice alignment.
+  unsigned Align = 1;
+  if (IFI.TD)
+    Align = IFI.TD->getPrefTypeAlignment(AggTy);
+
+  // If the byval had an alignment specified, we *must* use at least that
+  // alignment, as it is required by the byval argument (and uses of the
+  // pointer inside the callee).
+  Align = std::max(Align, ByValAlignment);
+
+  Function *Caller = TheCall->getParent()->getParent();
+
+  Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(),
+                                    &*Caller->begin()->begin());
+  // Emit a memcpy.
+  const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
+  Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
+                                                 Intrinsic::memcpy,
+                                                 Tys, 3);
+  Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
+  Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall);
+
+  Value *Size;
+  if (IFI.TD == 0)
+    Size = ConstantExpr::getSizeOf(AggTy);
+  else
+    Size = ConstantInt::get(Type::getInt64Ty(Context),
+                            IFI.TD->getTypeStoreSize(AggTy));
+
+  // Always generate a memcpy of alignment 1 here because we don't know
+  // the alignment of the src pointer.  Other optimizations can infer
+  // better alignment.
+  Value *CallArgs[] = {
+    DestCast, SrcCast, Size,
+    ConstantInt::get(Type::getInt32Ty(Context), 1),
+    ConstantInt::getFalse(Context) // isVolatile
+  };
+  CallInst *TheMemCpy =
+    CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall);
+
+  // If we have a call graph, update it.
+  if (CallGraph *CG = IFI.CG) {
+    CallGraphNode *MemCpyCGN = CG->getOrInsertFunction(MemCpyFn);
+    CallGraphNode *CallerNode = (*CG)[Caller];
+    CallerNode->addCalledFunction(TheMemCpy, MemCpyCGN);
+  }
+
+  // Uses of the argument in the function should use our new alloca
+  // instead.
+  return NewAlloca;
+}
+
 // InlineFunction - This function inlines the called function into the basic
 // block of the caller.  This returns false if it is not possible to inline this
 // call.  The program is still in a well defined state if this occurs though.
@@ -250,7 +337,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
       CalledFunc->isDeclaration() || // call, or call to a vararg function!
       CalledFunc->getFunctionType()->isVarArg()) return false;
 
-
   // If the call to the callee is not a tail call, we must clear the 'tail'
   // flags on any calls that we inline.
   bool MustClearTailCallFlags =
@@ -285,8 +371,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
   ClonedCodeInfo InlinedFunctionInfo;
   Function::iterator FirstNewBlock;
 
-  { // Scope to destroy ValueMap after cloning.
-    DenseMap<const Value*, Value*> ValueMap;
+  { // Scope to destroy VMap after cloning.
+    ValueToValueMapTy VMap;
 
     assert(CalledFunc->arg_size() == CS.arg_size() &&
            "No varargs calls can be inlined!");
@@ -303,64 +389,25 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
       // by them explicit.  However, we don't do this if the callee is readonly
      // or readnone, because the copy would be unneeded: the callee doesn't
      // modify the struct.
-      if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal) &&
-          !CalledFunc->onlyReadsMemory()) {
-        const Type *AggTy = cast<PointerType>(I->getType())->getElementType();
-        const Type *VoidPtrTy =
-            Type::getInt8PtrTy(Context);
-
-        // Create the alloca.  If we have TargetData, use nice alignment.
-        unsigned Align = 1;
-        if (IFI.TD) Align = IFI.TD->getPrefTypeAlignment(AggTy);
-        Value *NewAlloca = new AllocaInst(AggTy, 0, Align,
-                                          I->getName(),
-                                          &*Caller->begin()->begin());
-        // Emit a memcpy.
-        const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
-        Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
-                                                       Intrinsic::memcpy,
-                                                       Tys, 3);
-        Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
-        Value *SrcCast = new BitCastInst(*AI, VoidPtrTy, "tmp", TheCall);
-
-        Value *Size;
-        if (IFI.TD == 0)
-          Size = ConstantExpr::getSizeOf(AggTy);
-        else
-          Size = ConstantInt::get(Type::getInt64Ty(Context),
-                                  IFI.TD->getTypeStoreSize(AggTy));
-
-        // Always generate a memcpy of alignment 1 here because we don't know
-        // the alignment of the src pointer.  Other optimizations can infer
-        // better alignment.
-        Value *CallArgs[] = {
-          DestCast, SrcCast, Size,
-          ConstantInt::get(Type::getInt32Ty(Context), 1),
-          ConstantInt::get(Type::getInt1Ty(Context), 0)
-        };
-        CallInst *TheMemCpy =
-          CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall);
-
-        // If we have a call graph, update it.
-        if (CallGraph *CG = IFI.CG) {
-          CallGraphNode *MemCpyCGN = CG->getOrInsertFunction(MemCpyFn);
-          CallGraphNode *CallerNode = (*CG)[Caller];
-          CallerNode->addCalledFunction(TheMemCpy, MemCpyCGN);
-        }
-
-        // Uses of the argument in the function should use our new alloca
-        // instead.
-        ActualArg = NewAlloca;
+      if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal)) {
+        ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
+                                        CalledFunc->getParamAlignment(ArgNo+1));
+
+        // Calls that we inline may use the new alloca, so we need to clear
+        // their 'tail' flags if HandleByValArgument introduced a new alloca and
+        // the callee has calls.
+        MustClearTailCallFlags |= ActualArg != *AI;
       }
 
-      ValueMap[I] = ActualArg;
+      VMap[I] = ActualArg;
     }
 
     // We want the inliner to prune the code as it copies.  We would LOVE to
     // have no dead or constant instructions leftover after inlining occurs
     // (which can happen, e.g., because an argument was constant), but we'll be
     // happy with whatever the cloner can do.
-    CloneAndPruneFunctionInto(Caller, CalledFunc, ValueMap, Returns, ".i",
+    CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
+                              /*ModuleLevelChanges=*/false, Returns, ".i",
                               &InlinedFunctionInfo, IFI.TD, TheCall);
 
     // Remember the first block that is newly cloned over.
@@ -368,7 +415,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
 
     // Update the callgraph if requested.
     if (IFI.CG)
-      UpdateCallGraphAfterInlining(CS, FirstNewBlock, ValueMap, IFI);
+      UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
   }
 
   // If there are any alloca instructions in the block that used to be the entry
@@ -393,8 +440,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
       if (!isa<ConstantInt>(AI->getArraySize()))
         continue;
 
-      // Keep track of the static allocas that we inline into the caller if the
-      // StaticAllocas pointer is non-null.
+      // Keep track of the static allocas that we inline into the caller.
       IFI.StaticAllocas.push_back(AI);
 
       // Scan for the block of allocas that we can move over, and move them
@@ -573,10 +619,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
   // any users of the original call/invoke instruction.
   const Type *RTy = CalledFunc->getReturnType();
 
+  PHINode *PHI = 0;
   if (Returns.size() > 1) {
     // The PHI node should go at the front of the new basic block to merge all
     // possible incoming values.
-    PHINode *PHI = 0;
     if (!TheCall->use_empty()) {
       PHI = PHINode::Create(RTy, TheCall->getName(),
                             AfterCallBB->begin());
@@ -594,14 +640,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
              "Ret value not consistent in function!");
       PHI->addIncoming(RI->getReturnValue(), RI->getParent());
     }
-
-    // Now that we inserted the PHI, check to see if it has a single value
-    // (e.g. all the entries are the same or undef).  If so, remove the PHI so
-    // it doesn't block other optimizations.
-    if (Value *V = PHI->hasConstantValue()) {
-      PHI->replaceAllUsesWith(V);
-      PHI->eraseFromParent();
-    }
   }
 
 
@@ -658,5 +696,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
   // Now we can remove the CalleeEntry block, which is now empty.
   Caller->getBasicBlockList().erase(CalleeEntry);
 
+  // If we inserted a phi node, check to see if it has a single value (e.g. all
+  // the entries are the same or undef).  If so, remove the PHI so it doesn't
+  // block other optimizations.
+  if (PHI)
+    if (Value *V = SimplifyInstruction(PHI, IFI.TD)) {
+      PHI->replaceAllUsesWith(V);
+      PHI->eraseFromParent();
+    }
+
   return true;
 }
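
For reference, a minimal caller-side sketch (not part of the patch) of how this entry point is typically driven, e.g. from an inliner pass. Only the InlineFunction(CS, IFI) signature and the IFI.CG, IFI.TD, and IFI.StaticAllocas members are taken from the code above; the InlineFunctionInfo constructor and the Cloning.h header are assumed from the 2.8-era tree, and TryToInlineCall is a hypothetical helper name.

// Sketch only: hand the call graph and TargetData to InlineFunction via
// InlineFunctionInfo so the logic above can keep both up to date.
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/Cloning.h"
using namespace llvm;

// Hypothetical helper; CG and TD may be null when those analyses are unavailable.
static bool TryToInlineCall(CallSite CS, CallGraph *CG, const TargetData *TD) {
  InlineFunctionInfo IFI(CG, TD);   // assumed ctor: InlineFunctionInfo(CallGraph*, const TargetData*)
  if (!InlineFunction(CS, IFI))     // returns false for indirect, declaration-only, or vararg callees
    return false;

  // Static allocas cloned into the caller (including any byval copy made by
  // HandleByValArgument above) are now recorded in IFI.StaticAllocas.
  return true;
}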