X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FScalar%2FGVN.cpp;h=481956f6b4ce039dbb5307cc558372ce9e4c5d06;hb=61d30a821f372ca4185fc518023f857fbcb9c0f5;hp=a4f78fe4573c4291a960d2490e887b64039295f8;hpb=5aa4f2a0857563b4ad9115c614afed9501aa58f4;p=oota-llvm.git diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index a4f78fe4573..481956f6b4c 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -34,6 +34,7 @@ #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Compiler.h" +#include "llvm/Target/TargetData.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -721,8 +722,10 @@ namespace { AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); AU.addPreserved(); AU.addPreserved(); + AU.addPreserved(); } // Helper fuctions @@ -738,7 +741,8 @@ namespace { SmallVector& toErase); bool processNonLocalLoad(LoadInst* L, SmallVector& toErase); - bool processMemCpy(MemCpyInst* M, SmallVector& toErase); + bool processMemCpy(MemCpyInst* M, MemCpyInst* MDep, + SmallVector& toErase); bool performReturnSlotOptzn(MemCpyInst* cpy, CallInst* C, SmallVector& toErase); Value *GetValueForBlock(BasicBlock *BB, LoadInst* orig, @@ -1051,57 +1055,91 @@ bool GVN::processLoad(LoadInst* L, return deletedLoad; } +/// isReturnSlotOptznProfitable - Determine if performing a return slot +/// fusion with the slot dest is profitable +static bool isReturnSlotOptznProfitable(Value* dest, MemCpyInst* cpy) { + // We currently consider it profitable if dest is otherwise dead. + SmallVector useList(dest->use_begin(), dest->use_end()); + while (!useList.empty()) { + User* UI = useList.back(); + + if (isa(UI) || isa(UI)) { + useList.pop_back(); + for (User::use_iterator I = UI->use_begin(), E = UI->use_end(); + I != E; ++I) + useList.push_back(*I); + } else if (UI == cpy) + useList.pop_back(); + else + return false; + } + + return true; +} + /// performReturnSlotOptzn - takes a memcpy and a call that it depends on, /// and checks for the possibility of a return slot optimization by having /// the call write its result directly into the callees return parameter /// rather than using memcpy bool GVN::performReturnSlotOptzn(MemCpyInst* cpy, CallInst* C, SmallVector& toErase) { - // Check that we're copying to an argument... + // Deliberately get the source and destination with bitcasts stripped away, + // because we'll need to do type comparisons based on the underlying type. Value* cpyDest = cpy->getDest(); - if (!isa(cpyDest)) + Value* cpySrc = cpy->getSource(); + CallSite CS = CallSite::get(C); + + // Since this is a return slot optimization, we need to make sure that + // the value being copied is, in fact, in a return slot. We also need to + // check that the return slot parameter is marked noalias, so that we can + // be sure that changing it will not cause unexpected behavior changes due + // to it being accessed through a global or another parameter. + if (CS.arg_size() == 0 || + cpySrc != CS.getArgument(0) || + !CS.paramHasAttr(1, ParamAttr::NoAlias | ParamAttr::StructRet)) return false; - // And that the argument is the return slot - Argument* sretArg = cast(cpyDest); - if (!sretArg->hasStructRetAttr()) + // Check that something sneaky is not happening involving casting + // return slot types around. + if (CS.getArgument(0)->getType() != cpyDest->getType()) return false; + // sret --> pointer + const PointerType* PT = cast(cpyDest->getType()); - // Make sure the return slot is otherwise dead - std::set useList(sretArg->use_begin(), sretArg->use_end()); - while (!useList.empty()) { - User* UI = *useList.begin(); - - if (isa(UI) || isa(UI)) { - useList.insert(UI->use_begin(), UI->use_end()); - useList.erase(UI); - } else if (UI == cpy) - useList.erase(UI); - else - return false; - } + // We can only perform the transformation if the size of the memcpy + // is constant and equal to the size of the structure. + ConstantInt* cpyLength = dyn_cast(cpy->getLength()); + if (!cpyLength) + return false; - // Make sure the call cannot modify the return slot in some unpredicted way + TargetData& TD = getAnalysis(); + if (TD.getTypeStoreSize(PT->getElementType()) != cpyLength->getZExtValue()) + return false; + + // We only perform the transformation if it will be profitable. + if (!isReturnSlotOptznProfitable(cpyDest, cpy)) + return false; + + // In addition to knowing that the call does not access the return slot + // in some unexpected manner, which we derive from the noalias attribute, + // we also need to know that it does not sneakily modify the destination + // slot in the caller. We don't have parameter attributes to go by + // for this one, so we just rely on AA to figure it out for us. AliasAnalysis& AA = getAnalysis(); - if (AA.getModRefInfo(C, cpy->getRawDest(), ~0UL) != AliasAnalysis::NoModRef) + if (AA.getModRefInfo(C, cpy->getRawDest(), cpyLength->getZExtValue()) != + AliasAnalysis::NoModRef) return false; - // If all checks passed, then we can perform the transformation - CallSite CS = CallSite::get(C); - for (unsigned i = 0; i < CS.arg_size(); ++i) { - if (CS.paramHasAttr(i+1, ParamAttr::StructRet)) { - if (CS.getArgument(i)->getType() != cpyDest->getType()) - return false; - - CS.setArgument(i, cpyDest); - break; - } - } + // If all the checks have passed, then we're alright to do the transformation. + CS.setArgument(0, cpyDest); + // Drop any cached information about the call, because we may have changed + // its dependence information by changing its parameter. MemoryDependenceAnalysis& MD = getAnalysis(); MD.dropInstruction(C); // Remove the memcpy + MD.removeInstruction(cpy); toErase.push_back(cpy); return true; @@ -1111,25 +1149,10 @@ bool GVN::performReturnSlotOptzn(MemCpyInst* cpy, CallInst* C, /// copies X to Y, and memcpy B which copies Y to Z, then we can rewrite B to be /// a memcpy from X to Z (or potentially a memmove, depending on circumstances). /// This allows later passes to remove the first memcpy altogether. -bool GVN::processMemCpy(MemCpyInst* M, +bool GVN::processMemCpy(MemCpyInst* M, MemCpyInst* MDep, SmallVector& toErase) { - MemoryDependenceAnalysis& MD = getAnalysis(); - - // First, we have to check that the dependency is another memcpy - Instruction* dep = MD.getDependency(M); - if (dep == MemoryDependenceAnalysis::None || - dep == MemoryDependenceAnalysis::NonLocal) - return false; - else if (!isa(dep)) { - if (CallInst* C = dyn_cast(dep)) - return performReturnSlotOptzn(M, C, toErase); - else - return false; - } - // We can only transforms memcpy's where the dest of one is the source of the // other - MemCpyInst* MDep = cast(dep); if (M->getSource() != MDep->getDest()) return false; @@ -1160,11 +1183,9 @@ bool GVN::processMemCpy(MemCpyInst* M, return false; // If all checks passed, then we can transform these memcpy's - bool is32bit = M->getIntrinsicID() == Intrinsic::memcpy_i32; - Function* MemMoveFun = Intrinsic::getDeclaration( + Function* MemCpyFun = Intrinsic::getDeclaration( M->getParent()->getParent()->getParent(), - is32bit ? Intrinsic::memcpy_i32 : - Intrinsic::memcpy_i64); + M->getIntrinsicID()); std::vector args; args.push_back(M->getRawDest()); @@ -1172,8 +1193,9 @@ bool GVN::processMemCpy(MemCpyInst* M, args.push_back(M->getLength()); args.push_back(M->getAlignment()); - CallInst* C = new CallInst(MemMoveFun, args.begin(), args.end(), "", M); + CallInst* C = new CallInst(MemCpyFun, args.begin(), args.end(), "", M); + MemoryDependenceAnalysis& MD = getAnalysis(); if (MD.getDependency(C) == MDep) { MD.dropInstruction(M); toErase.push_back(M); @@ -1194,7 +1216,20 @@ bool GVN::processInstruction(Instruction* I, if (LoadInst* L = dyn_cast(I)) { return processLoad(L, lastSeenLoad, toErase); } else if (MemCpyInst* M = dyn_cast(I)) { - return processMemCpy(M, toErase); + MemoryDependenceAnalysis& MD = getAnalysis(); + + // The are two possible optimizations we can do for memcpy: + // a) memcpy-memcpy xform which exposes redundance for DSE + // b) call-memcpy xform for sret return slot optimization + Instruction* dep = MD.getDependency(M); + if (dep == MemoryDependenceAnalysis::None || + dep == MemoryDependenceAnalysis::NonLocal) + return false; + if (MemCpyInst *MemCpy = dyn_cast(dep)) + return processMemCpy(M, MemCpy, toErase); + if (CallInst* C = dyn_cast(dep)) + return performReturnSlotOptzn(M, C, toErase); + return false; } unsigned num = VN.lookup_or_add(I);