#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetData.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
AU.addRequired<DominatorTree>();
AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetData>();
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<MemoryDependenceAnalysis>();
+ AU.addPreserved<TargetData>();
}
// Helper fuctions
SmallVector<Instruction*, 4>& toErase);
bool processNonLocalLoad(LoadInst* L,
SmallVector<Instruction*, 4>& toErase);
- bool processMemCpy(MemCpyInst* M, SmallVector<Instruction*, 4>& toErase);
+ bool processMemCpy(MemCpyInst* M, MemCpyInst* MDep,
+ SmallVector<Instruction*, 4>& toErase);
bool performReturnSlotOptzn(MemCpyInst* cpy, CallInst* C,
SmallVector<Instruction*, 4>& toErase);
Value *GetValueForBlock(BasicBlock *BB, LoadInst* orig,
return deletedLoad;
}
+/// isReturnSlotOptznProfitable - Determine if performing a return slot
+/// fusion with the slot dest is profitable
+static bool isReturnSlotOptznProfitable(Value* dest, MemCpyInst* cpy) {
+ // We currently consider it profitable if dest is otherwise dead.
+ SmallVector<User*, 8> useList(dest->use_begin(), dest->use_end());
+ while (!useList.empty()) {
+ User* UI = useList.back();
+
+ if (isa<GetElementPtrInst>(UI) || isa<BitCastInst>(UI)) {
+ useList.pop_back();
+ for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
+ I != E; ++I)
+ useList.push_back(*I);
+ } else if (UI == cpy)
+ useList.pop_back();
+ else
+ return false;
+ }
+
+ return true;
+}
+
/// performReturnSlotOptzn - takes a memcpy and a call that it depends on,
/// and checks for the possibility of a return slot optimization by having
/// the call write its result directly into the callees return parameter
/// rather than using memcpy
bool GVN::performReturnSlotOptzn(MemCpyInst* cpy, CallInst* C,
SmallVector<Instruction*, 4>& toErase) {
- // Check that we're copying to an argument...
+ // Deliberately get the source and destination with bitcasts stripped away,
+ // because we'll need to do type comparisons based on the underlying type.
Value* cpyDest = cpy->getDest();
- if (!isa<Argument>(cpyDest))
+ Value* cpySrc = cpy->getSource();
+ CallSite CS = CallSite::get(C);
+
+ // Since this is a return slot optimization, we need to make sure that
+ // the value being copied is, in fact, in a return slot. We also need to
+ // check that the return slot parameter is marked noalias, so that we can
+ // be sure that changing it will not cause unexpected behavior changes due
+ // to it being accessed through a global or another parameter.
+ if (CS.arg_size() == 0 ||
+ cpySrc != CS.getArgument(0) ||
+ !CS.paramHasAttr(1, ParamAttr::NoAlias | ParamAttr::StructRet))
return false;
- // And that the argument is the return slot
- Argument* sretArg = cast<Argument>(cpyDest);
- if (!sretArg->hasStructRetAttr())
+ // Check that something sneaky is not happening involving casting
+ // return slot types around.
+ if (CS.getArgument(0)->getType() != cpyDest->getType())
return false;
+ // sret --> pointer
+ const PointerType* PT = cast<PointerType>(cpyDest->getType());
- // Make sure the return slot is otherwise dead
- std::set<User*> useList(sretArg->use_begin(), sretArg->use_end());
- while (!useList.empty()) {
- User* UI = *useList.begin();
-
- if (isa<GetElementPtrInst>(UI) || isa<BitCastInst>(UI)) {
- useList.insert(UI->use_begin(), UI->use_end());
- useList.erase(UI);
- } else if (UI == cpy)
- useList.erase(UI);
- else
- return false;
- }
+ // We can only perform the transformation if the size of the memcpy
+ // is constant and equal to the size of the structure.
+ ConstantInt* cpyLength = dyn_cast<ConstantInt>(cpy->getLength());
+ if (!cpyLength)
+ return false;
- // Make sure the call cannot modify the return slot in some unpredicted way
+ TargetData& TD = getAnalysis<TargetData>();
+ if (TD.getTypeStoreSize(PT->getElementType()) != cpyLength->getZExtValue())
+ return false;
+
+ // We only perform the transformation if it will be profitable.
+ if (!isReturnSlotOptznProfitable(cpyDest, cpy))
+ return false;
+
+ // In addition to knowing that the call does not access the return slot
+ // in some unexpected manner, which we derive from the noalias attribute,
+ // we also need to know that it does not sneakily modify the destination
+ // slot in the caller. We don't have parameter attributes to go by
+ // for this one, so we just rely on AA to figure it out for us.
AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
- if (AA.getModRefInfo(C, cpy->getRawDest(), ~0UL) != AliasAnalysis::NoModRef)
+ if (AA.getModRefInfo(C, cpy->getRawDest(), cpyLength->getZExtValue()) !=
+ AliasAnalysis::NoModRef)
return false;
- // If all checks passed, then we can perform the transformation
- CallSite CS = CallSite::get(C);
- for (unsigned i = 0; i < CS.arg_size(); ++i) {
- if (CS.paramHasAttr(i+1, ParamAttr::StructRet)) {
- if (CS.getArgument(i)->getType() != cpyDest->getType())
- return false;
-
- CS.setArgument(i, cpyDest);
- break;
- }
- }
+ // If all the checks have passed, then we're alright to do the transformation.
+ CS.setArgument(0, cpyDest);
+ // Drop any cached information about the call, because we may have changed
+ // its dependence information by changing its parameter.
MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
MD.dropInstruction(C);
// Remove the memcpy
+ MD.removeInstruction(cpy);
toErase.push_back(cpy);
return true;
/// copies X to Y, and memcpy B which copies Y to Z, then we can rewrite B to be
/// a memcpy from X to Z (or potentially a memmove, depending on circumstances).
/// This allows later passes to remove the first memcpy altogether.
-bool GVN::processMemCpy(MemCpyInst* M,
+bool GVN::processMemCpy(MemCpyInst* M, MemCpyInst* MDep,
SmallVector<Instruction*, 4>& toErase) {
- MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
-
- // First, we have to check that the dependency is another memcpy
- Instruction* dep = MD.getDependency(M);
- if (dep == MemoryDependenceAnalysis::None ||
- dep == MemoryDependenceAnalysis::NonLocal)
- return false;
- else if (CallInst* C = dyn_cast<CallInst>(dep))
- if (!isa<MemCpyInst>(C))
- return performReturnSlotOptzn(M, C, toErase);
- else if (!isa<MemCpyInst>(dep))
- return false;
-
// We can only transforms memcpy's where the dest of one is the source of the
// other
- MemCpyInst* MDep = cast<MemCpyInst>(dep);
if (M->getSource() != MDep->getDest())
return false;
return false;
// If all checks passed, then we can transform these memcpy's
- bool is32bit = M->getIntrinsicID() == Intrinsic::memcpy_i32;
- Function* MemMoveFun = Intrinsic::getDeclaration(
+ Function* MemCpyFun = Intrinsic::getDeclaration(
M->getParent()->getParent()->getParent(),
- is32bit ? Intrinsic::memcpy_i32 :
- Intrinsic::memcpy_i64);
+ M->getIntrinsicID());
std::vector<Value*> args;
args.push_back(M->getRawDest());
args.push_back(M->getLength());
args.push_back(M->getAlignment());
- CallInst* C = new CallInst(MemMoveFun, args.begin(), args.end(), "", M);
+ CallInst* C = new CallInst(MemCpyFun, args.begin(), args.end(), "", M);
+ MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
if (MD.getDependency(C) == MDep) {
MD.dropInstruction(M);
toErase.push_back(M);
if (LoadInst* L = dyn_cast<LoadInst>(I)) {
return processLoad(L, lastSeenLoad, toErase);
} else if (MemCpyInst* M = dyn_cast<MemCpyInst>(I)) {
- return processMemCpy(M, toErase);
+ MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
+
+ // The are two possible optimizations we can do for memcpy:
+ // a) memcpy-memcpy xform which exposes redundance for DSE
+ // b) call-memcpy xform for sret return slot optimization
+ Instruction* dep = MD.getDependency(M);
+ if (dep == MemoryDependenceAnalysis::None ||
+ dep == MemoryDependenceAnalysis::NonLocal)
+ return false;
+ if (MemCpyInst *MemCpy = dyn_cast<MemCpyInst>(dep))
+ return processMemCpy(M, MemCpy, toErase);
+ if (CallInst* C = dyn_cast<CallInst>(dep))
+ return performReturnSlotOptzn(M, C, toErase);
+ return false;
}
unsigned num = VN.lookup_or_add(I);