#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetData.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
hash = e.secondVN + hash * 37;
hash = e.thirdVN + hash * 37;
- hash = (unsigned)((uintptr_t)e.type >> 4) ^
- (unsigned)((uintptr_t)e.type >> 9) +
- hash * 37;
+ hash = ((unsigned)((uintptr_t)e.type >> 4) ^
+ (unsigned)((uintptr_t)e.type >> 9)) +
+ hash * 37;
for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
E = e.varargs.end(); I != E; ++I)
hash = *I + hash * 37;
- hash = (unsigned)((uintptr_t)e.function >> 4) ^
- (unsigned)((uintptr_t)e.function >> 9) +
- hash * 37;
+ hash = ((unsigned)((uintptr_t)e.function >> 4) ^
+ (unsigned)((uintptr_t)e.function >> 9)) +
+ hash * 37;
return hash;
}
AU.addRequired<DominatorTree>();
AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetData>();
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<MemoryDependenceAnalysis>();
+ AU.addPreserved<TargetData>();
}
// Helper fuctions
bool iterateOnFunction(Function &F);
Value* CollapsePhi(PHINode* p);
bool isSafeReplacement(PHINode* p, Instruction* inst);
+ bool valueHasOnlyOneUseAfter(Value* val, MemCpyInst* use,
+ Instruction* cutoff);
};
char GVN::ID = 0;
return deletedLoad;
}
+/// valueHasOnlyOneUse - Returns true if a value has only one use after the
+/// cutoff that is in the current same block and is the same as the use
+/// parameter.
+bool GVN::valueHasOnlyOneUseAfter(Value* val, MemCpyInst* use,
+ Instruction* cutoff) {
+ DominatorTree& DT = getAnalysis<DominatorTree>();
+
+ SmallVector<User*, 8> useList(val->use_begin(), val->use_end());
+ while (!useList.empty()) {
+ User* UI = useList.back();
+
+
+ if (isa<GetElementPtrInst>(UI) || isa<BitCastInst>(UI)) {
+ useList.pop_back();
+ for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
+ I != E; ++I)
+ useList.push_back(*I);
+ } else if (UI == use) {
+ useList.pop_back();
+ } else if (Instruction* inst = dyn_cast<Instruction>(UI)) {
+ if (inst->getParent() == use->getParent() &&
+ (inst == cutoff || !DT.dominates(cutoff, inst))) {
+ useList.pop_back();
+ } else
+ return false;
+ } else
+ return false;
+ }
+
+ return true;
+}
+
/// performReturnSlotOptzn - takes a memcpy and a call that it depends on,
/// and checks for the possibility of a return slot optimization by having
/// the call write its result directly into the callees return parameter
/// rather than using memcpy
bool GVN::performReturnSlotOptzn(MemCpyInst* cpy, CallInst* C,
SmallVector<Instruction*, 4>& toErase) {
- // Check that we're copying to an argument...
+ // Deliberately get the source and destination with bitcasts stripped away,
+ // because we'll need to do type comparisons based on the underlying type.
Value* cpyDest = cpy->getDest();
- if (!isa<Argument>(cpyDest))
- return false;
+ Value* cpySrc = cpy->getSource();
+ CallSite CS = CallSite::get(C);
- // And that the argument is the return slot
- Argument* sretArg = cast<Argument>(cpyDest);
- if (!sretArg->hasStructRetAttr())
+ // Since this is a return slot optimization, we need to make sure that
+ // the value being copied is, in fact, in a return slot. We also need to
+ // check that the return slot parameter is marked noalias, so that we can
+ // be sure that changing it will not cause unexpected behavior changes due
+ // to it being accessed through a global or another parameter.
+ if (CS.arg_size() == 0 ||
+ cpySrc != CS.getArgument(0) ||
+ !CS.paramHasAttr(1, ParamAttr::NoAlias | ParamAttr::StructRet))
return false;
- // Make sure the return slot is otherwise dead
- std::set<User*> useList(sretArg->use_begin(), sretArg->use_end());
- while (!useList.empty()) {
- User* UI = *useList.begin();
-
- if (isa<GetElementPtrInst>(UI) || isa<BitCastInst>(UI)) {
- useList.insert(UI->use_begin(), UI->use_end());
- useList.erase(UI);
- } else if (UI == cpy)
- useList.erase(UI);
- else
+ // Since we're changing the parameter to the callsite, we need to make sure
+ // that what would be the new parameter dominates the callsite.
+ DominatorTree& DT = getAnalysis<DominatorTree>();
+ if (Instruction* cpyDestInst = dyn_cast<Instruction>(cpyDest))
+ if (!DT.dominates(cpyDestInst, C))
return false;
- }
- // Make sure the call cannot modify the return slot in some unpredicted way
- AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
- if (AA.getModRefInfo(C, cpy->getRawDest(), ~0UL) != AliasAnalysis::NoModRef)
+ // Check that something sneaky is not happening involving casting
+ // return slot types around.
+ if (CS.getArgument(0)->getType() != cpyDest->getType())
return false;
+ // sret --> pointer
+ const PointerType* PT = cast<PointerType>(cpyDest->getType());
- // If all checks passed, then we can perform the transformation
- CallSite CS = CallSite::get(C);
- if (CS.getArgument(0)->getType() != cpyDest->getType())
+ // We can only perform the transformation if the size of the memcpy
+ // is constant and equal to the size of the structure.
+ ConstantInt* cpyLength = dyn_cast<ConstantInt>(cpy->getLength());
+ if (!cpyLength)
return false;
-
+
+ TargetData& TD = getAnalysis<TargetData>();
+ if (TD.getTypeStoreSize(PT->getElementType()) != cpyLength->getZExtValue())
+ return false;
+
+ // For safety, we must ensure that the output parameter of the call only has
+ // a single use, the memcpy. Otherwise this can introduce an invalid
+ // transformation.
+ if (!valueHasOnlyOneUseAfter(CS.getArgument(0), cpy, C))
+ return false;
+
+ // We only perform the transformation if it will be profitable.
+ if (!valueHasOnlyOneUseAfter(cpyDest, cpy, C))
+ return false;
+
+ // In addition to knowing that the call does not access the return slot
+ // in some unexpected manner, which we derive from the noalias attribute,
+ // we also need to know that it does not sneakily modify the destination
+ // slot in the caller. We don't have parameter attributes to go by
+ // for this one, so we just rely on AA to figure it out for us.
+ AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
+ if (AA.getModRefInfo(C, cpy->getRawDest(), cpyLength->getZExtValue()) !=
+ AliasAnalysis::NoModRef)
+ return false;
+
+ // If all the checks have passed, then we're alright to do the transformation.
CS.setArgument(0, cpyDest);
+ // Drop any cached information about the call, because we may have changed
+ // its dependence information by changing its parameter.
MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
MD.dropInstruction(C);
// Remove the memcpy
+ MD.removeInstruction(cpy);
toErase.push_back(cpy);
return true;
if (!C1 || !C2)
return false;
- uint64_t CpySize = C1->getValue().getZExtValue();
- uint64_t DepSize = C2->getValue().getZExtValue();
+ uint64_t DepSize = C1->getValue().getZExtValue();
+ uint64_t CpySize = C2->getValue().getZExtValue();
if (DepSize < CpySize)
return false;
if (dep == MemoryDependenceAnalysis::None ||
dep == MemoryDependenceAnalysis::NonLocal)
return false;
- else if (CallInst* C = dyn_cast<CallInst>(dep)) {
- if (!isa<MemCpyInst>(C))
- return performReturnSlotOptzn(M, C, toErase);
- } else if (!isa<MemCpyInst>(dep))
- return false;
-
- return processMemCpy(M, cast<MemCpyInst>(dep), toErase);
+ if (MemCpyInst *MemCpy = dyn_cast<MemCpyInst>(dep))
+ return processMemCpy(M, MemCpy, toErase);
+ if (CallInst* C = dyn_cast<CallInst>(dep))
+ return performReturnSlotOptzn(M, C, toErase);
+ return false;
}
unsigned num = VN.lookup_or_add(I);