#define DEBUG_TYPE "memdep"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/Constants.h"
#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
#include "llvm/Function.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MallocHelper.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/PredIteratorCache.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetData.h"
using namespace llvm;
STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequiredTransitive<AliasAnalysis>();
- AU.addRequiredTransitive<TargetData>();
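+  // Note: TargetData is intentionally no longer required here; all of the
+  // type-size queries below now go through AA->getTypeStoreSize instead.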
}
bool MemoryDependenceAnalysis::runOnFunction(Function &) {
AA = &getAnalysis<AliasAnalysis>();
- TD = &getAnalysis<TargetData>();
if (PredCache == 0)
PredCache.reset(new PredIteratorCache());
return false;
}

/// RemoveFromReverseMap - This is a helper function that removes Val from
/// 'Inst's set in ReverseMap.  If the set becomes empty, remove Inst's entry.
template <typename KeyTy>
static void RemoveFromReverseMap(DenseMap<Instruction*,
- SmallPtrSet<KeyTy*, 4> > &ReverseMap,
- Instruction *Inst, KeyTy *Val) {
- typename DenseMap<Instruction*, SmallPtrSet<KeyTy*, 4> >::iterator
+ SmallPtrSet<KeyTy, 4> > &ReverseMap,
+ Instruction *Inst, KeyTy Val) {
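+  // KeyTy is now taken by value rather than as KeyTy*, so pointer-like keys
+  // such as ValueIsLoadPair (a PointerIntPair) can be stored in the set
+  // directly instead of being round-tripped through getOpaqueValue().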
+ typename DenseMap<Instruction*, SmallPtrSet<KeyTy, 4> >::iterator
InstIt = ReverseMap.find(Inst);
assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
bool Found = InstIt->second.erase(Val);
uint64_t PointerSize = 0;
if (StoreInst *S = dyn_cast<StoreInst>(Inst)) {
Pointer = S->getPointerOperand();
- PointerSize = TD->getTypeStoreSize(S->getOperand(0)->getType());
+ PointerSize = AA->getTypeStoreSize(S->getOperand(0)->getType());
} else if (VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
Pointer = V->getOperand(0);
- PointerSize = TD->getTypeStoreSize(V->getType());
+ PointerSize = AA->getTypeStoreSize(V->getType());
} else if (FreeInst *F = dyn_cast<FreeInst>(Inst)) {
Pointer = F->getPointerOperand();
// FreeInsts erase the entire structure
PointerSize = ~0ULL;
+ } else if (isFreeCall(Inst)) {
+ Pointer = Inst->getOperand(0);
+ // calls to free() erase the entire structure
+ PointerSize = ~0ULL;
} else if (isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) {
+ // Debug intrinsics don't cause dependences.
+ if (isa<DbgInfoIntrinsic>(Inst)) continue;
CallSite InstCS = CallSite::get(Inst);
// If these two calls do not interfere, look past it.
switch (AA->getModRefInfo(CS, InstCS)) {
while (ScanIt != BB->begin()) {
Instruction *Inst = --ScanIt;
+ // Debug intrinsics don't cause dependences.
+ if (isa<DbgInfoIntrinsic>(Inst)) continue;
+
// Values depend on loads if the pointers are must aliased. This means that
// a load depends on another must aliased load from the same value.
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
Value *Pointer = LI->getPointerOperand();
- uint64_t PointerSize = TD->getTypeStoreSize(LI->getType());
+ uint64_t PointerSize = AA->getTypeStoreSize(LI->getType());
// If we found a pointer, check if it could be the same as our pointer.
AliasAnalysis::AliasResult R =
}
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- Value *Pointer = SI->getPointerOperand();
- uint64_t PointerSize = TD->getTypeStoreSize(SI->getOperand(0)->getType());
+ // If alias analysis can tell that this store is guaranteed to not modify
+ // the query pointer, ignore it. Use getModRefInfo to handle cases where
+ // the query pointer points to constant memory etc.
+ if (AA->getModRefInfo(SI, MemPtr, MemSize) == AliasAnalysis::NoModRef)
+ continue;
+ // Ok, this store might clobber the query pointer. Check to see if it is
+ // a must alias: in this case, we want to return this as a def.
+ Value *Pointer = SI->getPointerOperand();
+ uint64_t PointerSize = AA->getTypeStoreSize(SI->getOperand(0)->getType());
+
// If we found a pointer, check if it could be the same as our pointer.
AliasAnalysis::AliasResult R =
AA->alias(Pointer, PointerSize, MemPtr, MemSize);
// the allocation, return Def. This means that there is no dependence and
// the access can be optimized based on that. For example, a load could
// turn into undef.
- if (AllocationInst *AI = dyn_cast<AllocationInst>(Inst)) {
+ // Note: Only determine this to be a malloc if Inst is the malloc call, not
+ // a subsequent bitcast of the malloc call result. There can be stores to
+ // the malloced memory between the malloc call and its bitcast uses, and we
+ // need to continue scanning until the malloc call.
+ if (isa<AllocaInst>(Inst) || extractMallocCall(Inst)) {
Value *AccessPtr = MemPtr->getUnderlyingObject();
- if (AccessPtr == AI ||
- AA->alias(AI, 1, AccessPtr, 1) == AliasAnalysis::MustAlias)
- return MemDepResult::getDef(AI);
+ if (AccessPtr == Inst ||
+ AA->alias(Inst, 1, AccessPtr, 1) == AliasAnalysis::MustAlias)
+ return MemDepResult::getDef(Inst);
continue;
}
-
+
// See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
switch (AA->getModRefInfo(Inst, MemPtr, MemSize)) {
case AliasAnalysis::NoModRef:
LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
else {
MemPtr = SI->getPointerOperand();
- MemSize = TD->getTypeStoreSize(SI->getOperand(0)->getType());
+ MemSize = AA->getTypeStoreSize(SI->getOperand(0)->getType());
}
} else if (LoadInst *LI = dyn_cast<LoadInst>(QueryInst)) {
// If this is a volatile load, don't mess around with it. Just return the
LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
else {
MemPtr = LI->getPointerOperand();
- MemSize = TD->getTypeStoreSize(LI->getType());
+ MemSize = AA->getTypeStoreSize(LI->getType());
}
+ } else if (isFreeCall(QueryInst)) {
+ MemPtr = QueryInst->getOperand(0);
+ // calls to free() erase the entire structure, not just a field.
+    MemSize = ~0ULL;
} else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
CallSite QueryCS = CallSite::get(QueryInst);
bool isReadOnly = AA->onlyReadsMemory(QueryCS);
// We know that the pointer value is live into FromBB; find the def/clobbers
// from predecessors.
const Type *EltTy = cast<PointerType>(Pointer->getType())->getElementType();
- uint64_t PointeeSize = TD->getTypeStoreSize(EltTy);
+ uint64_t PointeeSize = AA->getTypeStoreSize(EltTy);
// This is the set of blocks we've inspected, and the pointer we consider in
// each block. Because of critical edges, we currently bail out if querying
// Eliminating the dirty entry from 'Cache', so update the reverse info.
ValueIsLoadPair CacheKey(Pointer, isLoad);
- RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos,
- CacheKey.getOpaqueValue());
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey);
} else {
++NumUncacheNonLocalPtr;
}
Instruction *Inst = Dep.getInst();
assert(Inst && "Didn't depend on anything?");
ValueIsLoadPair CacheKey(Pointer, isLoad);
- ReverseNonLocalPtrDeps[Inst].insert(CacheKey.getOpaqueValue());
+ ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
return Dep;
}
+/// SortNonLocalDepInfoCache - Sort the NonLocalDepInfo cache, given a certain
+/// number of elements in the array that are already properly ordered. This is
+/// optimized for the case when only a few entries are added.
+static void
+SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
+ unsigned NumSortedEntries) {
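+  // Entries [0, NumSortedEntries) are already sorted; only the newly appended
+  // tail needs to be placed.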
+ switch (Cache.size() - NumSortedEntries) {
+ case 0:
+ // done, no new entries.
+ break;
+ case 2: {
+ // Two new entries, insert the last one into place.
+ MemoryDependenceAnalysis::NonLocalDepEntry Val = Cache.back();
+ Cache.pop_back();
+ MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
+ std::upper_bound(Cache.begin(), Cache.end()-1, Val);
+ Cache.insert(Entry, Val);
+ // FALL THROUGH.
+ }
+ case 1:
+    // One new entry, just insert the new value at the appropriate position.
+ if (Cache.size() != 1) {
+ MemoryDependenceAnalysis::NonLocalDepEntry Val = Cache.back();
+ Cache.pop_back();
+ MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
+ std::upper_bound(Cache.begin(), Cache.end(), Val);
+ Cache.insert(Entry, Val);
+ }
+ break;
+ default:
+ // Added many values, do a full scale sort.
+ std::sort(Cache.begin(), Cache.end());
+ break;
+ }
+}
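+
+// Typical call pattern (a sketch; NonLocalDepEntry is assumed here to be a
+// (BasicBlock*, MemDepResult) pair):
+//   unsigned NumSortedEntries = Cache->size();
+//   Cache->push_back(std::make_pair(BB, Dep));   // appended out of order
+//   SortNonLocalDepInfoCache(*Cache, NumSortedEntries);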
+
/// getNonLocalPointerDepFromBB - Perform a dependency query based on
/// pointer/pointeesize starting at the end of StartBB. Add any clobber/def
// If we do need to do phi translation, then there are a bunch of different
// cases, because we have to find a Value* live in the predecessor block. We
// know that PtrInst is defined in this block at least.
+
+ // We may have added values to the cache list before this PHI translation.
+ // If so, we haven't done anything to ensure that the cache remains sorted.
+ // Sort it now (if needed) so that recursive invocations of
+ // getNonLocalPointerDepFromBB and other routines that could reuse the cache
+ // value will only see properly sorted cache arrays.
+ if (Cache && NumSortedEntries != Cache->size()) {
+ SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
+ NumSortedEntries = Cache->size();
+ }
// If this is directly a PHI node, just use the incoming values for each
// pred as the phi translated version.
if (PHINode *PtrPHI = dyn_cast<PHINode>(PtrInst)) {
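+    // Recursive queries below may grow NonLocalPointerDeps and invalidate
+    // the 'Cache' pointer; null it out so the PredTranslationFailure path
+    // knows it must refresh Cache/CacheInfo before using them.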
+ Cache = 0;
+
for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
BasicBlock *Pred = *PI;
Value *PredPtr = PtrPHI->getIncomingValueForBlock(Pred);
goto PredTranslationFailure;
}
- // We may have added values to the cache list before this PHI
- // translation. If so, we haven't done anything to ensure that the
- // cache remains sorted. Sort it now (if needed) so that recursive
- // invocations of getNonLocalPointerDepFromBB that could reuse the cache
- // value will only see properly sorted cache arrays.
- if (Cache && NumSortedEntries != Cache->size())
- std::sort(Cache->begin(), Cache->end());
- Cache = 0;
-
// FIXME: it is entirely possible that PHI translating will end up with
// the same value. Consider PHI translating something like:
// X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
Result, Visited))
goto PredTranslationFailure;
}
-
+
// Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
CacheInfo = &NonLocalPointerDeps[CacheKey];
Cache = &CacheInfo->second;
// cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0);
PredTranslationFailure:
- // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
- CacheInfo = &NonLocalPointerDeps[CacheKey];
- Cache = &CacheInfo->second;
- NumSortedEntries = Cache->size();
-
+ if (Cache == 0) {
+ // Refresh the CacheInfo/Cache pointer if it got invalidated.
+ CacheInfo = &NonLocalPointerDeps[CacheKey];
+ Cache = &CacheInfo->second;
+ NumSortedEntries = Cache->size();
+ }
+
// Since we did phi translation, the "Cache" set won't contain all of the
// results for the query. This is ok (we can still use it to accelerate
// specific block queries) but we can't do the fastpath "return all
assert(I->second.isNonLocal() &&
"Should only be here with transparent block");
I->second = MemDepResult::getClobber(BB->begin());
- ReverseNonLocalPtrDeps[BB->begin()].insert(CacheKey.getOpaqueValue());
+ ReverseNonLocalPtrDeps[BB->begin()].insert(CacheKey);
Result.push_back(*I);
break;
}
}
-
+
// Okay, we're done now. If we added new values to the cache, re-sort it.
- switch (Cache->size()-NumSortedEntries) {
- case 0:
- // done, no new entries.
- break;
- case 2: {
- // Two new entries, insert the last one into place.
- NonLocalDepEntry Val = Cache->back();
- Cache->pop_back();
- NonLocalDepInfo::iterator Entry =
- std::upper_bound(Cache->begin(), Cache->end()-1, Val);
- Cache->insert(Entry, Val);
- // FALL THROUGH.
- }
- case 1:
- // One new entry, Just insert the new value at the appropriate position.
- if (Cache->size() != 1) {
- NonLocalDepEntry Val = Cache->back();
- Cache->pop_back();
- NonLocalDepInfo::iterator Entry =
- std::upper_bound(Cache->begin(), Cache->end(), Val);
- Cache->insert(Entry, Val);
- }
- break;
- default:
- // Added many values, do a full scale sort.
- std::sort(Cache->begin(), Cache->end());
- }
+ SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
DEBUG(AssertSorted(*Cache));
return false;
}
assert(Target->getParent() == PInfo[i].first);
// Eliminating the dirty entry from 'Cache', so update the reverse info.
- RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P.getOpaqueValue());
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
}
// Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
ReverseNonLocalPtrDeps.find(RemInst);
if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
- SmallPtrSet<void*, 4> &Set = ReversePtrDepIt->second;
+ SmallPtrSet<ValueIsLoadPair, 4> &Set = ReversePtrDepIt->second;
SmallVector<std::pair<Instruction*, ValueIsLoadPair>,8> ReversePtrDepsToAdd;
- for (SmallPtrSet<void*, 4>::iterator I = Set.begin(), E = Set.end();
- I != E; ++I) {
- ValueIsLoadPair P;
- P.setFromOpaqueValue(*I);
+ for (SmallPtrSet<ValueIsLoadPair, 4>::iterator I = Set.begin(),
+ E = Set.end(); I != E; ++I) {
+ ValueIsLoadPair P = *I;
assert(P.getPointer() != RemInst &&
"Already removed NonLocalPointerDeps info for RemInst");
if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
}
+
+ // Re-sort the NonLocalDepInfo. Changing the dirty entry to its
+ // subsequent value may invalidate the sortedness.
+ std::sort(NLPDI.begin(), NLPDI.end());
}
ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
while (!ReversePtrDepsToAdd.empty()) {
ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first]
- .insert(ReversePtrDepsToAdd.back().second.getOpaqueValue());
+ .insert(ReversePtrDepsToAdd.back().second);
ReversePtrDepsToAdd.pop_back();
}
}
E = ReverseNonLocalPtrDeps.end(); I != E; ++I) {
assert(I->first != D && "Inst occurs in rev NLPD map");
- for (SmallPtrSet<void*, 4>::const_iterator II = I->second.begin(),
+ for (SmallPtrSet<ValueIsLoadPair, 4>::const_iterator II = I->second.begin(),
E = I->second.end(); II != E; ++II)
- assert(*II != ValueIsLoadPair(D, false).getOpaqueValue() &&
- *II != ValueIsLoadPair(D, true).getOpaqueValue() &&
+ assert(*II != ValueIsLoadPair(D, false) &&
+ *II != ValueIsLoadPair(D, true) &&
"Inst occurs in ReverseNonLocalPtrDeps map");
}