#define DEBUG_TYPE "memdep"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Function.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/PredIteratorCache.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
using namespace llvm;
STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
STATISTIC(NumCacheCompleteNonLocalPtr,
"Number of block queries that were completely cached");
+// Limit for the number of instructions to scan in a block.
+// FIXME: Figure out what a sane value is for this.
+// (500 is relatively insane.)
+static const int BlockScanLimit = 500;
+
char MemoryDependenceAnalysis::ID = 0;
// Register this pass...
bool MemoryDependenceAnalysis::runOnFunction(Function &) {
AA = &getAnalysis<AliasAnalysis>();
- TD = getAnalysisIfAvailable<TargetData>();
+ TD = getAnalysisIfAvailable<DataLayout>();
+ DT = getAnalysisIfAvailable<DominatorTree>();
if (PredCache == 0)
PredCache.reset(new PredIteratorCache());
return false;
AliasAnalysis::Location &Loc,
AliasAnalysis *AA) {
if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
- if (LI->isVolatile()) {
- Loc = AliasAnalysis::Location();
+ if (LI->isUnordered()) {
+ Loc = AA->getLocation(LI);
+ return AliasAnalysis::Ref;
+ } else if (LI->getOrdering() == Monotonic) {
+ Loc = AA->getLocation(LI);
return AliasAnalysis::ModRef;
}
- Loc = AA->getLocation(LI);
- return AliasAnalysis::Ref;
+ Loc = AliasAnalysis::Location();
+ return AliasAnalysis::ModRef;
}
if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- if (SI->isVolatile()) {
- Loc = AliasAnalysis::Location();
+ if (SI->isUnordered()) {
+ Loc = AA->getLocation(SI);
+ return AliasAnalysis::Mod;
+ } else if (SI->getOrdering() == Monotonic) {
+ Loc = AA->getLocation(SI);
return AliasAnalysis::ModRef;
}
- Loc = AA->getLocation(SI);
- return AliasAnalysis::Mod;
+ Loc = AliasAnalysis::Location();
+ return AliasAnalysis::ModRef;
}
if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
return AliasAnalysis::ModRef;
}
- if (const CallInst *CI = isFreeCall(Inst)) {
+ if (const CallInst *CI = isFreeCall(Inst, AA->getTargetLibraryInfo())) {
// calls to free() deallocate the entire structure
Loc = AliasAnalysis::Location(CI->getArgOperand(0));
return AliasAnalysis::Mod;
MemDepResult MemoryDependenceAnalysis::
getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
BasicBlock::iterator ScanIt, BasicBlock *BB) {
+ unsigned Limit = BlockScanLimit;
+
// Walk backwards through the block, looking for dependencies
while (ScanIt != BB->begin()) {
+ // Limit the amount of scanning we do so we don't end up with quadratic
+ // running time on extreme testcases.
+ --Limit;
+ if (!Limit)
+ return MemDepResult::getUnknown();
+
Instruction *Inst = --ScanIt;
// If this inst is a memory op, get the pointer it accessed
// Otherwise if the two calls don't interact (e.g. InstCS is readnone)
// keep scanning.
- break;
+ continue;
default:
return MemDepResult::getClobber(Inst);
}
}
+
+ // If we could not obtain a pointer for the instruction and the instruction
+ // touches memory then assume that this is a dependency.
+ if (MR != AliasAnalysis::NoModRef)
+ return MemDepResult::getClobber(Inst);
}
-
- // No dependence found. If this is the entry block of the function, it is a
- // clobber, otherwise it is non-local.
+
+ // No dependence found. If this is the entry block of the function, it is
+ // unknown, otherwise it is non-local.
if (BB != &BB->getParent()->getEntryBlock())
return MemDepResult::getNonLocal();
- return MemDepResult::getClobber(ScanIt);
+ return MemDepResult::getNonFuncLocal();
}
/// isLoadLoadClobberIfExtendedToFullWidth - Return true if LI is a load that
isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc,
const Value *&MemLocBase,
int64_t &MemLocOffs,
- const LoadInst *LI, TargetData *TD) {
+ const LoadInst *LI,
+ const DataLayout *TD) {
// If we have no target data, we can't do this.
if (TD == 0) return false;
if (MemLocBase == 0)
MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, *TD);
+ unsigned Size = MemoryDependenceAnalysis::
+ getLoadLoadClobberFullWidthSize(MemLocBase, MemLocOffs, MemLoc.Size,
+ LI, *TD);
+ return Size != 0;
+}
+
+/// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that
+/// looks at a memory location for a load (specified by MemLocBase, Offs,
+/// and Size) and compares it against a load. If the specified load could
+/// be safely widened to a larger integer load that is 1) still efficient,
+/// 2) safe for the target, and 3) would provide the specified memory
+/// location value, then this function returns the size in bytes of the
+/// load width to use. If not, this returns zero.
+unsigned MemoryDependenceAnalysis::
+getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
+ unsigned MemLocSize, const LoadInst *LI,
+ const DataLayout &TD) {
+ // We can only extend simple integer loads.
+ if (!isa<IntegerType>(LI->getType()) || !LI->isSimple()) return 0;
+
// Get the base of this load.
int64_t LIOffs = 0;
const Value *LIBase =
- GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, *TD);
+ GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, TD);
// If the two pointers are not based on the same pointer, we can't tell that
// they are related.
- if (LIBase != MemLocBase) return false;
+ if (LIBase != MemLocBase) return 0;
// Okay, the two values are based on the same pointer, but returned as
// no-alias. This happens when we have things like two byte loads at "P+1"
// the bits required by MemLoc.
// If MemLoc is before LI, then no widening of LI will help us out.
- if (MemLocOffs < LIOffs) return false;
+ if (MemLocOffs < LIOffs) return 0;
// Get the alignment of the load in bytes. We assume that it is safe to load
// any legal integer up to this size without a problem. For example, if we're
// to i16.
unsigned LoadAlign = LI->getAlignment();
- int64_t MemLocEnd = MemLocOffs+MemLoc.Size;
+ int64_t MemLocEnd = MemLocOffs+MemLocSize;
// If no amount of rounding up will let MemLoc fit into LI, then bail out.
- if (LIOffs+LoadAlign < MemLocEnd) return false;
+ if (LIOffs+LoadAlign < MemLocEnd) return 0;
// This is the size of the load to try. Start with the next larger power of
// two.
// If this load size is bigger than our known alignment or would not fit
// into a native integer register, then we fail.
if (NewLoadByteSize > LoadAlign ||
- !TD->fitsInLegalInteger(NewLoadByteSize*8))
- return false;
+ !TD.fitsInLegalInteger(NewLoadByteSize*8))
+ return 0;
+
+ if (LIOffs+NewLoadByteSize > MemLocEnd &&
+ LI->getParent()->getParent()->getFnAttributes().hasAddressSafetyAttr()){
+ // We will be reading past the location accessed by the original program.
+ // While this is safe in a regular build, Address Safety analysis tools
+ // may start reporting false warnings. So, don't do widening.
+ return 0;
+ }
// If a load of this width would include all of MemLoc, then we succeed.
if (LIOffs+NewLoadByteSize >= MemLocEnd)
- return true;
+ return NewLoadByteSize;
NewLoadByteSize <<= 1;
}
-
- return false;
}
/// getPointerDependencyFrom - Return the instruction on which a memory
const Value *MemLocBase = 0;
int64_t MemLocOffset = 0;
-
+
+ unsigned Limit = BlockScanLimit;
+
// Walk backwards through the basic block, looking for dependencies.
while (ScanIt != BB->begin()) {
+ // Limit the amount of scanning we do so we don't end up with quadratic
+ // running time on extreme testcases.
+ --Limit;
+ if (!Limit)
+ return MemDepResult::getUnknown();
+
Instruction *Inst = --ScanIt;
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
// Values depend on loads if the pointers are must aliased. This means that
// a load depends on another must aliased load from the same value.
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ // Atomic loads have complications involved.
+ // FIXME: This is overly conservative.
+ if (!LI->isUnordered())
+ return MemDepResult::getClobber(LI);
+
AliasAnalysis::Location LoadLoc = AA->getLocation(LI);
// If we found a pointer, check if it could be the same as our pointer.
// location is 1 byte at P+1). If so, return it as a load/load
// clobber result, allowing the client to decide to widen the load if
// it wants to.
- if (const IntegerType *ITy = dyn_cast<IntegerType>(LI->getType()))
+ if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType()))
if (LI->getAlignment()*8 > ITy->getPrimitiveSizeInBits() &&
isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase,
MemLocOffset, LI, TD))
if (R == AliasAnalysis::MustAlias)
return MemDepResult::getDef(Inst);
+#if 0 // FIXME: Temporarily disabled. GVN is cleverly rewriting loads
+ // in terms of clobbering loads, but since it does this by looking
+ // at the clobbering load directly, it doesn't know about any
+ // phi translation that may have happened along the way.
+
// If we have a partial alias, then return this as a clobber for the
// client to handle.
if (R == AliasAnalysis::PartialAlias)
return MemDepResult::getClobber(Inst);
+#endif
// Random may-alias loads don't depend on each other without a
// dependence.
}
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // Atomic stores have complications involved.
+ // FIXME: This is overly conservative.
+ if (!SI->isUnordered())
+ return MemDepResult::getClobber(SI);
+
// If alias analysis can tell that this store is guaranteed to not modify
// the query pointer, ignore it. Use getModRefInfo to handle cases where
// the query pointer points to constant memory etc.
// a subsequent bitcast of the malloc call result. There can be stores to
// the malloced memory between the malloc call and its bitcast uses, and we
// need to continue scanning until the malloc call.
- if (isa<AllocaInst>(Inst) ||
- (isa<CallInst>(Inst) && extractMallocCall(Inst))) {
+ const TargetLibraryInfo *TLI = AA->getTargetLibraryInfo();
+ if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, TLI)) {
const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD);
if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr))
return MemDepResult::getDef(Inst);
- continue;
+ // Be conservative if the accessed pointer may alias the allocation.
+ if (AA->alias(Inst, AccessPtr) != AliasAnalysis::NoAlias)
+ return MemDepResult::getClobber(Inst);
+ // If the allocation is not aliased and does not read memory (like
+ // strdup), it is safe to ignore.
+ if (isa<AllocaInst>(Inst) ||
+ isMallocLikeFn(Inst, TLI) || isCallocLikeFn(Inst, TLI))
+ continue;
}
// See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
- switch (AA->getModRefInfo(Inst, MemLoc)) {
+ AliasAnalysis::ModRefResult MR = AA->getModRefInfo(Inst, MemLoc);
+ // If necessary, perform additional analysis.
+ if (MR == AliasAnalysis::ModRef)
+ MR = AA->callCapturesBefore(Inst, MemLoc, DT);
+ switch (MR) {
case AliasAnalysis::NoModRef:
// If the call has no effect on the queried pointer, just ignore it.
continue;
}
}
- // No dependence found. If this is the entry block of the function, it is a
- // clobber, otherwise it is non-local.
+ // No dependence found. If this is the entry block of the function, it is
+ // unknown, otherwise it is non-local.
if (BB != &BB->getParent()->getEntryBlock())
return MemDepResult::getNonLocal();
- return MemDepResult::getClobber(ScanIt);
+ return MemDepResult::getNonFuncLocal();
}
/// getDependency - Return the instruction on which a memory operation
// Do the scan.
if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
- // No dependence found. If this is the entry block of the function, it is a
- // clobber, otherwise it is non-local.
+ // No dependence found. If this is the entry block of the function, it is
+ // unknown, otherwise it is non-local.
if (QueryParent != &QueryParent->getParent()->getEntryBlock())
LocalCache = MemDepResult::getNonLocal();
else
- LocalCache = MemDepResult::getClobber(QueryInst);
+ LocalCache = MemDepResult::getNonFuncLocal();
} else {
AliasAnalysis::Location MemLoc;
AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA);
// If we can do a pointer scan, make it happen.
bool isLoad = !(MR & AliasAnalysis::Mod);
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst))
- isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_end;
+ isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos,
QueryParent);
QueryParent);
} else
// Non-memory instruction.
- LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
+ LocalCache = MemDepResult::getUnknown();
}
// Remember the result!
Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall,ScanPos, DirtyBB);
} else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
// No dependence found. If this is the entry block of the function, it is
- // a clobber, otherwise it is non-local.
+ // unknown, otherwise it is non-local.
Dep = MemDepResult::getNonLocal();
} else {
- Dep = MemDepResult::getClobber(ScanPos);
+ Dep = MemDepResult::getNonFuncLocal();
}
// If we had a dirty entry for the block, update it. Otherwise, just add
return;
Result.clear();
Result.push_back(NonLocalDepResult(FromBB,
- MemDepResult::getClobber(FromBB->begin()),
+ MemDepResult::getUnknown(),
const_cast<Value *>(Loc.Ptr)));
}
// If the block has a dependency (i.e. it isn't completely transparent to
// the value), remember the reverse association because we just added it
// to Cache!
- if (Dep.isNonLocal())
+ if (!Dep.isDef() && !Dep.isClobber())
return Dep;
// Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
if (!Pair.second) {
if (CacheInfo->Size < Loc.Size) {
// The query's Size is greater than the cached one. Throw out the
- // cached data and procede with the query at the greater size.
+ // cached data and proceed with the query at the greater size.
CacheInfo->Pair = BBSkipFirstBlockPair();
CacheInfo->Size = Loc.Size;
for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
SmallVector<BasicBlock*, 32> Worklist;
Worklist.push_back(StartBB);
+ // PredList used inside loop.
+ SmallVector<std::pair<BasicBlock*, PHITransAddr>, 16> PredList;
+
// Keep track of the entries that we know are sorted. Previously cached
// entries will all be sorted. The entries we add we only sort on demand (we
// don't insert every element into its sorted position). We know that we
// the same Pointer.
if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
SkipFirstBlock = false;
+ SmallVector<BasicBlock*, 16> NewBlocks;
for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
// Verify that we haven't looked at this block yet.
std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
InsertRes = Visited.insert(std::make_pair(*PI, Pointer.getAddr()));
if (InsertRes.second) {
// First time we've looked at *PI.
- Worklist.push_back(*PI);
+ NewBlocks.push_back(*PI);
continue;
}
// If we have seen this block before, but it was with a different
// pointer then we have a phi translation failure and we have to treat
// this as a clobber.
- if (InsertRes.first->second != Pointer.getAddr())
+ if (InsertRes.first->second != Pointer.getAddr()) {
+ // Make sure to clean up the Visited map before continuing on to
+ // PredTranslationFailure.
+ for (unsigned i = 0; i < NewBlocks.size(); i++)
+ Visited.erase(NewBlocks[i]);
goto PredTranslationFailure;
+ }
}
+ Worklist.append(NewBlocks.begin(), NewBlocks.end());
continue;
}
NumSortedEntries = Cache->size();
}
Cache = 0;
-
+
+ PredList.clear();
for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
BasicBlock *Pred = *PI;
-
+ PredList.push_back(std::make_pair(Pred, Pointer));
+
// Get the PHI translated pointer in this predecessor. This can fail if
// not translatable, in which case the getAddr() returns null.
- PHITransAddr PredPointer(Pointer);
+ PHITransAddr &PredPointer = PredList.back().second;
PredPointer.PHITranslateValue(BB, Pred, 0);
Value *PredPtrVal = PredPointer.getAddr();
InsertRes = Visited.insert(std::make_pair(Pred, PredPtrVal));
if (!InsertRes.second) {
+ // We found the pred; take it off the list of preds to visit.
+ PredList.pop_back();
+
// If the predecessor was visited with PredPtr, then we already did
// the analysis and can ignore it.
if (InsertRes.first->second == PredPtrVal)
// Otherwise, the block was previously analyzed with a different
// pointer. We can't represent the result of this case, so we just
// treat this as a phi translation failure.
+
+ // Make sure to clean up the Visited map before continuing on to
+ // PredTranslationFailure.
+ for (unsigned i = 0; i < PredList.size(); i++)
+ Visited.erase(PredList[i].first);
+
goto PredTranslationFailure;
}
-
+ }
+
+ // Actually process results here; this needs to be a separate loop to avoid
+ // calling getNonLocalPointerDepFromBB for blocks we don't want to return
+ // any results for. (getNonLocalPointerDepFromBB will modify our
+ // datastructures in ways the code after the PredTranslationFailure label
+ // doesn't expect.)
+ for (unsigned i = 0; i < PredList.size(); i++) {
+ BasicBlock *Pred = PredList[i].first;
+ PHITransAddr &PredPointer = PredList[i].second;
+ Value *PredPtrVal = PredPointer.getAddr();
+
+ bool CanTranslate = true;
// If PHI translation was unable to find an available pointer in this
// predecessor, then we have to assume that the pointer is clobbered in
// that predecessor. We can still do PRE of the load, which would insert
// a computation of the pointer in this predecessor.
- if (PredPtrVal == 0) {
+ if (PredPtrVal == 0)
+ CanTranslate = false;
+
+ // FIXME: it is entirely possible that PHI translating will end up with
+ // the same value. Consider PHI translating something like:
+ // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
+ // to recurse here, pedantically speaking.
+
+ // If getNonLocalPointerDepFromBB fails here, that means the cached
+ // result conflicted with the Visited list; we have to conservatively
+ // assume it is unknown, but this also does not block PRE of the load.
+ if (!CanTranslate ||
+ getNonLocalPointerDepFromBB(PredPointer,
+ Loc.getWithNewPtr(PredPtrVal),
+ isLoad, Pred,
+ Result, Visited)) {
// Add the entry to the Result list.
- NonLocalDepResult Entry(Pred,
- MemDepResult::getClobber(Pred->getTerminator()),
- PredPtrVal);
+ NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
Result.push_back(Entry);
// Since we had a phi translation failure, the cache for CacheKey won't
NLPI.Pair = BBSkipFirstBlockPair();
continue;
}
-
- // FIXME: it is entirely possible that PHI translating will end up with
- // the same value. Consider PHI translating something like:
- // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
- // to recurse here, pedantically speaking.
-
- // If we have a problem phi translating, fall through to the code below
- // to handle the failure condition.
- if (getNonLocalPointerDepFromBB(PredPointer,
- Loc.getWithNewPtr(PredPointer.getAddr()),
- isLoad, Pred,
- Result, Visited))
- goto PredTranslationFailure;
}
// Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
continue;
PredTranslationFailure:
+ // The following code is "failure"; we can't produce a sane translation
+ // for the given block. It assumes that we haven't modified any of
+ // our datastructures while processing the current block.
if (Cache == 0) {
// Refresh the CacheInfo/Cache pointer if it got invalidated.
// results from the set". Clear out the indicator for this.
CacheInfo->Pair = BBSkipFirstBlockPair();
- // If *nothing* works, mark the pointer as being clobbered by the first
- // instruction in this block.
+ // If *nothing* works, mark the pointer as unknown.
//
// If this is the magic first block, return this as a clobber of the whole
// incoming value. Since we can't phi translate to one of the predecessors,
assert(I->getResult().isNonLocal() &&
"Should only be here with transparent block");
- I->setResult(MemDepResult::getClobber(BB->begin()));
- ReverseNonLocalPtrDeps[BB->begin()].insert(CacheKey);
+ I->setResult(MemDepResult::getUnknown());
Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(),
Pointer.getAddr()));
break;