#include "llvm/Function.h"
#include "llvm/LLVMContext.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
STATISTIC(NumCacheCompleteNonLocalPtr,
"Number of block queries that were completely cached");
+// Limit for the number of instructions to scan in a block.
+// FIXME: Figure out what a sane value is for this.
+// (500 is relatively insane.)
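+// Both the call-site and pointer dependency scans below decrement this once
+// per instruction visited and give up with an "unknown" result when it
+// reaches zero.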
+static const int BlockScanLimit = 500;
+
char MemoryDependenceAnalysis::ID = 0;
// Register this pass...
bool MemoryDependenceAnalysis::runOnFunction(Function &) {
AA = &getAnalysis<AliasAnalysis>();
TD = getAnalysisIfAvailable<TargetData>();
+ DT = getAnalysisIfAvailable<DominatorTree>();
if (PredCache == 0)
PredCache.reset(new PredIteratorCache());
return false;
AliasAnalysis::Location &Loc,
AliasAnalysis *AA) {
if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
- if (LI->isVolatile()) {
- Loc = AliasAnalysis::Location();
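+ // Classify the load by its atomic ordering: an unordered load just reads
+ // its location; a monotonic load additionally orders other memory
+ // operations, which we conservatively model as ModRef; anything stronger
+ // is treated as reading and writing unknown memory.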
+ if (LI->isUnordered()) {
+ Loc = AA->getLocation(LI);
+ return AliasAnalysis::Ref;
+ } else if (LI->getOrdering() == Monotonic) {
+ Loc = AA->getLocation(LI);
return AliasAnalysis::ModRef;
}
- Loc = AA->getLocation(LI);
- return AliasAnalysis::Ref;
+ Loc = AliasAnalysis::Location();
+ return AliasAnalysis::ModRef;
}
if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- if (SI->isVolatile()) {
- Loc = AliasAnalysis::Location();
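+ // Same classification as for loads: unordered stores just write their
+ // location, monotonic stores are conservatively ModRef, and anything
+ // stronger may touch unknown memory.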
+ if (SI->isUnordered()) {
+ Loc = AA->getLocation(SI);
+ return AliasAnalysis::Mod;
+ } else if (SI->getOrdering() == Monotonic) {
+ Loc = AA->getLocation(SI);
return AliasAnalysis::ModRef;
}
- Loc = AA->getLocation(SI);
- return AliasAnalysis::Mod;
+ Loc = AliasAnalysis::Location();
+ return AliasAnalysis::ModRef;
}
if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
MemDepResult MemoryDependenceAnalysis::
getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
BasicBlock::iterator ScanIt, BasicBlock *BB) {
+ unsigned Limit = BlockScanLimit;
+
// Walk backwards through the block, looking for dependencies.
while (ScanIt != BB->begin()) {
+ // Limit the amount of scanning we do so we don't end up with quadratic
+ // running time on extreme testcases.
+ --Limit;
+ if (!Limit)
+ return MemDepResult::getUnknown();
+
Instruction *Inst = --ScanIt;
// If this inst is a memory op, get the pointer it accessed
}
}
- // No dependence found. If this is the entry block of the function, it is a
- // clobber, otherwise it is non-local.
+ // No dependence found. If this is the entry block of the function, it is
+ // unknown, otherwise it is non-local.
if (BB != &BB->getParent()->getEntryBlock())
return MemDepResult::getNonLocal();
- return MemDepResult::getClobber(ScanIt);
+ return MemDepResult::getNonFuncLocal();
}
/// isLoadLoadClobberIfExtendedToFullWidth - Return true if LI is a load that
getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
unsigned MemLocSize, const LoadInst *LI,
const TargetData &TD) {
- // We can only extend non-volatile integer loads.
- if (!isa<IntegerType>(LI->getType()) || LI->isVolatile()) return 0;
+ // We can only extend simple integer loads.
+ if (!isa<IntegerType>(LI->getType()) || !LI->isSimple()) return 0;
// Get the base of this load.
int64_t LIOffs = 0;
!TD.fitsInLegalInteger(NewLoadByteSize*8))
return 0;
+ if (LIOffs+NewLoadByteSize > MemLocEnd &&
+ LI->getParent()->getParent()->hasFnAttr(Attribute::AddressSafety)) {
+ // We will be reading past the location accessed by the original program.
+ // While this is safe in a regular build, Address Safety analysis tools
+ // may start reporting false warnings. So, don't do widening.
+ return 0;
+ }
+
// If a load of this width would include all of MemLoc, then we succeed.
if (LIOffs+NewLoadByteSize >= MemLocEnd)
return NewLoadByteSize;
NewLoadByteSize <<= 1;
}
-
- return 0;
}
+
+namespace {
+ /// Only find pointer captures which happen before the given instruction. Uses
+ /// the dominator tree to determine whether one instruction is before another.
+ struct CapturesBefore : public CaptureTracker {
+ CapturesBefore(const Instruction *I, DominatorTree *DT)
+ : BeforeHere(I), DT(DT), Captured(false) {}
+
+ void tooManyUses() { Captured = true; }
+
+ bool shouldExplore(Use *U) {
+ Instruction *I = cast<Instruction>(U->getUser());
+ BasicBlock *BB = I->getParent();
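+ // Uses dominated by BeforeHere can only execute after it, so any capture
+ // they perform happens too late to matter here; blocks unreachable from
+ // the entry never execute at all.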
+ if (BeforeHere != I &&
+ (!DT->isReachableFromEntry(BB) || DT->dominates(BeforeHere, I)))
+ return false;
+ return true;
+ }
+
+ bool captured(Use *U) {
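+ // A capturing use is only interesting if it can execute before
+ // BeforeHere, so apply the same filter as shouldExplore.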
+ Instruction *I = cast<Instruction>(U->getUser());
+ BasicBlock *BB = I->getParent();
+ if (BeforeHere != I &&
+ (!DT->isReachableFromEntry(BB) || DT->dominates(BeforeHere, I)))
+ return false;
+ Captured = true;
+ return true;
+ }
+
+ const Instruction *BeforeHere;
+ DominatorTree *DT;
+
+ bool Captured;
+ };
+}
+
+AliasAnalysis::ModRefResult
+MemoryDependenceAnalysis::getModRefInfo(const Instruction *Inst,
+ const AliasAnalysis::Location &MemLoc) {
+ AliasAnalysis::ModRefResult MR = AA->getModRefInfo(Inst, MemLoc);
+ if (MR != AliasAnalysis::ModRef) return MR;
+
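+ // AA gave the conservative answer; try to refine it below. If the queried
+ // location is a function-local object that has not been captured before
+ // Inst, the call can only reach it through its nocapture or byval pointer
+ // arguments, each of which can be checked with an alias query. For
+ // example, in IR like (names illustrative):
+ //   %a = alloca i32
+ //   %b = alloca i32
+ //   call void @f(i32* nocapture %b)
+ // a query for %a against the call concludes NoModRef, because %a is never
+ // captured and %b does not alias it.
+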
+ // FIXME: this is really just shoring up a deficiency in alias analysis.
+ // BasicAA isn't willing to spend linear time determining whether an alloca
+ // was captured before or after this particular call, while we are. However,
+ // with a smarter AA in place, this test is just wasting compile time.
+ if (!DT) return AliasAnalysis::ModRef;
+ const Value *Object = GetUnderlyingObject(MemLoc.Ptr, TD);
+ if (!isIdentifiedObject(Object) || isa<GlobalValue>(Object))
+ return AliasAnalysis::ModRef;
+ ImmutableCallSite CS(Inst);
+ if (!CS.getInstruction()) return AliasAnalysis::ModRef;
+
+ CapturesBefore CB(Inst, DT);
+ llvm::PointerMayBeCaptured(Object, &CB);
+
+ if (isa<Constant>(Object) || CS.getInstruction() == Object || CB.Captured)
+ return AliasAnalysis::ModRef;
+
+ unsigned ArgNo = 0;
+ for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
+ CI != CE; ++CI, ++ArgNo) {
+ // Only look at the no-capture or byval pointer arguments. If this
+ // pointer were passed to arguments that were neither of these, then it
+ // couldn't be no-capture.
+ if (!(*CI)->getType()->isPointerTy() ||
+ (!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo)))
+ continue;
+
+ // If this is a no-capture pointer argument, see if we can tell that it
+ // is impossible to alias the pointer we're checking. If not, we have to
+ // assume that the call could touch the pointer, even though it doesn't
+ // escape.
+ if (!AA->isNoAlias(AliasAnalysis::Location(*CI),
+ AliasAnalysis::Location(Object))) {
+ return AliasAnalysis::ModRef;
+ }
+ }
+ return AliasAnalysis::NoModRef;
}
/// getPointerDependencyFrom - Return the instruction on which a memory
const Value *MemLocBase = 0;
int64_t MemLocOffset = 0;
-
+
+ unsigned Limit = BlockScanLimit;
+
// Walk backwards through the basic block, looking for dependencies.
while (ScanIt != BB->begin()) {
+ // Limit the amount of scanning we do so we don't end up with quadratic
+ // running time on extreme testcases.
+ --Limit;
+ if (!Limit)
+ return MemDepResult::getUnknown();
+
Instruction *Inst = --ScanIt;
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
// Values depend on loads if the pointers are must-aliased. This means that
// a load depends on another must-aliased load from the same value.
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ // Atomic loads have ordering constraints that complicate dependence queries.
+ // FIXME: This is overly conservative.
+ if (!LI->isUnordered())
+ return MemDepResult::getClobber(LI);
+
AliasAnalysis::Location LoadLoc = AA->getLocation(LI);
// If we found a pointer, check if it could be the same as our pointer.
// location is 1 byte at P+1). If so, return it as a load/load
// clobber result, allowing the client to decide to widen the load if
// it wants to.
- if (const IntegerType *ITy = dyn_cast<IntegerType>(LI->getType()))
+ if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType()))
if (LI->getAlignment()*8 > ITy->getPrimitiveSizeInBits() &&
isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase,
MemLocOffset, LI, TD))
if (R == AliasAnalysis::MustAlias)
return MemDepResult::getDef(Inst);
+#if 0 // FIXME: Temporarily disabled. GVN is cleverly rewriting loads
+ // in terms of clobbering loads, but since it does this by looking
+ // at the clobbering load directly, it doesn't know about any
+ // phi translation that may have happened along the way.
+
// If we have a partial alias, then return this as a clobber for the
// client to handle.
if (R == AliasAnalysis::PartialAlias)
return MemDepResult::getClobber(Inst);
+#endif
// Loads that merely may-alias each other don't, by themselves, constitute
// a dependence.
}
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // Atomic stores have ordering constraints that complicate dependence queries.
+ // FIXME: This is overly conservative.
+ if (!SI->isUnordered())
+ return MemDepResult::getClobber(SI);
+
// If alias analysis can tell that this store is guaranteed to not modify
// the query pointer, ignore it. Use getModRefInfo to handle cases where
// the query pointer points to constant memory etc.
}
// See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
- switch (AA->getModRefInfo(Inst, MemLoc)) {
+ switch (getModRefInfo(Inst, MemLoc)) {
case AliasAnalysis::NoModRef:
// If the call has no effect on the queried pointer, just ignore it.
continue;
}
}
- // No dependence found. If this is the entry block of the function, it is a
- // clobber, otherwise it is non-local.
+ // No dependence found. If this is the entry block of the function, it is
+ // unknown, otherwise it is non-local.
if (BB != &BB->getParent()->getEntryBlock())
return MemDepResult::getNonLocal();
- return MemDepResult::getClobber(ScanIt);
+ return MemDepResult::getNonFuncLocal();
}
/// getDependency - Return the instruction on which a memory operation
// Do the scan.
if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
- // No dependence found. If this is the entry block of the function, it is a
- // clobber, otherwise it is non-local.
+ // No dependence found. If this is the entry block of the function, it is
+ // unknown, otherwise it is non-local.
if (QueryParent != &QueryParent->getParent()->getEntryBlock())
LocalCache = MemDepResult::getNonLocal();
else
- LocalCache = MemDepResult::getClobber(QueryInst);
+ LocalCache = MemDepResult::getNonFuncLocal();
} else {
AliasAnalysis::Location MemLoc;
AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA);
// If we can do a pointer scan, make it happen.
bool isLoad = !(MR & AliasAnalysis::Mod);
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst))
- isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_end;
+ isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos,
QueryParent);
} else
// Non-memory instruction.
- LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
+ LocalCache = MemDepResult::getUnknown();
}
// Remember the result!
Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall, ScanPos, DirtyBB);
} else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
// No dependence found. If this is the entry block of the function, it is
- // a clobber, otherwise it is non-local.
+ // unknown, otherwise it is non-local.
Dep = MemDepResult::getNonLocal();
} else {
- Dep = MemDepResult::getClobber(ScanPos);
+ Dep = MemDepResult::getNonFuncLocal();
}
// If we had a dirty entry for the block, update it. Otherwise, just add
return;
Result.clear();
Result.push_back(NonLocalDepResult(FromBB,
- MemDepResult::getClobber(FromBB->begin()),
+ MemDepResult::getUnknown(),
const_cast<Value *>(Loc.Ptr)));
}
// If the block has a dependency (i.e. it isn't completely transparent to
// the value), remember the reverse association because we just added it
// to Cache!
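+ // Unknown and non-function-local results, like non-local ones, have no
+ // instruction to record in the reverse map, so bail out for anything that
+ // is not a def or a clobber.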
- if (Dep.isNonLocal())
+ if (!Dep.isDef() && !Dep.isClobber())
return Dep;
// Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
SmallVector<BasicBlock*, 32> Worklist;
Worklist.push_back(StartBB);
+ // PredList is used inside the loop below; it is declared here so its
+ // storage can be reused across iterations.
+ SmallVector<std::pair<BasicBlock*, PHITransAddr>, 16> PredList;
+
// Keep track of the entries that we know are sorted. Previously cached
// entries will all be sorted. The entries we add we only sort on demand (we
// don't insert every element into its sorted position). We know that we
// the same Pointer.
if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
SkipFirstBlock = false;
+ SmallVector<BasicBlock*, 16> NewBlocks;
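+ // Stage newly-discovered predecessors here instead of pushing them
+ // straight onto the Worklist, so their Visited entries can be backed out
+ // if a phi translation failure is found partway through the preds.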
for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
// Verify that we haven't looked at this block yet.
std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
InsertRes = Visited.insert(std::make_pair(*PI, Pointer.getAddr()));
if (InsertRes.second) {
// First time we've looked at *PI.
- Worklist.push_back(*PI);
+ NewBlocks.push_back(*PI);
continue;
}
// If we have seen this block before, but it was with a different
// pointer, then we have a phi translation failure and we have to treat
// this as a clobber.
- if (InsertRes.first->second != Pointer.getAddr())
+ if (InsertRes.first->second != Pointer.getAddr()) {
+ // Make sure to clean up the Visited map before continuing on to
+ // PredTranslationFailure.
+ for (unsigned i = 0; i < NewBlocks.size(); i++)
+ Visited.erase(NewBlocks[i]);
goto PredTranslationFailure;
+ }
}
+ Worklist.append(NewBlocks.begin(), NewBlocks.end());
continue;
}
NumSortedEntries = Cache->size();
}
Cache = 0;
-
+
+ PredList.clear();
for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
BasicBlock *Pred = *PI;
-
+ PredList.push_back(std::make_pair(Pred, Pointer));
+
// Get the PHI translated pointer in this predecessor. This can fail if the
// pointer is not translatable, in which case getAddr() returns null.
- PHITransAddr PredPointer(Pointer);
+ PHITransAddr &PredPointer = PredList.back().second;
PredPointer.PHITranslateValue(BB, Pred, 0);
Value *PredPtrVal = PredPointer.getAddr();
InsertRes = Visited.insert(std::make_pair(Pred, PredPtrVal));
if (!InsertRes.second) {
+ // We found the pred; take it off the list of preds to visit.
+ PredList.pop_back();
+
// If the predecessor was visited with PredPtr, then we already did
// the analysis and can ignore it.
if (InsertRes.first->second == PredPtrVal)
// Otherwise, the block was previously analyzed with a different
// pointer. We can't represent the result of this case, so we just
// treat this as a phi translation failure.
+
+ // Make sure to clean up the Visited map before continuing on to
+ // PredTranslationFailure.
+ for (unsigned i = 0; i < PredList.size(); i++)
+ Visited.erase(PredList[i].first);
+
goto PredTranslationFailure;
}
-
+ }
+
+ // Actually process results here; this needs to be a separate loop to avoid
+ // calling getNonLocalPointerDepFromBB for blocks we don't want to return
+ // any results for. (getNonLocalPointerDepFromBB will modify our
+ // data structures in ways the code after the PredTranslationFailure label
+ // doesn't expect.)
+ for (unsigned i = 0; i < PredList.size(); i++) {
+ BasicBlock *Pred = PredList[i].first;
+ PHITransAddr &PredPointer = PredList[i].second;
+ Value *PredPtrVal = PredPointer.getAddr();
+
+ bool CanTranslate = true;
// If PHI translation was unable to find an available pointer in this
// predecessor, then we have to assume that the pointer is clobbered in
// that predecessor. We can still do PRE of the load, which would insert
// a computation of the pointer in this predecessor.
- if (PredPtrVal == 0) {
+ if (PredPtrVal == 0)
+ CanTranslate = false;
+
+ // FIXME: it is entirely possible that PHI translating will end up with
+ // the same value. Consider PHI translating something like:
+ // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
+ // to recurse here, pedantically speaking.
+
+ // If getNonLocalPointerDepFromBB fails here, that means the cached
+ // result conflicted with the Visited list; we have to conservatively
+ // assume it is unknown, but this also does not block PRE of the load.
+ if (!CanTranslate ||
+ getNonLocalPointerDepFromBB(PredPointer,
+ Loc.getWithNewPtr(PredPtrVal),
+ isLoad, Pred,
+ Result, Visited)) {
// Add the entry to the Result list.
- NonLocalDepResult Entry(Pred,
- MemDepResult::getClobber(Pred->getTerminator()),
- PredPtrVal);
+ NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
Result.push_back(Entry);
// Since we had a phi translation failure, the cache for CacheKey won't
NLPI.Pair = BBSkipFirstBlockPair();
continue;
}
-
- // FIXME: it is entirely possible that PHI translating will end up with
- // the same value. Consider PHI translating something like:
- // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
- // to recurse here, pedantically speaking.
-
- // If we have a problem phi translating, fall through to the code below
- // to handle the failure condition.
- if (getNonLocalPointerDepFromBB(PredPointer,
- Loc.getWithNewPtr(PredPointer.getAddr()),
- isLoad, Pred,
- Result, Visited))
- goto PredTranslationFailure;
}
// Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
continue;
PredTranslationFailure:
+ // The following code is "failure"; we can't produce a sane translation
+ // for the given block. It assumes that we haven't modified any of
+ // our data structures while processing the current block.
if (Cache == 0) {
// Refresh the CacheInfo/Cache pointer if it got invalidated.
// results from the set". Clear out the indicator for this.
CacheInfo->Pair = BBSkipFirstBlockPair();
- // If *nothing* works, mark the pointer as being clobbered by the first
- // instruction in this block.
+ // If *nothing* works, mark the pointer as unknown.
//
- // If this is the magic first block, return this as a clobber of the whole
- // incoming value. Since we can't phi translate to one of the predecessors,
+ // If this is the magic first block, report the whole incoming value as
+ // unknown. Since we can't phi translate to one of the predecessors,
assert(I->getResult().isNonLocal() &&
"Should only be here with transparent block");
- I->setResult(MemDepResult::getClobber(BB->begin()));
- ReverseNonLocalPtrDeps[BB->begin()].insert(CacheKey);
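+ // An unknown result has no associated instruction, so there is no longer
+ // anything to record in ReverseNonLocalPtrDeps.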
+ I->setResult(MemDepResult::getUnknown());
Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(),
Pointer.getAddr()));
break;