//
//===----------------------------------------------------------------------===//
//
-// This file implements a trivial dead store elimination that only considers
-// basic-block local redundant stores.
-//
-// FIXME: This should eventually be extended to be a post-dominator tree
-// traversal. Doing so would be pretty trivial.
+// This file implements dead store elimination that considers redundant stores
+// within a basic block as well as across basic blocks in reverse CFG order.
//
//===----------------------------------------------------------------------===//
STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
STATISTIC(NumFastStores, "Number of stores deleted");
STATISTIC(NumFastOther , "Number of other instrs removed");
+STATISTIC(NumNonLocalStores, "Number of non-local stores deleted");
+
+static cl::opt<bool>
+    EnableNonLocalDSE("enable-nonlocal-dse", cl::init(true),
+                      cl::desc("Enable non-local dead store elimination"));
+
+/// MaxNonLocalAttempts is an arbitrary threshold that provides an early
+/// opportunity to bail out in order to control compile time.
+static const unsigned MaxNonLocalAttempts = 100;
namespace {
struct DSE : public FunctionPass {
bool runOnBasicBlock(BasicBlock &BB);
bool MemoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI);
bool HandleFree(CallInst *F);
+ bool handleNonLocalDependency(Instruction *Inst);
bool handleEndBlock(BasicBlock &BB);
void RemoveAccessedObjects(const MemoryLocation &LoadedLoc,
SmallSetVector<Value *, 16> &DeadStackObjects,
bool DSE::runOnBasicBlock(BasicBlock &BB) {
const DataLayout &DL = BB.getModule()->getDataLayout();
bool MadeChange = false;
+ unsigned NumNonLocalAttempts = 0;
// Do a top-down walk on the BB.
for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
MemDepResult InstDep = MD->getDependency(Inst);
- // Ignore any store where we can't find a local dependence.
- // FIXME: cross-block DSE would be fun. :)
- if (!InstDep.isDef() && !InstDep.isClobber())
- continue;
-
- // Figure out what location is being stored to.
- MemoryLocation Loc = getLocForWrite(Inst, *AA);
+ if (InstDep.isDef() || InstDep.isClobber()) {
+ // Figure out what location is being stored to.
+ MemoryLocation Loc = getLocForWrite(Inst, *AA);
- // If we didn't get a useful location, fail.
- if (!Loc.Ptr)
- continue;
-
- while (InstDep.isDef() || InstDep.isClobber()) {
- // Get the memory clobbered by the instruction we depend on. MemDep will
- // skip any instructions that 'Loc' clearly doesn't interact with. If we
- // end up depending on a may- or must-aliased load, then we can't optimize
- // away the store and we bail out. However, if we depend on on something
- // that overwrites the memory location we *can* potentially optimize it.
- //
- // Find out what memory location the dependent instruction stores.
- Instruction *DepWrite = InstDep.getInst();
- MemoryLocation DepLoc = getLocForWrite(DepWrite, *AA);
- // If we didn't get a useful location, or if it isn't a size, bail out.
- if (!DepLoc.Ptr)
- break;
+ // If we didn't get a useful location, fail.
+ if (!Loc.Ptr)
+ continue;
- // If we find a write that is a) removable (i.e., non-volatile), b) is
- // completely obliterated by the store to 'Loc', and c) which we know that
- // 'Inst' doesn't load from, then we can remove it.
- if (isRemovable(DepWrite) &&
- !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) {
- int64_t InstWriteOffset, DepWriteOffset;
- OverwriteResult OR =
- isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset);
- if (OR == OverwriteComplete) {
- DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
- << *DepWrite << "\n KILLER: " << *Inst << '\n');
-
- // Delete the store and now-dead instructions that feed it.
- DeleteDeadInstruction(DepWrite, *MD, *TLI);
- ++NumFastStores;
- MadeChange = true;
-
- // DeleteDeadInstruction can delete the current instruction in loop
- // cases, reset BBI.
- BBI = Inst->getIterator();
- if (BBI != BB.begin())
- --BBI;
+ while (InstDep.isDef() || InstDep.isClobber()) {
+ // Get the memory clobbered by the instruction we depend on. MemDep
+ // will skip any instructions that 'Loc' clearly doesn't interact with.
+ // If we end up depending on a may- or must-aliased load, then we can't
+        // optimize away the store and we bail out. However, if we depend on
+ // something that overwrites the memory location we *can* potentially
+ // optimize it.
+ //
+ // Find out what memory location the dependent instruction stores.
+ Instruction *DepWrite = InstDep.getInst();
+ MemoryLocation DepLoc = getLocForWrite(DepWrite, *AA);
+ // If we didn't get a useful location, or if it isn't a size, bail out.
+ if (!DepLoc.Ptr)
break;
- } else if (OR == OverwriteEnd && isShortenable(DepWrite)) {
- // TODO: base this on the target vector size so that if the earlier
- // store was too small to get vector writes anyway then its likely
- // a good idea to shorten it
- // Power of 2 vector writes are probably always a bad idea to optimize
- // as any store/memset/memcpy is likely using vector instructions so
- // shortening it to not vector size is likely to be slower
- MemIntrinsic* DepIntrinsic = cast<MemIntrinsic>(DepWrite);
- unsigned DepWriteAlign = DepIntrinsic->getAlignment();
- if (llvm::isPowerOf2_64(InstWriteOffset) ||
- ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
-
- DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW END: "
- << *DepWrite << "\n KILLER (offset "
- << InstWriteOffset << ", "
- << DepLoc.Size << ")"
- << *Inst << '\n');
-
- Value* DepWriteLength = DepIntrinsic->getLength();
- Value* TrimmedLength = ConstantInt::get(DepWriteLength->getType(),
- InstWriteOffset -
- DepWriteOffset);
- DepIntrinsic->setLength(TrimmedLength);
+
+ // If we find a write that is a) removable (i.e., non-volatile), b) is
+ // completely obliterated by the store to 'Loc', and c) which we know
+ // that 'Inst' doesn't load from, then we can remove it.
+ if (isRemovable(DepWrite) &&
+ !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) {
+ int64_t InstWriteOffset, DepWriteOffset;
+ OverwriteResult OR = isOverwrite(Loc, DepLoc, DL, *TLI,
+ DepWriteOffset, InstWriteOffset);
+ if (OR == OverwriteComplete) {
+ DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DepWrite
+ << "\n KILLER: " << *Inst << '\n');
+
+ // Delete the store and now-dead instructions that feed it.
+ DeleteDeadInstruction(DepWrite, *MD, *TLI);
+ ++NumFastStores;
MadeChange = true;
+
+ // DeleteDeadInstruction can delete the current instruction in loop
+ // cases, reset BBI.
+ BBI = Inst->getIterator();
+ if (BBI != BB.begin())
+ --BBI;
+ break;
+ } else if (OR == OverwriteEnd && isShortenable(DepWrite)) {
+            // TODO: base this on the target vector size so that if the
+            // earlier store was too small to get vector writes anyway then
+            // it's likely a good idea to shorten it.
+
+ // Power of 2 vector writes are probably always a bad idea to
+ // optimize as any store/memset/memcpy is likely using vector
+ // instructions so shortening it to not vector size is likely to be
+ // slower.
+ MemIntrinsic *DepIntrinsic = cast<MemIntrinsic>(DepWrite);
+ unsigned DepWriteAlign = DepIntrinsic->getAlignment();
+ if (llvm::isPowerOf2_64(InstWriteOffset) ||
+ ((DepWriteAlign != 0) &&
+ InstWriteOffset % DepWriteAlign == 0)) {
+
+ DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW END: " << *DepWrite
+ << "\n KILLER (offset " << InstWriteOffset << ", "
+ << DepLoc.Size << ")" << *Inst << '\n');
+
+ Value *DepWriteLength = DepIntrinsic->getLength();
+ Value *TrimmedLength = ConstantInt::get(
+ DepWriteLength->getType(), InstWriteOffset - DepWriteOffset);
+ DepIntrinsic->setLength(TrimmedLength);
+ MadeChange = true;
+ }
}
}
- }
- // If this is a may-aliased store that is clobbering the store value, we
- // can keep searching past it for another must-aliased pointer that stores
- // to the same location. For example, in:
- // store -> P
- // store -> Q
- // store -> P
- // we can remove the first store to P even though we don't know if P and Q
- // alias.
- if (DepWrite == &BB.front()) break;
-
- // Can't look past this instruction if it might read 'Loc'.
- if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref)
- break;
+ // If this is a may-aliased store that is clobbering the store value, we
+ // can keep searching past it for another must-aliased pointer that
+ // stores to the same location. For example, in
+ // store -> P
+ // store -> Q
+ // store -> P
+ // we can remove the first store to P even though we don't know if P and
+ // Q alias.
+ if (DepWrite == &BB.front())
+ break;
+
+ // Can't look past this instruction if it might read 'Loc'.
+ if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref)
+ break;
- InstDep = MD->getPointerDependencyFrom(Loc, false,
- DepWrite->getIterator(), &BB);
+ InstDep = MD->getPointerDependencyFrom(Loc, false,
+ DepWrite->getIterator(), &BB);
+ }
+ } else if (EnableNonLocalDSE && InstDep.isNonLocal()) { // DSE across BB
+ if (++NumNonLocalAttempts < MaxNonLocalAttempts)
+ MadeChange |= handleNonLocalDependency(Inst);
}
}
return MadeChange;
}
+/// A helper for handleNonLocalDependency() that finds all blocks leading to
+/// the input block BB and appends them to the output PredBlocks.
+/// PredBlocks will include not only predecessors of BB that unconditionally
+/// lead to BB but also:
+/// - single-block loops that lead to BB, and
+/// - if-blocks for which one edge goes to BB and the other edge goes to
+/// a block in the input SafeBlocks.
+/// PredBlocks will not include blocks unreachable from the entry block, nor
+/// blocks that form cycles with BB.
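+///
+/// For illustration, the "if-block" case looks like this (hypothetical CFG):
+///
+///        Pred
+///       /    \
+///      v      v
+///   Safe      BB
+///
+/// Pred is appended to PredBlocks because its only successor other than BB
+/// is already in SafeBlocks.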
+static void findSafePreds(SmallVectorImpl<BasicBlock *> &PredBlocks,
+ SmallSetVector<BasicBlock *, 8> &SafeBlocks,
+ BasicBlock *BB, DominatorTree *DT) {
+ for (auto I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
+ BasicBlock *Pred = *I;
+ if (Pred == BB)
+ continue;
+ // The second check below prevents adding blocks that form a cycle with BB
+ // in order to avoid potential problems due to MemoryDependenceAnalysis,
+ // isOverwrite, etc. being not loop-aware.
+ if (!DT->isReachableFromEntry(Pred) || DT->dominates(BB, Pred))
+ continue;
+
+ bool PredIsSafe = true;
+ for (auto II = succ_begin(Pred), EE = succ_end(Pred); II != EE; ++II) {
+ BasicBlock *Succ = *II;
+      if (Succ == BB || Succ == Pred) // Shortcut; BB is already in SafeBlocks.
+ continue;
+ if (!SafeBlocks.count(Succ)) {
+ PredIsSafe = false;
+ break;
+ }
+ }
+ if (PredIsSafe)
+ PredBlocks.push_back(Pred);
+ }
+}
+
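+/// Return true if the object stored to by SI is known not to alias any of
+/// the underlying objects of the pointer loaded by LI. This is used below to
+/// discard false load dependencies that arise from looking through phis.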
+static bool underlyingObjectsDoNotAlias(StoreInst *SI, LoadInst *LI,
+ const DataLayout &DL,
+ AliasAnalysis &AA) {
+ Value *AObj = GetUnderlyingObject(SI->getPointerOperand(), DL);
+ SmallVector<Value *, 4> Pointers;
+ GetUnderlyingObjects(LI->getPointerOperand(), Pointers, DL);
+
+ for (auto I = Pointers.begin(), E = Pointers.end(); I != E; ++I) {
+ Value *BObj = *I;
+ if (!AA.isNoAlias(AObj, DL.getTypeStoreSize(AObj->getType()), BObj,
+ DL.getTypeStoreSize(BObj->getType())))
+ return false;
+ }
+ return true;
+}
+
+/// handleNonLocalDependency - Handle a non-local dependency on the input
+/// instruction Inst located in BB, in an attempt to remove redundant stores
+/// outside BB.
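+///
+/// An illustrative sketch of the kind of case this targets (hypothetical IR,
+/// not taken from an actual test):
+///
+///   pred:                       ; bb is the only successor of pred
+///     store i32 0, i32* %p      ; dead: overwritten by the store in bb
+///     br label %bb
+///   bb:
+///     store i32 1, i32* %p      ; Inst, the killing store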
+bool DSE::handleNonLocalDependency(Instruction *Inst) {
+ auto *SI = dyn_cast<StoreInst>(Inst);
+ if (!SI)
+ return false;
+ // Get the location being stored to.
+ // If we don't get a useful location, bail out.
+ MemoryLocation Loc = getLocForWrite(SI, *AA);
+ if (!Loc.Ptr)
+ return false;
+
+ bool MadeChange = false;
+ BasicBlock *BB = Inst->getParent();
+ const DataLayout &DL = BB->getModule()->getDataLayout();
+
+  // Worklist of predecessor blocks of BB.
+ SmallVector<BasicBlock *, 8> Blocks;
+  // Keep track of all predecessor blocks that are safe to search through.
+ SmallSetVector<BasicBlock *, 8> SafeBlocks;
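+  // BB itself, the block containing the killing store, is trivially safe.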
+ SafeBlocks.insert(BB);
+ findSafePreds(Blocks, SafeBlocks, BB, DT);
+
+ while (!Blocks.empty()) {
+ BasicBlock *PB = Blocks.pop_back_val();
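+    // Query memory dependencies of Loc starting from the bottom of the
+    // predecessor block PB.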
+ MemDepResult Dep =
+ MD->getPointerDependencyFrom(Loc, false, PB->end(), PB, SI);
+ while (Dep.isDef() || Dep.isClobber()) {
+ Instruction *Dependency = Dep.getInst();
+
+      // Filter out a false dependency from a load to SI that arises when
+      // looking through phis.
+ if (auto *LI = dyn_cast<LoadInst>(Dependency)) {
+ if (underlyingObjectsDoNotAlias(SI, LI, DL, *AA)) {
+ Dep = MD->getPointerDependencyFrom(Loc, false,
+ Dependency->getIterator(), PB, SI);
+ continue;
+ }
+ }
+
+ // If we don't get a useful location for the dependent instruction,
+ // it doesn't write memory, it is not removable, or it might read Loc,
+ // then bail out.
+ MemoryLocation DepLoc = getLocForWrite(Dependency, *AA);
+ if (!DepLoc.Ptr || !hasMemoryWrite(Dependency, *TLI) ||
+ !isRemovable(Dependency) ||
+ (AA->getModRefInfo(Dependency, Loc) & MRI_Ref))
+ break;
+
+      // Don't remove a store within single-block loops; we would need more
+      // analysis, e.g. looking for an interfering load above the store
+      // within the loop.
+ bool SingleBlockLoop = false;
+ for (auto I = succ_begin(PB), E = succ_end(PB); I != E; ++I) {
+ BasicBlock *Succ = *I;
+ if (Succ == PB) {
+ SingleBlockLoop = true;
+ break;
+ }
+ }
+ if (SingleBlockLoop)
+ break;
+
+ int64_t InstWriteOffset, DepWriteOffset;
+ OverwriteResult OR =
+ isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset);
+ if (OR == OverwriteComplete) {
+ DEBUG(dbgs() << "DSE: Remove Non-Local Dead Store:\n DEAD: "
+ << *Dependency << "\n KILLER: " << *SI << '\n');
+
+ // Delete redundant store and now-dead instructions that feed it.
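+        // Remember the iterator after Dependency before it is erased so the
+        // dependency query below can resume from that point.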
+ auto Next = std::next(Dependency->getIterator());
+ DeleteDeadInstruction(Dependency, *MD, *TLI);
+ ++NumNonLocalStores;
+ MadeChange = true;
+ Dep = MD->getPointerDependencyFrom(Loc, false, Next, PB, SI);
+ continue;
+ }
+ // TODO: attempt shortening of Dependency inst as in the local case
+ break;
+ }
+
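+    // The query reached the top of PB without finding any blocking
+    // dependency, so PB is safe; continue the search through its
+    // predecessors.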
+ if (Dep.isNonLocal()) {
+ SafeBlocks.insert(PB);
+ findSafePreds(Blocks, SafeBlocks, PB, DT);
+ }
+ }
+
+ return MadeChange;
+}
+
/// Returns true if the memory which is accessed by the second instruction is not
/// modified between the first and the second instruction.
/// Precondition: Second instruction must be dominated by the first