From: Chad Rosier Date: Fri, 11 Dec 2015 18:39:41 +0000 (+0000) Subject: Revert r255247, r255265, and r255286 due to serious compile-time regressions. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=23c0f3b5a2197452c6a25a74fe2fc624763547a1;p=oota-llvm.git Revert r255247, r255265, and r255286 due to serious compile-time regressions. Revert "[DSE] Disable non-local DSE to see if the bots go green." Revert "[DeadStoreElimination] Use range-based loops. NFC." Revert "[DeadStoreElimination] Add support for non-local DSE." git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255354 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index e6996ab97a8..36ad0a5f7b9 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -7,8 +7,11 @@ // //===----------------------------------------------------------------------===// // -// This file implements dead store elimination that considers redundant stores -// within a basic-block as well as across basic blocks in a reverse CFG order. +// This file implements a trivial dead store elimination that only considers +// basic-block local redundant stores. +// +// FIXME: This should eventually be extended to be a post-dominator tree +// traversal. Doing so would be pretty trivial. // //===----------------------------------------------------------------------===// @@ -41,13 +44,6 @@ using namespace llvm; STATISTIC(NumRedundantStores, "Number of redundant stores deleted"); STATISTIC(NumFastStores, "Number of stores deleted"); STATISTIC(NumFastOther , "Number of other instrs removed"); -STATISTIC(NumNonLocalStores, "Number of non-local stores deleted"); - -static cl::opt EnableNonLocalDSE("enable-nonlocal-dse", cl::init(false)); - -/// MaxNonLocalAttempts is an arbitrary threshold that provides -/// an early opportunitiy for bail out to control compile time. -static const unsigned MaxNonLocalAttempts = 100; namespace { struct DSE : public FunctionPass { @@ -84,7 +80,6 @@ namespace { bool runOnBasicBlock(BasicBlock &BB); bool MemoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI); bool HandleFree(CallInst *F); - bool handleNonLocalDependency(Instruction *Inst); bool handleEndBlock(BasicBlock &BB); void RemoveAccessedObjects(const MemoryLocation &LoadedLoc, SmallSetVector &DeadStackObjects, @@ -490,7 +485,6 @@ static bool isPossibleSelfRead(Instruction *Inst, bool DSE::runOnBasicBlock(BasicBlock &BB) { const DataLayout &DL = BB.getModule()->getDataLayout(); bool MadeChange = false; - unsigned NumNonLocalAttempts = 0; // Do a top-down walk on the BB. for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) { @@ -560,101 +554,99 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { MemDepResult InstDep = MD->getDependency(Inst); - if (InstDep.isDef() || InstDep.isClobber()) { - // Figure out what location is being stored to. - MemoryLocation Loc = getLocForWrite(Inst, *AA); + // Ignore any store where we can't find a local dependence. + // FIXME: cross-block DSE would be fun. :) + if (!InstDep.isDef() && !InstDep.isClobber()) + continue; - // If we didn't get a useful location, fail. - if (!Loc.Ptr) - continue; + // Figure out what location is being stored to. + MemoryLocation Loc = getLocForWrite(Inst, *AA); - while (InstDep.isDef() || InstDep.isClobber()) { - // Get the memory clobbered by the instruction we depend on. MemDep - // will skip any instructions that 'Loc' clearly doesn't interact with. - // If we end up depending on a may- or must-aliased load, then we can't - // optimize away the store and we bail out. However, if we depend on on - // something that overwrites the memory location we *can* potentially - // optimize it. - // - // Find out what memory location the dependent instruction stores. - Instruction *DepWrite = InstDep.getInst(); - MemoryLocation DepLoc = getLocForWrite(DepWrite, *AA); - // If we didn't get a useful location, or if it isn't a size, bail out. - if (!DepLoc.Ptr) - break; + // If we didn't get a useful location, fail. + if (!Loc.Ptr) + continue; - // If we find a write that is a) removable (i.e., non-volatile), b) is - // completely obliterated by the store to 'Loc', and c) which we know - // that 'Inst' doesn't load from, then we can remove it. - if (isRemovable(DepWrite) && - !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) { - int64_t InstWriteOffset, DepWriteOffset; - OverwriteResult OR = isOverwrite(Loc, DepLoc, DL, *TLI, - DepWriteOffset, InstWriteOffset); - if (OR == OverwriteComplete) { - DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DepWrite - << "\n KILLER: " << *Inst << '\n'); - - // Delete the store and now-dead instructions that feed it. - DeleteDeadInstruction(DepWrite, *MD, *TLI); - ++NumFastStores; - MadeChange = true; + while (InstDep.isDef() || InstDep.isClobber()) { + // Get the memory clobbered by the instruction we depend on. MemDep will + // skip any instructions that 'Loc' clearly doesn't interact with. If we + // end up depending on a may- or must-aliased load, then we can't optimize + // away the store and we bail out. However, if we depend on on something + // that overwrites the memory location we *can* potentially optimize it. + // + // Find out what memory location the dependent instruction stores. + Instruction *DepWrite = InstDep.getInst(); + MemoryLocation DepLoc = getLocForWrite(DepWrite, *AA); + // If we didn't get a useful location, or if it isn't a size, bail out. + if (!DepLoc.Ptr) + break; - // DeleteDeadInstruction can delete the current instruction in loop - // cases, reset BBI. - BBI = Inst->getIterator(); - if (BBI != BB.begin()) - --BBI; - break; - } else if (OR == OverwriteEnd && isShortenable(DepWrite)) { - // TODO: base this on the target vector size so that if the earlier - // store was too small to get vector writes anyway then its likely a - // good idea to shorten it. - - // Power of 2 vector writes are probably always a bad idea to - // optimize as any store/memset/memcpy is likely using vector - // instructions so shortening it to not vector size is likely to be - // slower. - MemIntrinsic *DepIntrinsic = cast(DepWrite); - unsigned DepWriteAlign = DepIntrinsic->getAlignment(); - if (llvm::isPowerOf2_64(InstWriteOffset) || - ((DepWriteAlign != 0) && - InstWriteOffset % DepWriteAlign == 0)) { - - DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW END: " << *DepWrite - << "\n KILLER (offset " << InstWriteOffset << ", " - << DepLoc.Size << ")" << *Inst << '\n'); - - Value *DepWriteLength = DepIntrinsic->getLength(); - Value *TrimmedLength = ConstantInt::get( - DepWriteLength->getType(), InstWriteOffset - DepWriteOffset); - DepIntrinsic->setLength(TrimmedLength); - MadeChange = true; - } + // If we find a write that is a) removable (i.e., non-volatile), b) is + // completely obliterated by the store to 'Loc', and c) which we know that + // 'Inst' doesn't load from, then we can remove it. + if (isRemovable(DepWrite) && + !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) { + int64_t InstWriteOffset, DepWriteOffset; + OverwriteResult OR = + isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset); + if (OR == OverwriteComplete) { + DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " + << *DepWrite << "\n KILLER: " << *Inst << '\n'); + + // Delete the store and now-dead instructions that feed it. + DeleteDeadInstruction(DepWrite, *MD, *TLI); + ++NumFastStores; + MadeChange = true; + + // DeleteDeadInstruction can delete the current instruction in loop + // cases, reset BBI. + BBI = Inst->getIterator(); + if (BBI != BB.begin()) + --BBI; + break; + } else if (OR == OverwriteEnd && isShortenable(DepWrite)) { + // TODO: base this on the target vector size so that if the earlier + // store was too small to get vector writes anyway then its likely + // a good idea to shorten it + // Power of 2 vector writes are probably always a bad idea to optimize + // as any store/memset/memcpy is likely using vector instructions so + // shortening it to not vector size is likely to be slower + MemIntrinsic* DepIntrinsic = cast(DepWrite); + unsigned DepWriteAlign = DepIntrinsic->getAlignment(); + if (llvm::isPowerOf2_64(InstWriteOffset) || + ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) { + + DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW END: " + << *DepWrite << "\n KILLER (offset " + << InstWriteOffset << ", " + << DepLoc.Size << ")" + << *Inst << '\n'); + + Value* DepWriteLength = DepIntrinsic->getLength(); + Value* TrimmedLength = ConstantInt::get(DepWriteLength->getType(), + InstWriteOffset - + DepWriteOffset); + DepIntrinsic->setLength(TrimmedLength); + MadeChange = true; } } + } - // If this is a may-aliased store that is clobbering the store value, we - // can keep searching past it for another must-aliased pointer that - // stores to the same location. For example, in - // store -> P - // store -> Q - // store -> P - // we can remove the first store to P even though we don't know if P and - // Q alias. - if (DepWrite == &BB.front()) - break; - - // Can't look past this instruction if it might read 'Loc'. - if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref) - break; + // If this is a may-aliased store that is clobbering the store value, we + // can keep searching past it for another must-aliased pointer that stores + // to the same location. For example, in: + // store -> P + // store -> Q + // store -> P + // we can remove the first store to P even though we don't know if P and Q + // alias. + if (DepWrite == &BB.front()) break; + + // Can't look past this instruction if it might read 'Loc'. + if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref) + break; - InstDep = MD->getPointerDependencyFrom(Loc, false, - DepWrite->getIterator(), &BB); - } - } else if (EnableNonLocalDSE && InstDep.isNonLocal()) { // DSE across BB - if (++NumNonLocalAttempts < MaxNonLocalAttempts) - MadeChange |= handleNonLocalDependency(Inst); + InstDep = MD->getPointerDependencyFrom(Loc, false, + DepWrite->getIterator(), &BB); } } @@ -666,147 +658,6 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { return MadeChange; } -/// A helper for handleNonLocalDependency() function to find all blocks -/// that lead to the input block BB and append them to the output PredBlocks. -/// PredBlocks will include not only predecessors of BB that unconditionally -/// lead to BB but also: -/// - single-block loops that lead to BB, and -/// - if-blocks for which one edge goes to BB and the other edge goes to -/// a block in the input SafeBlocks. -/// PredBlocks will not include blocks unreachable from the entry block, nor -/// blocks that form cycles with BB. -static void findSafePreds(SmallVectorImpl &PredBlocks, - SmallSetVector &SafeBlocks, - BasicBlock *BB, DominatorTree *DT) { - for (auto *Pred : predecessors(BB)) { - if (Pred == BB) - continue; - // The second check below prevents adding blocks that form a cycle with BB - // in order to avoid potential problems due to MemoryDependenceAnalysis, - // isOverwrite, etc. being not loop-aware. - if (!DT->isReachableFromEntry(Pred) || DT->dominates(BB, Pred)) - continue; - - bool PredIsSafe = true; - for (auto *Succ : successors(Pred)) { - if (Succ == BB || Succ == Pred) // shortcut, BB should be in SafeBlocks - continue; - if (!SafeBlocks.count(Succ)) { - PredIsSafe = false; - break; - } - } - if (PredIsSafe) - PredBlocks.push_back(Pred); - } -} - -static bool underlyingObjectsDoNotAlias(StoreInst *SI, LoadInst *LI, - const DataLayout &DL, - AliasAnalysis &AA) { - Value *AObj = GetUnderlyingObject(SI->getPointerOperand(), DL); - SmallVector Pointers; - GetUnderlyingObjects(LI->getPointerOperand(), Pointers, DL); - - for (auto *BObj : Pointers) { - if (!AA.isNoAlias(AObj, DL.getTypeStoreSize(AObj->getType()), BObj, - DL.getTypeStoreSize(BObj->getType()))) - return false; - } - return true; -} - -/// handleNonLocalDependency - Handle a non-local dependency on -/// the input instruction Inst located in BB in attempt to remove -/// redundant stores outside BB. -bool DSE::handleNonLocalDependency(Instruction *Inst) { - auto *SI = dyn_cast(Inst); - if (!SI) - return false; - // Get the location being stored to. - // If we don't get a useful location, bail out. - MemoryLocation Loc = getLocForWrite(SI, *AA); - if (!Loc.Ptr) - return false; - - bool MadeChange = false; - BasicBlock *BB = Inst->getParent(); - const DataLayout &DL = BB->getModule()->getDataLayout(); - - // Worklist of predecessor blocks of BB - SmallVector Blocks; - // Keep track of all predecessor blocks that are safe to search through - SmallSetVector SafeBlocks; - SafeBlocks.insert(BB); - findSafePreds(Blocks, SafeBlocks, BB, DT); - - while (!Blocks.empty()) { - BasicBlock *PB = Blocks.pop_back_val(); - MemDepResult Dep = - MD->getPointerDependencyFrom(Loc, false, PB->end(), PB, SI); - while (Dep.isDef() || Dep.isClobber()) { - Instruction *Dependency = Dep.getInst(); - - // Filter out false dependency from a load to SI looking through phis. - if (auto *LI = dyn_cast(Dependency)) { - if (underlyingObjectsDoNotAlias(SI, LI, DL, *AA)) { - Dep = MD->getPointerDependencyFrom(Loc, false, - Dependency->getIterator(), PB, SI); - continue; - } - } - - // If we don't get a useful location for the dependent instruction, - // it doesn't write memory, it is not removable, or it might read Loc, - // then bail out. - MemoryLocation DepLoc = getLocForWrite(Dependency, *AA); - if (!DepLoc.Ptr || !hasMemoryWrite(Dependency, *TLI) || - !isRemovable(Dependency) || - (AA->getModRefInfo(Dependency, Loc) & MRI_Ref)) - break; - - // Don't remove a store within single-block loops; - // we need more analysis: e.g. looking for an interferring load - // above the store within the loop, etc. - bool SingleBlockLoop = false; - for (auto I = succ_begin(PB), E = succ_end(PB); I != E; ++I) { - BasicBlock *Succ = *I; - if (Succ == PB) { - SingleBlockLoop = true; - break; - } - } - if (SingleBlockLoop) - break; - - int64_t InstWriteOffset, DepWriteOffset; - OverwriteResult OR = - isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset); - if (OR == OverwriteComplete) { - DEBUG(dbgs() << "DSE: Remove Non-Local Dead Store:\n DEAD: " - << *Dependency << "\n KILLER: " << *SI << '\n'); - - // Delete redundant store and now-dead instructions that feed it. - auto Next = std::next(Dependency->getIterator()); - DeleteDeadInstruction(Dependency, *MD, *TLI); - ++NumNonLocalStores; - MadeChange = true; - Dep = MD->getPointerDependencyFrom(Loc, false, Next, PB, SI); - continue; - } - // TODO: attempt shortening of Dependency inst as in the local case - break; - } - - if (Dep.isNonLocal()) { - SafeBlocks.insert(PB); - findSafePreds(Blocks, SafeBlocks, PB, DT); - } - } - - return MadeChange; -} - /// Returns true if the memory which is accessed by the second instruction is not /// modified between the first and the second instruction. /// Precondition: Second instruction must be dominated by the first @@ -855,10 +706,10 @@ bool DSE::MemoryIsNotModifiedBetween(Instruction *FirstI, if (B != FirstBB) { assert(B != &FirstBB->getParent()->getEntryBlock() && "Should not hit the entry block because SI must be dominated by LI"); - for (auto *PredI : predecessors(B)) { - if (!Visited.insert(PredI).second) + for (auto PredI = pred_begin(B), PE = pred_end(B); PredI != PE; ++PredI) { + if (!Visited.insert(*PredI).second) continue; - WorkList.push_back(PredI); + WorkList.push_back(*PredI); } } } diff --git a/test/Transforms/DeadStoreElimination/cycle.ll b/test/Transforms/DeadStoreElimination/cycle.ll deleted file mode 100644 index aa2de415c60..00000000000 --- a/test/Transforms/DeadStoreElimination/cycle.ll +++ /dev/null @@ -1,26 +0,0 @@ -; RUN: opt < %s -basicaa -dse -S | FileCheck %s - -@Table = global [535 x i32] zeroinitializer, align 4 - -; The store in for.inc block should NOT be removed by non-local DSE. -; CHECK: store i32 64, i32* %arrayidx -; -define void @foo() { -entry: - br label %for.body - -for.body: - %i = phi i32 [ 0, %entry ], [ %inc, %for.inc ] - %arrayidx = getelementptr inbounds [535 x i32], [535 x i32]* @Table, i32 0, i32 %i - store i32 %i, i32* %arrayidx, align 4 - %cmp1 = icmp slt i32 %i, 64 - br i1 %cmp1, label %for.inc, label %for.end - -for.inc: - store i32 64, i32* %arrayidx, align 4 - %inc = add nsw i32 %i, 1 - br label %for.body - -for.end: - ret void -} diff --git a/test/Transforms/DeadStoreElimination/ifthen.ll b/test/Transforms/DeadStoreElimination/ifthen.ll deleted file mode 100644 index 21c87f89256..00000000000 --- a/test/Transforms/DeadStoreElimination/ifthen.ll +++ /dev/null @@ -1,22 +0,0 @@ -; RUN: opt < %s -basicaa -dse -enable-nonlocal-dse -S | FileCheck %s - -; The store and add in if.then block should be removed by non-local DSE. -; CHECK-NOT: %stval = add -; CHECK-NOT: store i32 %stval -; -define void @foo(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %c) { -entry: - %cmp = icmp sgt i32 %c, 0 - br i1 %cmp, label %if.then, label %if.end - -if.then: - %0 = load i32, i32* %b, align 4 - %stval = add nsw i32 %0, 1 - store i32 %stval, i32* %a, align 4 - br label %if.end - -if.end: - %m.0 = phi i32 [ 13, %if.then ], [ 10, %entry ] - store i32 %m.0, i32* %a, align 4 - ret void -} diff --git a/test/Transforms/DeadStoreElimination/ifthenelse.ll b/test/Transforms/DeadStoreElimination/ifthenelse.ll deleted file mode 100644 index 59ef17e37a5..00000000000 --- a/test/Transforms/DeadStoreElimination/ifthenelse.ll +++ /dev/null @@ -1,27 +0,0 @@ -; RUN: opt < %s -basicaa -dse -enable-nonlocal-dse -S | FileCheck %s - -; The add and store in entry block should be removed by non-local DSE. -; CHECK-NOT: %stval = add -; CHECK-NOT: store i32 %stval -; -define void @foo(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %c) { -entry: - %0 = load i32, i32* %b, align 4 - %stval = add nsw i32 %0, 1 - store i32 %stval, i32* %a, align 4 - %cmp = icmp sgt i32 %c, 0 - br i1 %cmp, label %if.then, label %if.else - -if.then: - %1 = add nsw i32 %c, 10 - br label %if.end - -if.else: - %2 = add nsw i32 %c, 13 - br label %if.end - -if.end: - %3 = phi i32 [ %1, %if.then ], [ %2, %if.else ] - store i32 %3, i32* %a, align 4 - ret void -} diff --git a/test/Transforms/DeadStoreElimination/ifthenelse2.ll b/test/Transforms/DeadStoreElimination/ifthenelse2.ll deleted file mode 100644 index 7aab0300477..00000000000 --- a/test/Transforms/DeadStoreElimination/ifthenelse2.ll +++ /dev/null @@ -1,34 +0,0 @@ -; RUN: opt < %s -basicaa -dse -enable-nonlocal-dse -S | FileCheck %s - -; The add and store in entry block should be removed by non-local DSE. -; CHECK-NOT: %stval = add -; CHECK-NOT: store i32 %stval -; -; The stores in if.then and if.else blocks should be removed by non-local DSE. -; CHECK-NOT: store i32 %1 -; CHECK-NOT: store i32 %2 -; -define void @foo(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %c) { -entry: - %0 = load i32, i32* %b, align 4 - %stval = add nsw i32 %0, 1 - store i32 %stval, i32* %a, align 4 - %cmp = icmp sgt i32 %c, 0 - br i1 %cmp, label %if.then, label %if.else - -if.then: - %1 = add nsw i32 %c, 10 - store i32 %1, i32* %a, align 4 - br label %if.end - -if.else: - %2 = add nsw i32 %c, 13 - store i32 %2, i32* %a, align 4 - br label %if.end - -if.end: - %3 = phi i32 [ %1, %if.then ], [ %2, %if.else ] - %4 = sub nsw i32 %3, 6 - store i32 %4, i32* %a, align 4 - ret void -} diff --git a/test/Transforms/DeadStoreElimination/loop.ll b/test/Transforms/DeadStoreElimination/loop.ll deleted file mode 100644 index 941e6fafaf3..00000000000 --- a/test/Transforms/DeadStoreElimination/loop.ll +++ /dev/null @@ -1,42 +0,0 @@ -; RUN: opt < %s -basicaa -dse -enable-nonlocal-dse -S | FileCheck %s - -; The store in for.body block should be removed by non-local DSE. -; CHECK-NOT: store i32 0, i32* %arrayidx -; -define void @sum(i32 %N, i32* noalias nocapture %C, i32* noalias nocapture readonly %A, i32* noalias nocapture readonly %B) { -entry: - %cmp24 = icmp eq i32 %N, 0 - br i1 %cmp24, label %for.end11, label %for.body - -for.body: - %i.025 = phi i32 [ %inc10, %for.cond1.for.inc9_crit_edge ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i32, i32* %C, i32 %i.025 - store i32 0, i32* %arrayidx, align 4 - %mul = mul i32 %i.025, %N - %arrayidx4.gep = getelementptr i32, i32* %A, i32 %mul - br label %for.body3 - -for.body3: - %0 = phi i32 [ 0, %for.body ], [ %add8, %for.body3 ] - %arrayidx4.phi = phi i32* [ %arrayidx4.gep, %for.body ], [ %arrayidx4.inc, %for.body3 ] - %arrayidx5.phi = phi i32* [ %B, %for.body ], [ %arrayidx5.inc, %for.body3 ] - %j.023 = phi i32 [ 0, %for.body ], [ %inc, %for.body3 ] - %1 = load i32, i32* %arrayidx4.phi, align 4 - %2 = load i32, i32* %arrayidx5.phi, align 4 - %add6 = add nsw i32 %2, %1 - %add8 = add nsw i32 %add6, %0 - %inc = add i32 %j.023, 1 - %exitcond = icmp ne i32 %inc, %N - %arrayidx4.inc = getelementptr i32, i32* %arrayidx4.phi, i32 1 - %arrayidx5.inc = getelementptr i32, i32* %arrayidx5.phi, i32 1 - br i1 %exitcond, label %for.body3, label %for.cond1.for.inc9_crit_edge - -for.cond1.for.inc9_crit_edge: - store i32 %add8, i32* %arrayidx, align 4 - %inc10 = add i32 %i.025, 1 - %exitcond26 = icmp ne i32 %inc10, %N - br i1 %exitcond26, label %for.body, label %for.end11 - -for.end11: - ret void -}