X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FScalar%2FEarlyCSE.cpp;h=eb38ef5f1645e6a74939d1c82a1f7b21d4506283;hb=eca5f1938e545905126f43b6f1afb7b515c36b0e;hp=3f8089c5bbfe6a92c090057dba88cbd36613aaad;hpb=c94da20917cb4dfa750903b366c920210c5265ee;p=oota-llvm.git

diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 3f8089c5bbf..eb38ef5f164 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -16,8 +16,10 @@
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/ScopedHashTable.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
@@ -27,7 +29,7 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/RecyclingAllocator.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include <deque>
@@ -262,8 +264,6 @@ namespace {
 /// expected that a later pass of GVN will catch the interesting/hard cases.
 class EarlyCSE {
 public:
-  Function &F;
-  const DataLayout *DL;
   const TargetLibraryInfo &TLI;
   const TargetTransformInfo &TTI;
   DominatorTree &DT;
@@ -281,20 +281,37 @@ public:
   /// that dominated values can succeed in their lookup.
   ScopedHTType AvailableValues;
 
-  /// \brief A scoped hash table of the current values of loads.
+  /// A scoped hash table of the current values of previously encountered
+  /// memory locations.
   ///
-  /// This allows us to get efficient access to dominating loads when we have
-  /// a fully redundant load. In addition to the most recent load, we keep
-  /// track of a generation count of the read, which is compared against the
-  /// current generation count. The current generation count is incremented
+  /// This allows us to get efficient access to dominating loads or stores when
+  /// we have a fully redundant load. In addition to the most recent load, we
+  /// keep track of a generation count of the read, which is compared against
+  /// the current generation count. The current generation count is incremented
   /// after every possibly writing memory operation, which ensures that we only
-  /// CSE loads with other loads that have no intervening store.
-  typedef RecyclingAllocator<
-      BumpPtrAllocator,
-      ScopedHashTableVal<Value *, std::pair<Value *, unsigned>>>
+  /// CSE loads with other loads that have no intervening store. Ordering
+  /// events (such as fences or atomic instructions) increment the generation
+  /// count as well; essentially, we model these as writes to all possible
+  /// locations. Note that atomic and/or volatile loads and stores can be
+  /// present in the table; it is the responsibility of the consumer to inspect
+  /// the atomicity/volatility if needed.
+  struct LoadValue {
+    Value *Data;
+    unsigned Generation;
+    int MatchingId;
+    bool IsAtomic;
+    LoadValue()
+      : Data(nullptr), Generation(0), MatchingId(-1), IsAtomic(false) {}
+    LoadValue(Value *Data, unsigned Generation, unsigned MatchingId,
+              bool IsAtomic)
+      : Data(Data), Generation(Generation), MatchingId(MatchingId),
+        IsAtomic(IsAtomic) {}
+  };
+  typedef RecyclingAllocator<BumpPtrAllocator,
+                             ScopedHashTableVal<Value *, LoadValue>>
       LoadMapAllocator;
-  typedef ScopedHashTable<Value *, std::pair<Value *, unsigned>,
-                          DenseMapInfo<Value *>, LoadMapAllocator> LoadHTType;
+  typedef ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
+                          LoadMapAllocator> LoadHTType;
   LoadHTType AvailableLoads;
 
   /// \brief A scoped hash table of the current values of read-only call
@@ -308,11 +325,9 @@ public:
   unsigned CurrentGeneration;
 
   /// \brief Set up the EarlyCSE runner for a particular function.
-  EarlyCSE(Function &F, const DataLayout *DL, const TargetLibraryInfo &TLI,
-           const TargetTransformInfo &TTI, DominatorTree &DT,
-           AssumptionCache &AC)
-      : F(F), DL(DL), TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) {
-  }
+  EarlyCSE(const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI,
+           DominatorTree &DT, AssumptionCache &AC)
+      : TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) {}
 
   bool run();
 
@@ -383,57 +398,100 @@ private:
   class ParseMemoryInst {
   public:
     ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
-        : Load(false), Store(false), Vol(false), MayReadFromMemory(false),
-          MayWriteToMemory(false), MatchingId(-1), Ptr(nullptr) {
-      MayReadFromMemory = Inst->mayReadFromMemory();
-      MayWriteToMemory = Inst->mayWriteToMemory();
-      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
-        MemIntrinsicInfo Info;
-        if (!TTI.getTgtMemIntrinsic(II, Info))
-          return;
-        if (Info.NumMemRefs == 1) {
-          Store = Info.WriteMem;
-          Load = Info.ReadMem;
-          MatchingId = Info.MatchingId;
-          MayReadFromMemory = Info.ReadMem;
-          MayWriteToMemory = Info.WriteMem;
-          Vol = Info.Vol;
-          Ptr = Info.PtrVal;
-        }
-      } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
-        Load = true;
-        Vol = !LI->isSimple();
-        Ptr = LI->getPointerOperand();
+      : IsTargetMemInst(false), Inst(Inst) {
+      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
+        if (TTI.getTgtMemIntrinsic(II, Info) && Info.NumMemRefs == 1)
+          IsTargetMemInst = true;
+    }
+    bool isLoad() const {
+      if (IsTargetMemInst) return Info.ReadMem;
+      return isa<LoadInst>(Inst);
+    }
+    bool isStore() const {
+      if (IsTargetMemInst) return Info.WriteMem;
+      return isa<StoreInst>(Inst);
+    }
+    bool isSimple() const {
+      if (IsTargetMemInst) return Info.IsSimple;
+      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+        return LI->isSimple();
       } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
-        Store = true;
-        Vol = !SI->isSimple();
-        Ptr = SI->getPointerOperand();
+        return SI->isSimple();
+      }
+      return Inst->isAtomic();
+    }
+    bool isAtomic() const {
+      if (IsTargetMemInst) {
+        assert(Info.IsSimple && "need to refine IsSimple in TTI");
+        return false;
       }
+      return Inst->isAtomic();
     }
-    bool isLoad() { return Load; }
-    bool isStore() { return Store; }
-    bool isVolatile() { return Vol; }
-    bool isMatchingMemLoc(const ParseMemoryInst &Inst) {
-      return Ptr == Inst.Ptr && MatchingId == Inst.MatchingId;
+    bool isUnordered() const {
+      if (IsTargetMemInst) {
+        assert(Info.IsSimple && "need to refine IsSimple in TTI");
+        return true;
+      }
+      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+        return LI->isUnordered();
+      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+        return SI->isUnordered();
+      }
+      // Conservative answer
+      return !Inst->isAtomic();
     }
-    bool isValid() { return Ptr != nullptr; }
-    int getMatchingId() { return MatchingId; }
-    Value *getPtr() { return Ptr; }
-    bool mayReadFromMemory() { return MayReadFromMemory; }
-    bool mayWriteToMemory() { return MayWriteToMemory; }
-  private:
-    bool Load;
-    bool Store;
-    bool Vol;
-    bool MayReadFromMemory;
-    bool MayWriteToMemory;
+    bool isVolatile() const {
+      if (IsTargetMemInst) {
+        assert(Info.IsSimple && "need to refine IsSimple in TTI");
+        return false;
+      }
+      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+        return LI->isVolatile();
+      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+        return SI->isVolatile();
+      }
+      // Conservative answer
+      return true;
+    }
+
+
+    bool isMatchingMemLoc(const ParseMemoryInst &Inst) const {
+      return (getPointerOperand() == Inst.getPointerOperand() &&
+              getMatchingId() == Inst.getMatchingId());
+    }
+    bool isValid() const { return getPointerOperand() != nullptr; }
+
     // For regular (non-intrinsic) loads/stores, this is set to -1. For
     // intrinsic loads/stores, the id is retrieved from the corresponding
     // field in the MemIntrinsicInfo structure. That field contains
     // non-negative values only.
-    int MatchingId;
-    Value *Ptr;
+    int getMatchingId() const {
+      if (IsTargetMemInst) return Info.MatchingId;
+      return -1;
+    }
+    Value *getPointerOperand() const {
+      if (IsTargetMemInst) return Info.PtrVal;
+      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+        return LI->getPointerOperand();
+      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+        return SI->getPointerOperand();
+      }
+      return nullptr;
+    }
+    bool mayReadFromMemory() const {
+      if (IsTargetMemInst) return Info.ReadMem;
+      return Inst->mayReadFromMemory();
+    }
+    bool mayWriteToMemory() const {
+      if (IsTargetMemInst) return Info.WriteMem;
+      return Inst->mayWriteToMemory();
+    }
+
+  private:
+    bool IsTargetMemInst;
+    MemIntrinsicInfo Info;
+    Instruction *Inst;
   };
 
   bool processNode(DomTreeNode *Node);
@@ -462,6 +520,30 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
   if (!BB->getSinglePredecessor())
     ++CurrentGeneration;
 
+  // If this node has a single predecessor which ends in a conditional branch,
+  // we can infer the value of the branch condition given that we took this
+  // path. We need the single predecessor to ensure there's not another path
+  // which reaches this block where the condition might hold a different
+  // value. Since we're adding this to the scoped hash table (like any other
+  // def), it will have been popped if we encounter a future merge block.
+  if (BasicBlock *Pred = BB->getSinglePredecessor())
+    if (auto *BI = dyn_cast<BranchInst>(Pred->getTerminator()))
+      if (BI->isConditional())
+        if (auto *CondInst = dyn_cast<Instruction>(BI->getCondition()))
+          if (SimpleValue::canHandle(CondInst)) {
+            assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
+            auto *ConditionalConstant = (BI->getSuccessor(0) == BB) ?
+              ConstantInt::getTrue(BB->getContext()) :
+              ConstantInt::getFalse(BB->getContext());
+            AvailableValues.insert(CondInst, ConditionalConstant);
+            DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
+                  << CondInst->getName() << "' as " << *ConditionalConstant
+                  << " in " << BB->getName() << "\n");
+            // Replace all dominated uses with the known value
+            replaceDominatedUsesWith(CondInst, ConditionalConstant, DT,
+                                     BasicBlockEdge(Pred, BB));
+          }
+
   /// LastStore - Keep track of the last non-volatile store that we saw... for
   /// as long as there is no instruction that reads memory. If we see a store
   /// to the same location, we delete the dead store. This zaps trivial dead
@@ -469,11 +551,12 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
   Instruction *LastStore = nullptr;
 
   bool Changed = false;
+  const DataLayout &DL = BB->getModule()->getDataLayout();
 
   // See if any instructions in the block can be eliminated. If so, do it. If
   // not, add them to AvailableValues.
   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
-    Instruction *Inst = I++;
+    Instruction *Inst = &*I++;
 
     // Dead instructions should just be removed.
     if (isInstructionTriviallyDead(Inst, &TLI)) {
@@ -524,24 +607,26 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
     ParseMemoryInst MemInst(Inst, TTI);
     // If this is a non-volatile load, process it.
     if (MemInst.isValid() && MemInst.isLoad()) {
-      // Ignore volatile loads.
-      if (MemInst.isVolatile()) {
+      // (conservatively) we can't peek past the ordering implied by this
+      // operation, but we can add this load to our set of available values
+      if (MemInst.isVolatile() || !MemInst.isUnordered()) {
        LastStore = nullptr;
-        // Don't CSE across synchronization boundaries.
-        if (Inst->mayWriteToMemory())
-          ++CurrentGeneration;
-        continue;
+        ++CurrentGeneration;
       }
 
       // If we have an available version of this load, and if it is the right
       // generation, replace this instruction.
-      std::pair<Value *, unsigned> InVal =
-          AvailableLoads.lookup(MemInst.getPtr());
-      if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
-        Value *Op = getOrCreateResult(InVal.first, Inst->getType());
+      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
+      if (InVal.Data != nullptr && InVal.Generation == CurrentGeneration &&
+          InVal.MatchingId == MemInst.getMatchingId() &&
+          // We don't yet handle removing loads with ordering of any kind.
+          !MemInst.isVolatile() && MemInst.isUnordered() &&
+          // We can't replace an atomic load with one which isn't also atomic.
+          InVal.IsAtomic >= MemInst.isAtomic()) {
+        Value *Op = getOrCreateResult(InVal.Data, Inst->getType());
         if (Op != nullptr) {
           DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
-                       << " to: " << *InVal.first << '\n');
+                       << " to: " << *InVal.Data << '\n');
           if (!Inst->use_empty())
             Inst->replaceAllUsesWith(Op);
           Inst->eraseFromParent();
@@ -552,8 +637,10 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
       }
 
       // Otherwise, remember that we have this instruction.
-      AvailableLoads.insert(MemInst.getPtr(), std::pair<Value *, unsigned>(
-                                                  Inst, CurrentGeneration));
+      AvailableLoads.insert(
+          MemInst.getPointerOperand(),
+          LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
+                    MemInst.isAtomic()));
       LastStore = nullptr;
       continue;
     }
@@ -589,6 +676,17 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
       continue;
     }
 
+    // A release fence requires that all stores complete before it, but does
+    // not prevent the reordering of following loads 'before' the fence. As a
+    // result, we don't need to consider it as writing to memory and don't need
+    // to advance the generation. We do need to prevent DSE across the fence,
+    // but that's handled above.
+    if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
+      if (FI->getOrdering() == Release) {
+        assert(Inst->mayReadFromMemory() && "relied on to prevent DSE above");
+        continue;
+      }
+
     // Okay, this isn't something we can CSE at all. Check to see if it is
     // something that could modify memory. If so, our available memory values
     // cannot be used so bump the generation count.
@@ -597,9 +695,12 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
 
     if (MemInst.isValid() && MemInst.isStore()) {
       // We do a trivial form of DSE if there are two stores to the same
-      // location with no intervening loads. Delete the earlier store.
+      // location with no intervening loads. Delete the earlier store. Note
+      // that we can delete an earlier simple store even if the following one
+      // is an ordered/volatile/atomic store.
       if (LastStore) {
         ParseMemoryInst LastStoreMemInst(LastStore, TTI);
+        assert(LastStoreMemInst.isSimple() && "Violated invariant");
         if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
           DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
                        << "  due to: " << *Inst << '\n');
@@ -616,12 +717,19 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
       // version of the pointer. It is safe to forward from volatile stores
       // to non-volatile loads, so we don't have to check for volatility of
       // the store.
-      AvailableLoads.insert(MemInst.getPtr(), std::pair<Value *, unsigned>(
-                                                  Inst, CurrentGeneration));
-
-      // Remember that this was the last store we saw for DSE.
-      if (!MemInst.isVolatile())
+      AvailableLoads.insert(
+          MemInst.getPointerOperand(),
+          LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
+                    MemInst.isAtomic()));
+
+      // Remember that this was the last normal store we saw for DSE.
+      // Note that we can't delete an earlier atomic or volatile store in
+      // favor of a later one which isn't. We could in principle remove an
+      // earlier unordered store if the later one is also unordered.
+      if (MemInst.isSimple())
         LastStore = Inst;
+      else
+        LastStore = nullptr;
     }
   }
 
@@ -634,7 +742,7 @@ bool EarlyCSE::run() {
   // gains over vector when the container becomes very large due to the
   // specific access patterns. For more information see the mailing list
   // discussion on this:
-  // http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
+  // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
   std::deque<StackNode *> nodesToProcess;
 
   bool Changed = false;
@@ -685,14 +793,12 @@ bool EarlyCSE::run() {
 
 PreservedAnalyses EarlyCSEPass::run(Function &F,
                                     AnalysisManager<Function> *AM) {
-  const DataLayout &DL = F.getParent()->getDataLayout();
-
   auto &TLI = AM->getResult<TargetLibraryAnalysis>(F);
   auto &TTI = AM->getResult<TargetIRAnalysis>(F);
   auto &DT = AM->getResult<DominatorTreeAnalysis>(F);
   auto &AC = AM->getResult<AssumptionAnalysis>(F);
 
-  EarlyCSE CSE(F, &DL, TLI, TTI, DT, AC);
+  EarlyCSE CSE(TLI, TTI, DT, AC);
 
   if (!CSE.run())
     return PreservedAnalyses::all();
@@ -724,13 +830,12 @@ public:
     if (skipOptnoneFunction(F))
       return false;
 
-    auto &DL = F.getParent()->getDataLayout();
     auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
     auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
     auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
     auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
 
-    EarlyCSE CSE(F, &DL, TLI, TTI, DT, AC);
+    EarlyCSE CSE(TLI, TTI, DT, AC);
 
     return CSE.run();
   }
@@ -740,6 +845,7 @@ public:
     AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
+    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.setPreservesCFG();
   }
 };
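
Editor's note: for readers skimming the patch, the following standalone C++ sketch mirrors the bookkeeping described in the AvailableLoads comment above. It is not LLVM code; ToyAvailableLoads, CachedLoad, and the member names are invented for illustration only. The idea is that cached load values are keyed by pointer and stamped with a generation count, and any possibly-writing operation simply bumps the current generation so that stale entries fail the lookup instead of being erased.

// Toy model of the generation-count scheme (hypothetical names, not LLVM API).
#include <unordered_map>

struct CachedLoad {
  int Value;           // stand-in for the forwarded load result
  unsigned Generation; // generation at which the value was recorded
};

class ToyAvailableLoads {
  std::unordered_map<const void *, CachedLoad> Table;
  unsigned CurrentGeneration = 0;

public:
  // Record the value produced by a load of (or stored to) this pointer.
  void record(const void *Ptr, int Value) {
    Table[Ptr] = {Value, CurrentGeneration};
  }

  // Any operation that may write memory (store, call, ordering event) bumps
  // the generation, implicitly invalidating every cached entry at once.
  void noteMayWriteToMemory() { ++CurrentGeneration; }

  // A lookup succeeds only if no possible write happened since the entry was
  // recorded, i.e. the generations still match.
  bool lookup(const void *Ptr, int &Out) const {
    auto It = Table.find(Ptr);
    if (It == Table.end() || It->second.Generation != CurrentGeneration)
      return false;
    Out = It->second.Value;
    return true;
  }
};

In the real pass the table is additionally scoped per dominator-tree node (via ScopedHashTable), so entries are popped when processing leaves a block's subtree, and each entry also carries the matching id and atomicity flags shown in LoadValue above.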