// traversing the function in depth-first order to rewrite loads and stores as
// appropriate.
//
-// The algorithm used here is based on:
-//
-// Sreedhar and Gao. A linear time algorithm for placing phi-nodes.
-// In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of
-// Programming Languages
-// POPL '95. ACM, New York, NY, 62-73.
-//
-// It has been modified to not explicitly use the DJ graph data structure and to
-// directly compute pruned SSA using per-variable liveness information.
-//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
-#include <queue>
using namespace llvm;
#define DEBUG_TYPE "mem2reg"
bool llvm::isAllocaPromotable(const AllocaInst *AI) {
// FIXME: If the memory unit is of pointer or integer type, we can permit
// assignments to subsections of the memory unit.
+ unsigned AS = AI->getType()->getAddressSpace();
// Only allow direct and non-volatile loads and stores...
for (const User *U : AI->users()) {
II->getIntrinsicID() != Intrinsic::lifetime_end)
return false;
} else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
- if (BCI->getType() != Type::getInt8PtrTy(U->getContext()))
+ if (BCI->getType() != Type::getInt8PtrTy(U->getContext(), AS))
return false;
if (!onlyUsedByLifetimeMarkers(BCI))
return false;
} else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
- if (GEPI->getType() != Type::getInt8PtrTy(U->getContext()))
+ if (GEPI->getType() != Type::getInt8PtrTy(U->getContext(), AS))
return false;
if (!GEPI->hasAllZeroIndices())
return false;
void clear() {
DefiningBlocks.clear();
UsingBlocks.clear();
- OnlyStore = 0;
- OnlyBlock = 0;
+ OnlyStore = nullptr;
+ OnlyBlock = nullptr;
OnlyUsedInOneBlock = true;
- AllocaPointerVal = 0;
- DbgDeclare = 0;
+ AllocaPointerVal = nullptr;
+ DbgDeclare = nullptr;
}
/// Scan the uses of the specified alloca, filling in the AllocaInfo used
}
if (OnlyUsedInOneBlock) {
- if (OnlyBlock == 0)
+ if (!OnlyBlock)
OnlyBlock = User->getParent();
else if (OnlyBlock != User->getParent())
OnlyUsedInOneBlock = false;
public:
typedef std::vector<Value *> ValVector;
- RenamePassData() : BB(NULL), Pred(NULL), Values() {}
+ RenamePassData() : BB(nullptr), Pred(nullptr), Values() {}
RenamePassData(BasicBlock *B, BasicBlock *P, const ValVector &V)
: BB(B), Pred(P), Values(V) {}
BasicBlock *BB;
// avoid gratuitus rescans.
const BasicBlock *BB = I->getParent();
unsigned InstNo = 0;
- for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end(); BBI != E;
- ++BBI)
- if (isInterestingInstruction(BBI))
- InstNumbers[BBI] = InstNo++;
+ for (const Instruction &BBI : *BB)
+ if (isInterestingInstruction(&BBI))
+ InstNumbers[&BBI] = InstNo++;
It = InstNumbers.find(I);
assert(It != InstNumbers.end() && "Didn't insert instruction?");
/// An AliasSetTracker object to update. If null, don't update it.
AliasSetTracker *AST;
+ /// A cache of @llvm.assume intrinsics used by SimplifyInstruction.
+ AssumptionCache *AC;
+
/// Reverse mapping of Allocas.
DenseMap<AllocaInst *, unsigned> AllocaLookup;
/// behavior.
DenseMap<BasicBlock *, unsigned> BBNumbers;
- /// Maps DomTreeNodes to their level in the dominator tree.
- DenseMap<DomTreeNode *, unsigned> DomLevels;
-
/// Lazily compute the number of predecessors a block has.
DenseMap<const BasicBlock *, unsigned> BBNumPreds;
public:
PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
- AliasSetTracker *AST)
+ AliasSetTracker *AST, AssumptionCache *AC)
: Allocas(Allocas.begin(), Allocas.end()), DT(DT),
- DIB(*DT.getRoot()->getParent()->getParent()), AST(AST) {}
+ DIB(*DT.getRoot()->getParent()->getParent(), /*AllowUnresolved*/ false),
+ AST(AST), AC(AC) {}
void run();
return NP - 1;
}
- void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
- AllocaInfo &Info);
void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
- const SmallPtrSet<BasicBlock *, 32> &DefBlocks,
- SmallPtrSet<BasicBlock *, 32> &LiveInBlocks);
+ const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
+ SmallPtrSetImpl<BasicBlock *> &LiveInBlocks);
void RenamePass(BasicBlock *BB, BasicBlock *Pred,
RenamePassData::ValVector &IncVals,
std::vector<RenamePassData> &Worklist);
// Record debuginfo for the store and remove the declaration's
// debuginfo.
if (DbgDeclareInst *DDI = Info.DbgDeclare) {
- DIBuilder DIB(*AI->getParent()->getParent()->getParent());
+ DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB);
DDI->eraseFromParent();
LBI.deleteValue(DDI);
/// using the Alloca.
///
/// If we cannot promote this alloca (because it is read before it is written),
-/// return true. This is necessary in cases where, due to control flow, the
-/// alloca is potentially undefined on some control flow paths. e.g. code like
-/// this is potentially correct:
-///
-/// for (...) { if (c) { A = undef; undef = B; } }
-///
-/// ... so long as A is not used before undef is set.
-static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
+/// return false. This is necessary in cases where, due to control flow, the
+/// alloca is undefined only on some control flow paths. e.g. code like
+/// this is correct in LLVM IR:
+/// // A is an alloca with no stores so far
+/// for (...) {
+/// int t = *A;
+/// if (!first_iteration)
+/// use(t);
+/// *A = 42;
+/// }
+static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
LargeBlockInfo &LBI,
AliasSetTracker *AST) {
// The trickiest case to handle is when we have large blocks. Because of this,
// Find the nearest store that has a lower index than this load.
StoresByIndexTy::iterator I =
std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
- std::make_pair(LoadIdx, static_cast<StoreInst *>(0)),
+ std::make_pair(LoadIdx,
+ static_cast<StoreInst *>(nullptr)),
less_first());
-
- if (I == StoresByIndex.begin())
- // If there is no store before this load, the load takes the undef value.
- LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
+ if (I == StoresByIndex.begin()) {
+ if (StoresByIndex.empty())
+ // If there are no stores, the load takes the undef value.
+ LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
+ else
+ // There is no store before this load, bail out (load may be affected
+ // by the following stores - see main comment).
+ return false;
+ }
else
// Otherwise, there was a store before this load, the load takes its value.
LI->replaceAllUsesWith(std::prev(I)->second->getOperand(0));
StoreInst *SI = cast<StoreInst>(AI->user_back());
// Record debuginfo for the store before removing it.
if (DbgDeclareInst *DDI = Info.DbgDeclare) {
- DIBuilder DIB(*AI->getParent()->getParent()->getParent());
+ DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
}
SI->eraseFromParent();
}
++NumLocalPromoted;
+ return true;
}
void PromoteMem2Reg::run() {
AllocaInfo Info;
LargeBlockInfo LBI;
+ IDFCalculator IDF(DT);
for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
AllocaInst *AI = Allocas[AllocaNum];
// If the alloca is only read and written in one basic block, just perform a
// linear sweep over the block to eliminate it.
- if (Info.OnlyUsedInOneBlock) {
- promoteSingleBlockAlloca(AI, Info, LBI, AST);
-
+ if (Info.OnlyUsedInOneBlock &&
+ promoteSingleBlockAlloca(AI, Info, LBI, AST)) {
// The alloca has been processed, move on.
RemoveFromAllocasList(AllocaNum);
continue;
}
- // If we haven't computed dominator tree levels, do so now.
- if (DomLevels.empty()) {
- SmallVector<DomTreeNode *, 32> Worklist;
-
- DomTreeNode *Root = DT.getRootNode();
- DomLevels[Root] = 0;
- Worklist.push_back(Root);
-
- while (!Worklist.empty()) {
- DomTreeNode *Node = Worklist.pop_back_val();
- unsigned ChildLevel = DomLevels[Node] + 1;
- for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end();
- CI != CE; ++CI) {
- DomLevels[*CI] = ChildLevel;
- Worklist.push_back(*CI);
- }
- }
- }
-
// If we haven't computed a numbering for the BB's in the function, do so
// now.
if (BBNumbers.empty()) {
unsigned ID = 0;
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
- BBNumbers[I] = ID++;
+ for (auto &BB : F)
+ BBNumbers[&BB] = ID++;
}
// If we have an AST to keep updated, remember some pointer value that is
// the standard SSA construction algorithm. Determine which blocks need PHI
// nodes and see if we can optimize out some work by avoiding insertion of
// dead phi nodes.
- DetermineInsertionPoint(AI, AllocaNum, Info);
+
+
+ // Unique the set of defining blocks for efficient lookup.
+ SmallPtrSet<BasicBlock *, 32> DefBlocks;
+ DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());
+
+ // Determine which blocks the value is live in. These are blocks which lead
+ // to uses.
+ SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
+ ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);
+
+ // At this point, we're committed to promoting the alloca using IDF's, and
+ // the standard SSA construction algorithm. Determine which blocks need phi
+ // nodes and see if we can optimize out some work by avoiding insertion of
+ // dead phi nodes.
+ IDF.setLiveInBlocks(LiveInBlocks);
+ IDF.setDefiningBlocks(DefBlocks);
+ SmallVector<BasicBlock *, 32> PHIBlocks;
+ IDF.calculate(PHIBlocks);
+ if (PHIBlocks.size() > 1)
+ std::sort(PHIBlocks.begin(), PHIBlocks.end(),
+ [this](BasicBlock *A, BasicBlock *B) {
+ return BBNumbers.lookup(A) < BBNumbers.lookup(B);
+ });
+
+ unsigned CurrentVersion = 0;
+ for (unsigned i = 0, e = PHIBlocks.size(); i != e; ++i)
+ QueuePhiNode(PHIBlocks[i], AllocaNum, CurrentVersion);
}
if (Allocas.empty())
// and inserting the phi nodes we marked as necessary
//
std::vector<RenamePassData> RenamePassWorkList;
- RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
+ RenamePassWorkList.emplace_back(&F.front(), nullptr, std::move(Values));
do {
RenamePassData RPD;
RPD.swap(RenamePassWorkList.back());
A->eraseFromParent();
}
+ const DataLayout &DL = F.getParent()->getDataLayout();
+
// Remove alloca's dbg.declare instrinsics from the function.
for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
PHINode *PN = I->second;
// If this PHI node merges one value and/or undefs, get the value.
- if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) {
+ if (Value *V = SimplifyInstruction(PN, DL, nullptr, &DT, AC)) {
if (AST && PN->getType()->isPointerTy())
AST->deleteValue(PN);
PN->replaceAllUsesWith(V);
/// inserted phi nodes would be dead).
void PromoteMem2Reg::ComputeLiveInBlocks(
AllocaInst *AI, AllocaInfo &Info,
- const SmallPtrSet<BasicBlock *, 32> &DefBlocks,
- SmallPtrSet<BasicBlock *, 32> &LiveInBlocks) {
+ const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
+ SmallPtrSetImpl<BasicBlock *> &LiveInBlocks) {
// To determine liveness, we must iterate through the predecessors of blocks
// where the def is live. Blocks are added to the worklist if we need to
// The block really is live in here, insert it into the set. If already in
// the set, then it has already been processed.
- if (!LiveInBlocks.insert(BB))
+ if (!LiveInBlocks.insert(BB).second)
continue;
// Since the value is live into BB, it is either defined in a predecessor or
}
}
-/// At this point, we're committed to promoting the alloca using IDF's, and the
-/// standard SSA construction algorithm. Determine which blocks need phi nodes
-/// and see if we can optimize out some work by avoiding insertion of dead phi
-/// nodes.
-void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
- AllocaInfo &Info) {
- // Unique the set of defining blocks for efficient lookup.
- SmallPtrSet<BasicBlock *, 32> DefBlocks;
- DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());
-
- // Determine which blocks the value is live in. These are blocks which lead
- // to uses.
- SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
- ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);
-
- // Use a priority queue keyed on dominator tree level so that inserted nodes
- // are handled from the bottom of the dominator tree upwards.
- typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair;
- typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
- less_second> IDFPriorityQueue;
- IDFPriorityQueue PQ;
-
- for (SmallPtrSet<BasicBlock *, 32>::const_iterator I = DefBlocks.begin(),
- E = DefBlocks.end();
- I != E; ++I) {
- if (DomTreeNode *Node = DT.getNode(*I))
- PQ.push(std::make_pair(Node, DomLevels[Node]));
- }
-
- SmallVector<std::pair<unsigned, BasicBlock *>, 32> DFBlocks;
- SmallPtrSet<DomTreeNode *, 32> Visited;
- SmallVector<DomTreeNode *, 32> Worklist;
- while (!PQ.empty()) {
- DomTreeNodePair RootPair = PQ.top();
- PQ.pop();
- DomTreeNode *Root = RootPair.first;
- unsigned RootLevel = RootPair.second;
-
- // Walk all dominator tree children of Root, inspecting their CFG edges with
- // targets elsewhere on the dominator tree. Only targets whose level is at
- // most Root's level are added to the iterated dominance frontier of the
- // definition set.
-
- Worklist.clear();
- Worklist.push_back(Root);
-
- while (!Worklist.empty()) {
- DomTreeNode *Node = Worklist.pop_back_val();
- BasicBlock *BB = Node->getBlock();
-
- for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
- ++SI) {
- DomTreeNode *SuccNode = DT.getNode(*SI);
-
- // Quickly skip all CFG edges that are also dominator tree edges instead
- // of catching them below.
- if (SuccNode->getIDom() == Node)
- continue;
-
- unsigned SuccLevel = DomLevels[SuccNode];
- if (SuccLevel > RootLevel)
- continue;
-
- if (!Visited.insert(SuccNode))
- continue;
-
- BasicBlock *SuccBB = SuccNode->getBlock();
- if (!LiveInBlocks.count(SuccBB))
- continue;
-
- DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB));
- if (!DefBlocks.count(SuccBB))
- PQ.push(std::make_pair(SuccNode, SuccLevel));
- }
-
- for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE;
- ++CI) {
- if (!Visited.count(*CI))
- Worklist.push_back(*CI);
- }
- }
- }
-
- if (DFBlocks.size() > 1)
- std::sort(DFBlocks.begin(), DFBlocks.end());
-
- unsigned CurrentVersion = 0;
- for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i)
- QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion);
-}
-
/// \brief Queue a phi-node to be added to a basic-block for a specific Alloca.
///
/// Returns true if there wasn't already a phi-node for that variable
// BasicBlock.
PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB),
Allocas[AllocaNo]->getName() + "." + Twine(Version++),
- BB->begin());
+ &BB->front());
++NumPHIInsert;
PhiToAllocaMap[PN] = AllocaNo;
// Get the next phi node.
++PNI;
APN = dyn_cast<PHINode>(PNI);
- if (APN == 0)
+ if (!APN)
break;
// Verify that it is missing entries. If not, it is not being inserted
}
// Don't revisit blocks.
- if (!Visited.insert(BB))
+ if (!Visited.insert(BB).second)
return;
for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II);) {
- Instruction *I = II++; // get the instruction, increment iterator
+ Instruction *I = &*II++; // get the instruction, increment iterator
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
++I;
for (; I != E; ++I)
- if (VisitedSuccs.insert(*I))
- Worklist.push_back(RenamePassData(*I, Pred, IncomingVals));
+ if (VisitedSuccs.insert(*I).second)
+ Worklist.emplace_back(*I, Pred, IncomingVals);
goto NextIteration;
}
void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
- AliasSetTracker *AST) {
+ AliasSetTracker *AST, AssumptionCache *AC) {
// If there is nothing to do, bail out...
if (Allocas.empty())
return;
- PromoteMem2Reg(Allocas, DT, AST).run();
+ PromoteMem2Reg(Allocas, DT, AST, AC).run();
}