1 //===- PRE.cpp - Partial Redundancy Elimination ---------------------------===//
3 // This file implements the well known Partial Redundancy Elimination
4 // optimization, using an SSA formulation based on e-paths. See this paper for
7 // E-path_PRE: partial redundancy elimination made easy
8 // By: Dhananjay M. Dhamdhere In: ACM SIGPLAN Notices. Vol 37, #8, 2002
9 // http://doi.acm.org/10.1145/596992.597004
11 // This file actually implements a sparse version of the algorithm, using SSA
12 // and CFG properties instead of bit-vectors.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/Pass.h"
17 #include "llvm/Function.h"
18 #include "llvm/Type.h"
19 #include "llvm/iPHINode.h"
20 #include "llvm/iMemory.h"
21 #include "llvm/Support/CFG.h"
22 #include "llvm/Analysis/Dominators.h"
23 #include "llvm/Analysis/PostDominators.h"
24 #include "llvm/Analysis/ValueNumbering.h"
25 #include "llvm/Transforms/Scalar.h"
26 #include "Support/DepthFirstIterator.h"
27 #include "Support/PostOrderIterator.h"
28 #include "Support/Statistic.h"
29 #include "Support/hash_set"
32 Statistic<> NumExprsEliminated("pre", "Number of expressions constantified");
33 Statistic<> NumRedundant ("pre", "Number of redundant exprs eliminated");
34 Statistic<> NumInserted ("pre", "Number of expressions inserted");
36 struct PRE : public FunctionPass {
37 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
38 AU.addRequiredID(BreakCriticalEdgesID); // No critical edges for now!
39 AU.addRequired<PostDominatorTree>();
40 AU.addRequired<PostDominanceFrontier>();
41 AU.addRequired<DominatorSet>();
42 AU.addRequired<DominatorTree>();
43 AU.addRequired<DominanceFrontier>();
44 AU.addRequired<ValueNumbering>();
46 virtual bool runOnFunction(Function &F);
49 // Block information - Map basic blocks in a function back and forth to
51 std::vector<BasicBlock*> BlockMapping;
52 hash_map<BasicBlock*, unsigned> BlockNumbering;
54 // ProcessedExpressions - Keep track of which expressions have already been
56 hash_set<Instruction*> ProcessedExpressions;
58 // Provide access to the various analyses used...
60 DominatorTree *DT; PostDominatorTree *PDT;
61 DominanceFrontier *DF; PostDominanceFrontier *PDF;
64 // AvailableBlocks - Contain a mapping of blocks with available expression
65 // values to the expression value itself. This can be used as an efficient
66 // way to find out if the expression is available in the block, and if so,
67 // which version to use. This map is only used while processing a single
70 typedef hash_map<BasicBlock*, Instruction*> AvailableBlocksTy;
71 AvailableBlocksTy AvailableBlocks;
73 bool ProcessBlock(BasicBlock *BB);
75 // Anticipatibility calculation...
76 void MarkPostDominatingBlocksAnticipatible(PostDominatorTree::Node *N,
77 std::vector<char> &AntBlocks,
78 Instruction *Occurance);
79 void CalculateAnticipatiblityForOccurance(unsigned BlockNo,
80 std::vector<char> &AntBlocks,
81 Instruction *Occurance);
82 void CalculateAnticipatibleBlocks(const std::map<unsigned, Instruction*> &D,
83 std::vector<char> &AnticipatibleBlocks);
85 // PRE for an expression
86 void MarkOccuranceAvailableInAllDominatedBlocks(Instruction *Occurance,
87 BasicBlock *StartBlock);
88 void ReplaceDominatedAvailableOccurancesWith(Instruction *NewOcc,
89 DominatorTree::Node *N);
90 bool ProcessExpression(Instruction *I);
93 RegisterOpt<PRE> Z("pre", "Partial Redundancy Elimination");
97 bool PRE::runOnFunction(Function &F) {
98 VN = &getAnalysis<ValueNumbering>();
99 DS = &getAnalysis<DominatorSet>();
100 DT = &getAnalysis<DominatorTree>();
101 DF = &getAnalysis<DominanceFrontier>();
102 PDT = &getAnalysis<PostDominatorTree>();
103 PDF = &getAnalysis<PostDominanceFrontier>();
105 DEBUG(std::cerr << "\n*** Running PRE on func '" << F.getName() << "'...\n");
107 // Number the basic blocks based on a reverse post-order traversal of the CFG
108 // so that all predecessors of a block (ignoring back edges) are visited
109 // before a block is visited.
111 BlockMapping.reserve(F.size());
113 ReversePostOrderTraversal<Function*> RPOT(&F);
114 DEBUG(std::cerr << "Block order: ");
115 for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(),
116 E = RPOT.end(); I != E; ++I) {
117 // Keep track of mapping...
119 BlockNumbering.insert(std::make_pair(BB, BlockMapping.size()));
120 BlockMapping.push_back(BB);
121 DEBUG(std::cerr << BB->getName() << " ");
123 DEBUG(std::cerr << "\n");
126 // Traverse the current function depth-first in dominator-tree order. This
127 // ensures that we see all definitions before their uses (except for PHI
128 // nodes), allowing us to hoist dependent expressions correctly.
129 bool Changed = false;
130 for (unsigned i = 0, e = BlockMapping.size(); i != e; ++i)
131 Changed |= ProcessBlock(BlockMapping[i]);
134 BlockMapping.clear();
135 BlockNumbering.clear();
136 ProcessedExpressions.clear();
141 // ProcessBlock - Process any expressions first seen in this block...
143 bool PRE::ProcessBlock(BasicBlock *BB) {
144 bool Changed = false;
146 // PRE expressions first defined in this block...
147 Instruction *PrevInst = 0;
148 for (BasicBlock::iterator I = BB->begin(); I != BB->end(); )
149 if (ProcessExpression(I)) {
150 // The current instruction may have been deleted, make sure to back up to
164 void PRE::MarkPostDominatingBlocksAnticipatible(PostDominatorTree::Node *N,
165 std::vector<char> &AntBlocks,
166 Instruction *Occurance) {
167 unsigned BlockNo = BlockNumbering[N->getNode()];
169 if (AntBlocks[BlockNo]) return; // Already known to be anticipatible??
171 // Check to see if any of the operands are defined in this block, if so, the
172 // entry of this block does not anticipate the expression. This computes
174 for (unsigned i = 0, e = Occurance->getNumOperands(); i != e; ++i)
175 if (Instruction *I = dyn_cast<Instruction>(Occurance->getOperand(i)))
176 if (I->getParent() == N->getNode()) // Operand is defined in this block!
179 if (isa<LoadInst>(Occurance))
180 return; // FIXME: compute transparency for load instructions using AA
182 // Insert block into AntBlocks list...
183 AntBlocks[BlockNo] = true;
185 for (PostDominatorTree::Node::iterator I = N->begin(), E = N->end(); I != E;
187 MarkPostDominatingBlocksAnticipatible(*I, AntBlocks, Occurance);
190 void PRE::CalculateAnticipatiblityForOccurance(unsigned BlockNo,
191 std::vector<char> &AntBlocks,
192 Instruction *Occurance) {
193 if (AntBlocks[BlockNo]) return; // Block already anticipatible!
195 BasicBlock *BB = BlockMapping[BlockNo];
197 // For each occurance, mark all post-dominated blocks as anticipatible...
198 MarkPostDominatingBlocksAnticipatible(PDT->getNode(BB), AntBlocks,
201 // Next, mark any blocks in the post-dominance frontier as anticipatible iff
202 // all successors are anticipatible.
204 PostDominanceFrontier::iterator PDFI = PDF->find(BB);
205 if (PDFI != DF->end())
206 for (std::set<BasicBlock*>::iterator DI = PDFI->second.begin();
207 DI != PDFI->second.end(); ++DI) {
208 BasicBlock *PDFBlock = *DI;
209 bool AllSuccessorsAnticipatible = true;
210 for (succ_iterator SI = succ_begin(PDFBlock), SE = succ_end(PDFBlock);
212 if (!AntBlocks[BlockNumbering[*SI]]) {
213 AllSuccessorsAnticipatible = false;
217 if (AllSuccessorsAnticipatible)
218 CalculateAnticipatiblityForOccurance(BlockNumbering[PDFBlock],
219 AntBlocks, Occurance);
224 void PRE::CalculateAnticipatibleBlocks(const std::map<unsigned,
226 std::vector<char> &AntBlocks) {
227 // Initialize to zeros...
228 AntBlocks.resize(BlockMapping.size());
230 // Loop over all of the expressions...
231 for (std::map<unsigned, Instruction*>::const_iterator I = Defs.begin(),
232 E = Defs.end(); I != E; ++I)
233 CalculateAnticipatiblityForOccurance(I->first, AntBlocks, I->second);
236 /// MarkOccuranceAvailableInAllDominatedBlocks - Add entries to AvailableBlocks
237 /// for all nodes dominated by the occurance to indicate that it is now the
238 /// available occurance to use in any of these blocks.
240 void PRE::MarkOccuranceAvailableInAllDominatedBlocks(Instruction *Occurance,
242 // FIXME: There are much more efficient ways to get the blocks dominated
243 // by a block. Use them.
245 DominatorTree::Node *N = DT->getNode(Occurance->getParent());
246 for (df_iterator<DominatorTree::Node*> DI = df_begin(N), E = df_end(N);
248 AvailableBlocks[(*DI)->getNode()] = Occurance;
251 /// ReplaceDominatedAvailableOccurancesWith - This loops over the region
252 /// dominated by N, replacing any available expressions with NewOcc.
253 void PRE::ReplaceDominatedAvailableOccurancesWith(Instruction *NewOcc,
254 DominatorTree::Node *N) {
255 BasicBlock *BB = N->getNode();
256 Instruction *&ExistingAvailableVal = AvailableBlocks[BB];
258 // If there isn't a definition already active in this node, make this the new
259 // active definition...
260 if (ExistingAvailableVal == 0) {
261 ExistingAvailableVal = NewOcc;
263 for (DominatorTree::Node::iterator I = N->begin(), E = N->end(); I != E;++I)
264 ReplaceDominatedAvailableOccurancesWith(NewOcc, *I);
266 // If there is already an active definition in this block, replace it with
267 // NewOcc, and force it into all dominated blocks.
268 DEBUG(std::cerr << " Replacing dominated occ %"
269 << ExistingAvailableVal->getName() << " with %" << NewOcc->getName()
271 assert(ExistingAvailableVal != NewOcc && "NewOcc already inserted??");
272 ExistingAvailableVal->replaceAllUsesWith(NewOcc);
275 assert(ExistingAvailableVal->getParent() == BB &&
276 "OldOcc not defined in current block?");
277 BB->getInstList().erase(ExistingAvailableVal);
279 // Mark NewOCC as the Available expression in all blocks dominated by BB
280 for (df_iterator<DominatorTree::Node*> DI = df_begin(N), E = df_end(N);
282 AvailableBlocks[(*DI)->getNode()] = NewOcc;
287 /// ProcessExpression - Given an expression (instruction) process the
288 /// instruction to remove any partial redundancies induced by equivalent
289 /// computations. Note that we only need to PRE each expression once, so we
290 /// keep track of whether an expression has been PRE'd already, and don't PRE an
291 /// expression again. Expressions may be seen multiple times because process
292 /// the entire equivalence class at once, which may leave expressions later in
293 /// the control path.
295 bool PRE::ProcessExpression(Instruction *Expr) {
296 if (Expr->mayWriteToMemory() || Expr->getType() == Type::VoidTy ||
298 return false; // Cannot move expression
299 if (ProcessedExpressions.count(Expr)) return false; // Already processed.
301 // Ok, this is the first time we have seen the expression. Build a set of
302 // equivalent expressions using SSA def/use information. We consider
303 // expressions to be equivalent if they are the same opcode and have
304 // equivalent operands. As a special case for SSA, values produced by PHI
305 // nodes are considered to be equivalent to all of their operands.
307 std::vector<Value*> Values;
308 VN->getEqualNumberNodes(Expr, Values);
311 // FIXME: This should handle PHI nodes correctly. To do this, we need to
312 // consider expressions of the following form equivalent to this set of
315 // If an operand is a PHI node, add any occurances of the expression with the
316 // PHI operand replaced with the PHI node operands. This is only valid if the
317 // PHI operand occurances exist in blocks post-dominated by the incoming edge
321 // We have to be careful to handle expression definitions which dominated by
322 // other expressions. These can be directly eliminated in favor of their
323 // dominating value. Keep track of which blocks contain definitions (the key)
324 // and if a block contains a definition, which instruction it is.
326 std::map<unsigned, Instruction*> Definitions;
327 Definitions.insert(std::make_pair(BlockNumbering[Expr->getParent()], Expr));
329 bool Changed = false;
331 // Look at all of the equal values. If any of the values is not an
332 // instruction, replace all other expressions immediately with it (it must be
333 // an argument or a constant or something). Otherwise, convert the list of
334 // values into a list of expression (instruction) definitions ordering
335 // according to their dominator tree ordering.
337 Value *NonInstValue = 0;
338 for (unsigned i = 0, e = Values.size(); i != e; ++i)
339 if (Instruction *I = dyn_cast<Instruction>(Values[i])) {
340 Instruction *&BlockInst = Definitions[BlockNumbering[I->getParent()]];
341 if (BlockInst && BlockInst != I) { // Eliminate direct redundancy
342 if (DS->dominates(I, BlockInst)) { // I dom BlockInst
343 BlockInst->replaceAllUsesWith(I);
344 BlockInst->getParent()->getInstList().erase(BlockInst);
345 } else { // BlockInst dom I
346 I->replaceAllUsesWith(BlockInst);
347 I->getParent()->getInstList().erase(I);
354 NonInstValue = Values[i];
357 std::vector<Value*>().swap(Values); // Done with the values list
360 // This is the good, though unlikely, case where we find out that this
361 // expression is equal to a constant or argument directly. We can replace
362 // this and all of the other equivalent instructions with the value
365 for (std::map<unsigned, Instruction*>::iterator I = Definitions.begin(),
366 E = Definitions.end(); I != E; ++I) {
367 Instruction *Inst = I->second;
368 // Replace the value with the specified non-instruction value.
369 Inst->replaceAllUsesWith(NonInstValue); // Fixup any uses
370 Inst->getParent()->getInstList().erase(Inst); // Erase the instruction
372 NumExprsEliminated += Definitions.size();
373 return true; // Program modified!
376 // There are no expressions equal to this one. Exit early.
377 assert(!Definitions.empty() && "no equal expressions??");
379 if (Definitions.size() == 1) {
380 ProcessedExpressions.insert(Definitions.begin()->second);
384 DEBUG(std::cerr << "\n====--- Expression: " << Expr);
385 const Type *ExprType = Expr->getType();
387 // AnticipatibleBlocks - Blocks where the current expression is anticipatible.
388 // This is logically std::vector<bool> but using 'char' for performance.
389 std::vector<char> AnticipatibleBlocks;
391 // Calculate all of the blocks which the current expression is anticipatible.
392 CalculateAnticipatibleBlocks(Definitions, AnticipatibleBlocks);
394 // Print out anticipatible blocks...
395 DEBUG(std::cerr << "AntBlocks: ";
396 for (unsigned i = 0, e = AnticipatibleBlocks.size(); i != e; ++i)
397 if (AnticipatibleBlocks[i])
398 std::cerr << BlockMapping[i]->getName() <<" ";
403 // AvailabilityFrontier - Calculates the availability frontier for the current
404 // expression. The availability frontier contains the blocks on the dominance
405 // frontier of the current available expressions, iff they anticipate a
406 // definition of the expression.
407 hash_set<unsigned> AvailabilityFrontier;
409 Instruction *NonPHIOccurance = 0;
411 while (!Definitions.empty() || !AvailabilityFrontier.empty()) {
412 if (!Definitions.empty() &&
413 (AvailabilityFrontier.empty() ||
414 Definitions.begin()->first < *AvailabilityFrontier.begin())) {
415 Instruction *Occurance = Definitions.begin()->second;
416 BasicBlock *BB = Occurance->getParent();
417 Definitions.erase(Definitions.begin());
419 DEBUG(std::cerr << "PROCESSING Occurance: " << Occurance);
421 // Check to see if there is already an incoming value for this block...
422 AvailableBlocksTy::iterator LBI = AvailableBlocks.find(BB);
423 if (LBI != AvailableBlocks.end()) {
424 // Yes, there is a dominating definition for this block. Replace this
425 // occurance with the incoming value.
426 if (LBI->second != Occurance) {
427 DEBUG(std::cerr << " replacing with: " << LBI->second);
428 Occurance->replaceAllUsesWith(LBI->second);
429 BB->getInstList().erase(Occurance); // Delete instruction
434 ProcessedExpressions.insert(Occurance);
435 if (!isa<PHINode>(Occurance))
436 NonPHIOccurance = Occurance; // Keep an occurance of this expr
438 // Okay, there is no incoming value for this block, so this expression
439 // is a new definition that is good for this block and all blocks
440 // dominated by it. Add this information to the AvailableBlocks map.
442 MarkOccuranceAvailableInAllDominatedBlocks(Occurance, BB);
444 // Update the dominance frontier for the definitions so far... if a node
445 // in the dominator frontier now has all of its predecessors available,
446 // and the block is in an anticipatible region, we can insert a PHI node
448 DominanceFrontier::iterator DFI = DF->find(BB);
449 if (DFI != DF->end()) {
450 for (std::set<BasicBlock*>::iterator DI = DFI->second.begin();
451 DI != DFI->second.end(); ++DI) {
452 BasicBlock *DFBlock = *DI;
453 unsigned DFBlockID = BlockNumbering[DFBlock];
454 if (AnticipatibleBlocks[DFBlockID]) {
455 // Check to see if any of the predecessors of this block on the
456 // frontier are not available...
457 bool AnyNotAvailable = false;
458 for (pred_iterator PI = pred_begin(DFBlock),
459 PE = pred_end(DFBlock); PI != PE; ++PI)
460 if (!AvailableBlocks.count(*PI)) {
461 AnyNotAvailable = true;
465 // If any predecessor blocks are not available, add the node to
466 // the current expression dominance frontier.
467 if (AnyNotAvailable) {
468 AvailabilityFrontier.insert(DFBlockID);
470 // This block is no longer in the availability frontier, it IS
472 AvailabilityFrontier.erase(DFBlockID);
474 // If all of the predecessor blocks are available (and the block
475 // anticipates a definition along the path to the exit), we need
476 // to insert a new PHI node in this block. This block serves as
477 // a new definition for the expression, extending the available
480 PHINode *PN = new PHINode(ExprType, Expr->getName()+".pre",
482 ProcessedExpressions.insert(PN);
484 DEBUG(std::cerr << " INSERTING PHI on frontier: " << PN);
486 // Add the incoming blocks for the PHI node
487 for (pred_iterator PI = pred_begin(DFBlock),
488 PE = pred_end(DFBlock); PI != PE; ++PI)
490 PN->addIncoming(AvailableBlocks[*PI], *PI);
491 else // edge from the current block
492 PN->addIncoming(PN, DFBlock);
494 Instruction *&BlockOcc = Definitions[DFBlockID];
496 DEBUG(std::cerr <<" PHI superceeds occurance: "<<BlockOcc);
497 BlockOcc->replaceAllUsesWith(PN);
498 BlockOcc->getParent()->getInstList().erase(BlockOcc);
509 // Otherwise we must be looking at a node in the availability frontier!
510 unsigned AFBlockID = *AvailabilityFrontier.begin();
511 AvailabilityFrontier.erase(AvailabilityFrontier.begin());
512 BasicBlock *AFBlock = BlockMapping[AFBlockID];
514 // We eliminate the partial redundancy on this frontier by inserting a PHI
515 // node into this block, merging any incoming available versions into the
516 // PHI and inserting a new computation into predecessors without an
517 // incoming value. Note that we would have to insert the expression on
518 // the edge if the predecessor didn't anticipate the expression and we
519 // didn't break critical edges.
521 PHINode *PN = new PHINode(ExprType, Expr->getName()+".PRE",
523 DEBUG(std::cerr << "INSERTING PHI for PR: " << PN);
525 // If there is a pending occurance in this block, make sure to replace it
526 // with the PHI node...
527 std::map<unsigned, Instruction*>::iterator EDFI =
528 Definitions.find(AFBlockID);
529 if (EDFI != Definitions.end()) {
530 // There is already an occurance in this block. Replace it with PN and
532 Instruction *OldOcc = EDFI->second;
533 DEBUG(std::cerr << " Replaces occurance: " << OldOcc);
534 OldOcc->replaceAllUsesWith(PN);
535 AFBlock->getInstList().erase(OldOcc);
536 Definitions.erase(EDFI);
540 for (pred_iterator PI = pred_begin(AFBlock), PE = pred_end(AFBlock);
542 BasicBlock *Pred = *PI;
543 AvailableBlocksTy::iterator LBI = AvailableBlocks.find(Pred);
544 if (LBI != AvailableBlocks.end()) { // If there is a available value
545 PN->addIncoming(LBI->second, Pred); // for this pred, use it.
546 } else { // No available value yet...
547 unsigned PredID = BlockNumbering[Pred];
549 // Is the predecessor the same block that we inserted the PHI into?
551 if (Pred == AFBlock) {
552 // Yes, reuse the incoming value here...
553 PN->addIncoming(PN, Pred);
555 // No, we must insert a new computation into this block and add it
556 // to the definitions list...
557 assert(NonPHIOccurance && "No non-phi occurances seen so far???");
558 Instruction *New = NonPHIOccurance->clone();
559 New->setName(NonPHIOccurance->getName() + ".PRE-inserted");
560 ProcessedExpressions.insert(New);
562 DEBUG(std::cerr << " INSERTING OCCURANCE: " << New);
564 // Insert it into the bottom of the predecessor, right before the
565 // terminator instruction...
566 Pred->getInstList().insert(Pred->getTerminator(), New);
568 // Make this block be the available definition for any blocks it
569 // dominates. The ONLY case that this can affect more than just the
570 // block itself is when we are moving a computation to a loop
571 // header. In all other cases, because we don't have critical
572 // edges, the node is guaranteed to only dominate itself.
574 ReplaceDominatedAvailableOccurancesWith(New, DT->getNode(Pred));
576 // Add it as an incoming value on this edge to the PHI node
577 PN->addIncoming(New, Pred);
578 NonPHIOccurance = New;
584 // Find out if there is already an available value in this block. If so,
585 // we need to replace the available value with the PHI node. This can
586 // only happen when we just inserted a PHI node on a backedge.
588 AvailableBlocksTy::iterator LBBlockAvailableValIt =
589 AvailableBlocks.find(AFBlock);
590 if (LBBlockAvailableValIt != AvailableBlocks.end()) {
591 if (LBBlockAvailableValIt->second->getParent() == AFBlock) {
592 Instruction *OldVal = LBBlockAvailableValIt->second;
593 OldVal->replaceAllUsesWith(PN); // Use the new PHI node now
595 DEBUG(std::cerr << " PHI replaces available value: %"
596 << OldVal->getName() << "\n");
598 // Loop over all of the blocks dominated by this PHI node, and change
599 // the AvailableBlocks entries to be the PHI node instead of the old
601 MarkOccuranceAvailableInAllDominatedBlocks(PN, AFBlock);
603 AFBlock->getInstList().erase(OldVal); // Delete old instruction!
605 // The resultant PHI node is a new definition of the value!
606 Definitions.insert(std::make_pair(AFBlockID, PN));
608 // If the value is not defined in this block, that means that an
609 // inserted occurance in a predecessor is now the live value for the
610 // region (occurs when hoisting loop invariants, f.e.). In this case,
611 // the PHI node should actually just be removed.
612 assert(PN->use_empty() && "No uses should exist for dead PHI node!");
613 PN->getParent()->getInstList().erase(PN);
616 // The resultant PHI node is a new definition of the value!
617 Definitions.insert(std::make_pair(AFBlockID, PN));
622 AvailableBlocks.clear();