1 //===- PRE.cpp - Partial Redundancy Elimination ---------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by the LLVM research group and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the well-known Partial Redundancy Elimination
11 // optimization, using an SSA formulation based on e-paths. See this paper for
14 // E-path_PRE: partial redundancy elimination made easy
15 // By: Dhananjay M. Dhamdhere In: ACM SIGPLAN Notices. Vol 37, #8, 2002
16 // http://doi.acm.org/10.1145/596992.597004
18 // This file actually implements a sparse version of the algorithm, using SSA
19 // and CFG properties instead of bit-vectors.
21 //===----------------------------------------------------------------------===//
23 #include "llvm/Pass.h"
24 #include "llvm/Function.h"
25 #include "llvm/Type.h"
26 #include "llvm/Instructions.h"
27 #include "llvm/Support/CFG.h"
28 #include "llvm/Analysis/Dominators.h"
29 #include "llvm/Analysis/PostDominators.h"
30 #include "llvm/Analysis/ValueNumbering.h"
31 #include "llvm/Transforms/Scalar.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/ADT/DepthFirstIterator.h"
34 #include "llvm/ADT/PostOrderIterator.h"
35 #include "llvm/ADT/Statistic.h"
36 #include "llvm/ADT/hash_set"
37 #include "llvm/ADT/hash_map"
41 Statistic<> NumExprsEliminated("pre", "Number of expressions constantified");
42 Statistic<> NumRedundant ("pre", "Number of redundant exprs eliminated");
43 Statistic<> NumInserted ("pre", "Number of expressions inserted");
45 struct PRE : public FunctionPass {
46 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
47 AU.addRequiredID(BreakCriticalEdgesID); // No critical edges for now!
48 AU.addRequired<PostDominatorTree>();
49 AU.addRequired<PostDominanceFrontier>();
50 AU.addRequired<DominatorSet>();
51 AU.addRequired<DominatorTree>();
52 AU.addRequired<DominanceFrontier>();
53 AU.addRequired<ValueNumbering>();
55 virtual bool runOnFunction(Function &F);
58 // Block information - Map basic blocks in a function back and forth to
60 std::vector<BasicBlock*> BlockMapping;
61 hash_map<BasicBlock*, unsigned> BlockNumbering;
63 // ProcessedExpressions - Keep track of which expressions have already been
65 hash_set<Instruction*> ProcessedExpressions;
67 // Provide access to the various analyses used...
69 DominatorTree *DT; PostDominatorTree *PDT;
70 DominanceFrontier *DF; PostDominanceFrontier *PDF;
73 // AvailableBlocks - Contain a mapping of blocks with available expression
74 // values to the expression value itself. This can be used as an efficient
75 // way to find out if the expression is available in the block, and if so,
76 // which version to use. This map is only used while processing a single
79 typedef hash_map<BasicBlock*, Instruction*> AvailableBlocksTy;
80 AvailableBlocksTy AvailableBlocks;
82 bool ProcessBlock(BasicBlock *BB);
84 // Anticipatibility calculation...
85 void MarkPostDominatingBlocksAnticipatible(PostDominatorTree::Node *N,
86 std::vector<char> &AntBlocks,
87 Instruction *Occurrence);
88 void CalculateAnticipatiblityForOccurrence(unsigned BlockNo,
89 std::vector<char> &AntBlocks,
90 Instruction *Occurrence);
91 void CalculateAnticipatibleBlocks(const std::map<unsigned, Instruction*> &D,
92 std::vector<char> &AnticipatibleBlocks);
94 // PRE for an expression
95 void MarkOccurrenceAvailableInAllDominatedBlocks(Instruction *Occurrence,
96 BasicBlock *StartBlock);
97 void ReplaceDominatedAvailableOccurrencesWith(Instruction *NewOcc,
98 DominatorTree::Node *N);
99 bool ProcessExpression(Instruction *I);
102 RegisterOpt<PRE> Z("pre", "Partial Redundancy Elimination");
105 FunctionPass* llvm::createPREPass() { return new PRE(); }
107 bool PRE::runOnFunction(Function &F) {
108 VN = &getAnalysis<ValueNumbering>();
109 DS = &getAnalysis<DominatorSet>();
110 DT = &getAnalysis<DominatorTree>();
111 DF = &getAnalysis<DominanceFrontier>();
112 PDT = &getAnalysis<PostDominatorTree>();
113 PDF = &getAnalysis<PostDominanceFrontier>();
115 DEBUG(std::cerr << "\n*** Running PRE on func '" << F.getName() << "'...\n");
117 // Number the basic blocks based on a reverse post-order traversal of the CFG
118 // so that all predecessors of a block (ignoring back edges) are visited
119 // before a block is visited.
121 BlockMapping.reserve(F.size());
123 ReversePostOrderTraversal<Function*> RPOT(&F);
124 DEBUG(std::cerr << "Block order: ");
125 for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(),
126 E = RPOT.end(); I != E; ++I) {
127 // Keep track of mapping...
129 BlockNumbering.insert(std::make_pair(BB, BlockMapping.size()));
130 BlockMapping.push_back(BB);
131 DEBUG(std::cerr << BB->getName() << " ");
133 DEBUG(std::cerr << "\n");
136 // Traverse the current function depth-first in dominator-tree order. This
137 // ensures that we see all definitions before their uses (except for PHI
138 // nodes), allowing us to hoist dependent expressions correctly.
139 bool Changed = false;
140 for (unsigned i = 0, e = BlockMapping.size(); i != e; ++i)
141 Changed |= ProcessBlock(BlockMapping[i]);
144 BlockMapping.clear();
145 BlockNumbering.clear();
146 ProcessedExpressions.clear();
151 // ProcessBlock - Process any expressions first seen in this block...
153 bool PRE::ProcessBlock(BasicBlock *BB) {
154 bool Changed = false;
156 // DISABLED: This pass invalidates iterators and then uses them.
159 // PRE expressions first defined in this block...
160 for (BasicBlock::iterator I = BB->begin(); I != BB->end(); )
161 if (ProcessExpression(I++))
167 void PRE::MarkPostDominatingBlocksAnticipatible(PostDominatorTree::Node *N,
168 std::vector<char> &AntBlocks,
169 Instruction *Occurrence) {
170 unsigned BlockNo = BlockNumbering[N->getBlock()];
172 if (AntBlocks[BlockNo]) return; // Already known to be anticipatible??
174 // Check to see if any of the operands are defined in this block, if so, the
175 // entry of this block does not anticipate the expression. This computes
177 for (unsigned i = 0, e = Occurrence->getNumOperands(); i != e; ++i)
178 if (Instruction *I = dyn_cast<Instruction>(Occurrence->getOperand(i)))
179 if (I->getParent() == N->getBlock()) // Operand is defined in this block!
182 if (isa<LoadInst>(Occurrence))
183 return; // FIXME: compute transparency for load instructions using AA
185 // Insert block into AntBlocks list...
186 AntBlocks[BlockNo] = true;
188 for (PostDominatorTree::Node::iterator I = N->begin(), E = N->end(); I != E;
190 MarkPostDominatingBlocksAnticipatible(*I, AntBlocks, Occurrence);
193 void PRE::CalculateAnticipatiblityForOccurrence(unsigned BlockNo,
194 std::vector<char> &AntBlocks,
195 Instruction *Occurrence) {
196 if (AntBlocks[BlockNo]) return; // Block already anticipatible!
198 BasicBlock *BB = BlockMapping[BlockNo];
200 // For each occurrence, mark all post-dominated blocks as anticipatible...
201 MarkPostDominatingBlocksAnticipatible(PDT->getNode(BB), AntBlocks,
204 // Next, mark any blocks in the post-dominance frontier as anticipatible iff
205 // all successors are anticipatible.
207 PostDominanceFrontier::iterator PDFI = PDF->find(BB);
208 if (PDFI != DF->end())
209 for (std::set<BasicBlock*>::iterator DI = PDFI->second.begin();
210 DI != PDFI->second.end(); ++DI) {
211 BasicBlock *PDFBlock = *DI;
212 bool AllSuccessorsAnticipatible = true;
213 for (succ_iterator SI = succ_begin(PDFBlock), SE = succ_end(PDFBlock);
215 if (!AntBlocks[BlockNumbering[*SI]]) {
216 AllSuccessorsAnticipatible = false;
220 if (AllSuccessorsAnticipatible)
221 CalculateAnticipatiblityForOccurrence(BlockNumbering[PDFBlock],
222 AntBlocks, Occurrence);
227 void PRE::CalculateAnticipatibleBlocks(const std::map<unsigned,
229 std::vector<char> &AntBlocks) {
230 // Initialize to zeros...
231 AntBlocks.resize(BlockMapping.size());
233 // Loop over all of the expressions...
234 for (std::map<unsigned, Instruction*>::const_iterator I = Defs.begin(),
235 E = Defs.end(); I != E; ++I)
236 CalculateAnticipatiblityForOccurrence(I->first, AntBlocks, I->second);
239 /// MarkOccurrenceAvailableInAllDominatedBlocks - Add entries to AvailableBlocks
240 /// for all nodes dominated by the occurrence to indicate that it is now the
241 /// available occurrence to use in any of these blocks.
243 void PRE::MarkOccurrenceAvailableInAllDominatedBlocks(Instruction *Occurrence,
245 // FIXME: There are much more efficient ways to get the blocks dominated
246 // by a block. Use them.
248 DominatorTree::Node *N = DT->getNode(Occurrence->getParent());
249 for (df_iterator<DominatorTree::Node*> DI = df_begin(N), E = df_end(N);
251 AvailableBlocks[(*DI)->getBlock()] = Occurrence;
254 /// ReplaceDominatedAvailableOccurrencesWith - This loops over the region
255 /// dominated by N, replacing any available expressions with NewOcc.
256 void PRE::ReplaceDominatedAvailableOccurrencesWith(Instruction *NewOcc,
257 DominatorTree::Node *N) {
258 BasicBlock *BB = N->getBlock();
259 Instruction *&ExistingAvailableVal = AvailableBlocks[BB];
261 // If there isn't a definition already active in this node, make this the new
262 // active definition...
263 if (ExistingAvailableVal == 0) {
264 ExistingAvailableVal = NewOcc;
266 for (DominatorTree::Node::iterator I = N->begin(), E = N->end(); I != E;++I)
267 ReplaceDominatedAvailableOccurrencesWith(NewOcc, *I);
269 // If there is already an active definition in this block, replace it with
270 // NewOcc, and force it into all dominated blocks.
271 DEBUG(std::cerr << " Replacing dominated occ %"
272 << ExistingAvailableVal->getName() << " with %" << NewOcc->getName()
274 assert(ExistingAvailableVal != NewOcc && "NewOcc already inserted??");
275 ExistingAvailableVal->replaceAllUsesWith(NewOcc);
278 assert(ExistingAvailableVal->getParent() == BB &&
279 "OldOcc not defined in current block?");
280 BB->getInstList().erase(ExistingAvailableVal);
282 // Mark NewOCC as the Available expression in all blocks dominated by BB
283 for (df_iterator<DominatorTree::Node*> DI = df_begin(N), E = df_end(N);
285 AvailableBlocks[(*DI)->getBlock()] = NewOcc;
290 /// ProcessExpression - Given an expression (instruction) process the
291 /// instruction to remove any partial redundancies induced by equivalent
292 /// computations. Note that we only need to PRE each expression once, so we
293 /// keep track of whether an expression has been PRE'd already, and don't PRE an
294 /// expression again. Expressions may be seen multiple times because process
295 /// the entire equivalence class at once, which may leave expressions later in
296 /// the control path.
298 bool PRE::ProcessExpression(Instruction *Expr) {
299 if (Expr->mayWriteToMemory() || Expr->getType() == Type::VoidTy ||
301 return false; // Cannot move expression
302 if (ProcessedExpressions.count(Expr)) return false; // Already processed.
304 // Ok, this is the first time we have seen the expression. Build a set of
305 // equivalent expressions using SSA def/use information. We consider
306 // expressions to be equivalent if they are the same opcode and have
307 // equivalent operands. As a special case for SSA, values produced by PHI
308 // nodes are considered to be equivalent to all of their operands.
310 std::vector<Value*> Values;
311 VN->getEqualNumberNodes(Expr, Values);
314 // FIXME: This should handle PHI nodes correctly. To do this, we need to
315 // consider expressions of the following form equivalent to this set of
318 // If an operand is a PHI node, add any occurrences of the expression with the
319 // PHI operand replaced with the PHI node operands. This is only valid if the
320 // PHI operand occurrences exist in blocks post-dominated by the incoming edge
324 // We have to be careful to handle expression definitions which dominated by
325 // other expressions. These can be directly eliminated in favor of their
326 // dominating value. Keep track of which blocks contain definitions (the key)
327 // and if a block contains a definition, which instruction it is.
329 std::map<unsigned, Instruction*> Definitions;
330 Definitions.insert(std::make_pair(BlockNumbering[Expr->getParent()], Expr));
332 bool Changed = false;
334 // Look at all of the equal values. If any of the values is not an
335 // instruction, replace all other expressions immediately with it (it must be
336 // an argument or a constant or something). Otherwise, convert the list of
337 // values into a list of expression (instruction) definitions ordering
338 // according to their dominator tree ordering.
340 Value *NonInstValue = 0;
341 for (unsigned i = 0, e = Values.size(); i != e; ++i)
342 if (Instruction *I = dyn_cast<Instruction>(Values[i])) {
343 Instruction *&BlockInst = Definitions[BlockNumbering[I->getParent()]];
344 if (BlockInst && BlockInst != I) { // Eliminate direct redundancy
345 if (DS->dominates(I, BlockInst)) { // I dom BlockInst
346 BlockInst->replaceAllUsesWith(I);
347 BlockInst->getParent()->getInstList().erase(BlockInst);
348 } else { // BlockInst dom I
349 I->replaceAllUsesWith(BlockInst);
350 I->getParent()->getInstList().erase(I);
357 NonInstValue = Values[i];
360 std::vector<Value*>().swap(Values); // Done with the values list
363 // This is the good, though unlikely, case where we find out that this
364 // expression is equal to a constant or argument directly. We can replace
365 // this and all of the other equivalent instructions with the value
368 for (std::map<unsigned, Instruction*>::iterator I = Definitions.begin(),
369 E = Definitions.end(); I != E; ++I) {
370 Instruction *Inst = I->second;
371 // Replace the value with the specified non-instruction value.
372 Inst->replaceAllUsesWith(NonInstValue); // Fixup any uses
373 Inst->getParent()->getInstList().erase(Inst); // Erase the instruction
375 NumExprsEliminated += Definitions.size();
376 return true; // Program modified!
379 // There are no expressions equal to this one. Exit early.
380 assert(!Definitions.empty() && "no equal expressions??");
382 if (Definitions.size() == 1) {
383 ProcessedExpressions.insert(Definitions.begin()->second);
387 DEBUG(std::cerr << "\n====--- Expression: " << *Expr);
388 const Type *ExprType = Expr->getType();
390 // AnticipatibleBlocks - Blocks where the current expression is anticipatible.
391 // This is logically std::vector<bool> but using 'char' for performance.
392 std::vector<char> AnticipatibleBlocks;
394 // Calculate all of the blocks which the current expression is anticipatible.
395 CalculateAnticipatibleBlocks(Definitions, AnticipatibleBlocks);
397 // Print out anticipatible blocks...
398 DEBUG(std::cerr << "AntBlocks: ";
399 for (unsigned i = 0, e = AnticipatibleBlocks.size(); i != e; ++i)
400 if (AnticipatibleBlocks[i])
401 std::cerr << BlockMapping[i]->getName() <<" ";
406 // AvailabilityFrontier - Calculates the availability frontier for the current
407 // expression. The availability frontier contains the blocks on the dominance
408 // frontier of the current available expressions, iff they anticipate a
409 // definition of the expression.
410 hash_set<unsigned> AvailabilityFrontier;
412 Instruction *NonPHIOccurrence = 0;
414 while (!Definitions.empty() || !AvailabilityFrontier.empty()) {
415 if (!Definitions.empty() &&
416 (AvailabilityFrontier.empty() ||
417 Definitions.begin()->first < *AvailabilityFrontier.begin())) {
418 Instruction *Occurrence = Definitions.begin()->second;
419 BasicBlock *BB = Occurrence->getParent();
420 Definitions.erase(Definitions.begin());
422 DEBUG(std::cerr << "PROCESSING Occurrence: " << *Occurrence);
424 // Check to see if there is already an incoming value for this block...
425 AvailableBlocksTy::iterator LBI = AvailableBlocks.find(BB);
426 if (LBI != AvailableBlocks.end()) {
427 // Yes, there is a dominating definition for this block. Replace this
428 // occurrence with the incoming value.
429 if (LBI->second != Occurrence) {
430 DEBUG(std::cerr << " replacing with: " << *LBI->second);
431 Occurrence->replaceAllUsesWith(LBI->second);
432 BB->getInstList().erase(Occurrence); // Delete instruction
436 ProcessedExpressions.insert(Occurrence);
437 if (!isa<PHINode>(Occurrence))
438 NonPHIOccurrence = Occurrence; // Keep an occurrence of this expr
440 // Okay, there is no incoming value for this block, so this expression
441 // is a new definition that is good for this block and all blocks
442 // dominated by it. Add this information to the AvailableBlocks map.
444 MarkOccurrenceAvailableInAllDominatedBlocks(Occurrence, BB);
446 // Update the dominance frontier for the definitions so far... if a node
447 // in the dominator frontier now has all of its predecessors available,
448 // and the block is in an anticipatible region, we can insert a PHI node
450 DominanceFrontier::iterator DFI = DF->find(BB);
451 if (DFI != DF->end()) {
452 for (std::set<BasicBlock*>::iterator DI = DFI->second.begin();
453 DI != DFI->second.end(); ++DI) {
454 BasicBlock *DFBlock = *DI;
455 unsigned DFBlockID = BlockNumbering[DFBlock];
456 if (AnticipatibleBlocks[DFBlockID]) {
457 // Check to see if any of the predecessors of this block on the
458 // frontier are not available...
459 bool AnyNotAvailable = false;
460 for (pred_iterator PI = pred_begin(DFBlock),
461 PE = pred_end(DFBlock); PI != PE; ++PI)
462 if (!AvailableBlocks.count(*PI)) {
463 AnyNotAvailable = true;
467 // If any predecessor blocks are not available, add the node to
468 // the current expression dominance frontier.
469 if (AnyNotAvailable) {
470 AvailabilityFrontier.insert(DFBlockID);
472 // This block is no longer in the availability frontier, it IS
474 AvailabilityFrontier.erase(DFBlockID);
476 // If all of the predecessor blocks are available (and the block
477 // anticipates a definition along the path to the exit), we need
478 // to insert a new PHI node in this block. This block serves as
479 // a new definition for the expression, extending the available
482 PHINode *PN = new PHINode(ExprType, Expr->getName()+".pre",
484 ProcessedExpressions.insert(PN);
486 DEBUG(std::cerr << " INSERTING PHI on frontier: " << *PN);
488 // Add the incoming blocks for the PHI node
489 for (pred_iterator PI = pred_begin(DFBlock),
490 PE = pred_end(DFBlock); PI != PE; ++PI)
492 PN->addIncoming(AvailableBlocks[*PI], *PI);
493 else // edge from the current block
494 PN->addIncoming(PN, DFBlock);
496 Instruction *&BlockOcc = Definitions[DFBlockID];
498 DEBUG(std::cerr <<" PHI superceeds occurrence: "<<
500 BlockOcc->replaceAllUsesWith(PN);
501 BlockOcc->getParent()->getInstList().erase(BlockOcc);
512 // Otherwise we must be looking at a node in the availability frontier!
513 unsigned AFBlockID = *AvailabilityFrontier.begin();
514 AvailabilityFrontier.erase(AvailabilityFrontier.begin());
515 BasicBlock *AFBlock = BlockMapping[AFBlockID];
517 // We eliminate the partial redundancy on this frontier by inserting a PHI
518 // node into this block, merging any incoming available versions into the
519 // PHI and inserting a new computation into predecessors without an
520 // incoming value. Note that we would have to insert the expression on
521 // the edge if the predecessor didn't anticipate the expression and we
522 // didn't break critical edges.
524 PHINode *PN = new PHINode(ExprType, Expr->getName()+".PRE",
526 DEBUG(std::cerr << "INSERTING PHI for PR: " << *PN);
528 // If there is a pending occurrence in this block, make sure to replace it
529 // with the PHI node...
530 std::map<unsigned, Instruction*>::iterator EDFI =
531 Definitions.find(AFBlockID);
532 if (EDFI != Definitions.end()) {
533 // There is already an occurrence in this block. Replace it with PN and
535 Instruction *OldOcc = EDFI->second;
536 DEBUG(std::cerr << " Replaces occurrence: " << *OldOcc);
537 OldOcc->replaceAllUsesWith(PN);
538 AFBlock->getInstList().erase(OldOcc);
539 Definitions.erase(EDFI);
543 for (pred_iterator PI = pred_begin(AFBlock), PE = pred_end(AFBlock);
545 BasicBlock *Pred = *PI;
546 AvailableBlocksTy::iterator LBI = AvailableBlocks.find(Pred);
547 if (LBI != AvailableBlocks.end()) { // If there is a available value
548 PN->addIncoming(LBI->second, Pred); // for this pred, use it.
549 } else { // No available value yet...
550 unsigned PredID = BlockNumbering[Pred];
552 // Is the predecessor the same block that we inserted the PHI into?
554 if (Pred == AFBlock) {
555 // Yes, reuse the incoming value here...
556 PN->addIncoming(PN, Pred);
558 // No, we must insert a new computation into this block and add it
559 // to the definitions list...
560 assert(NonPHIOccurrence && "No non-phi occurrences seen so far???");
561 Instruction *New = NonPHIOccurrence->clone();
562 New->setName(NonPHIOccurrence->getName() + ".PRE-inserted");
563 ProcessedExpressions.insert(New);
565 DEBUG(std::cerr << " INSERTING OCCURRRENCE: " << *New);
567 // Insert it into the bottom of the predecessor, right before the
568 // terminator instruction...
569 Pred->getInstList().insert(Pred->getTerminator(), New);
571 // Make this block be the available definition for any blocks it
572 // dominates. The ONLY case that this can affect more than just the
573 // block itself is when we are moving a computation to a loop
574 // header. In all other cases, because we don't have critical
575 // edges, the node is guaranteed to only dominate itself.
577 ReplaceDominatedAvailableOccurrencesWith(New, DT->getNode(Pred));
579 // Add it as an incoming value on this edge to the PHI node
580 PN->addIncoming(New, Pred);
581 NonPHIOccurrence = New;
587 // Find out if there is already an available value in this block. If so,
588 // we need to replace the available value with the PHI node. This can
589 // only happen when we just inserted a PHI node on a backedge.
591 AvailableBlocksTy::iterator LBBlockAvailableValIt =
592 AvailableBlocks.find(AFBlock);
593 if (LBBlockAvailableValIt != AvailableBlocks.end()) {
594 if (LBBlockAvailableValIt->second->getParent() == AFBlock) {
595 Instruction *OldVal = LBBlockAvailableValIt->second;
596 OldVal->replaceAllUsesWith(PN); // Use the new PHI node now
598 DEBUG(std::cerr << " PHI replaces available value: %"
599 << OldVal->getName() << "\n");
601 // Loop over all of the blocks dominated by this PHI node, and change
602 // the AvailableBlocks entries to be the PHI node instead of the old
604 MarkOccurrenceAvailableInAllDominatedBlocks(PN, AFBlock);
606 AFBlock->getInstList().erase(OldVal); // Delete old instruction!
608 // The resultant PHI node is a new definition of the value!
609 Definitions.insert(std::make_pair(AFBlockID, PN));
611 // If the value is not defined in this block, that means that an
612 // inserted occurrence in a predecessor is now the live value for the
613 // region (occurs when hoisting loop invariants, f.e.). In this case,
614 // the PHI node should actually just be removed.
615 assert(PN->use_empty() && "No uses should exist for dead PHI node!");
616 PN->getParent()->getInstList().erase(PN);
619 // The resultant PHI node is a new definition of the value!
620 Definitions.insert(std::make_pair(AFBlockID, PN));
625 AvailableBlocks.clear();