1 //===-- AMDGPUStructurizeCFG.cpp - ------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// The pass implemented in this file transforms the program's control flow
12 /// graph into a form that's suitable for code generation on hardware that
13 /// implements control flow by execution masking. This currently includes all
14 /// AMD GPUs but may as well be useful for other types of hardware.
16 //===----------------------------------------------------------------------===//
19 #include "llvm/ADT/SCCIterator.h"
20 #include "llvm/Analysis/RegionInfo.h"
21 #include "llvm/Analysis/RegionIterator.h"
22 #include "llvm/Analysis/RegionPass.h"
23 #include "llvm/IR/Module.h"
24 #include "llvm/Transforms/Utils/SSAUpdater.h"
30 // Definition of the complex types used in this pass.
// A (block, value) pair; delPhiValues() records each removed PHI operand as
// an (incoming block, removed value) pair of this shape.
32 typedef std::pair<BasicBlock *, Value *> BBValuePair;
// Array reference over basic block pointers.
33 typedef ArrayRef<BasicBlock*> BBVecRef;
// Small vectors of region nodes, basic blocks and (block, value) pairs.
35 typedef SmallVector<RegionNode*, 8> RNVector;
36 typedef SmallVector<BasicBlock*, 8> BBVector;
37 typedef SmallVector<BBValuePair, 2> BBValueVector;
// PHI node -> the operands that were removed from it.
39 typedef DenseMap<PHINode *, BBValueVector> PhiMap;
// Block -> PhiMap for the PHI nodes of that block (type of DeletedPhis).
40 typedef DenseMap<BasicBlock *, PhiMap> BBPhiMap;
// Block -> predicate value associated with that block (type of LoopPred).
41 typedef DenseMap<BasicBlock *, Value *> BBPredicates;
// Block -> the predicates collected for that block (used as Predicates[BB]).
42 typedef DenseMap<BasicBlock *, BBPredicates> PredMap;
// Block -> unsigned number; presumably the type of the Visited member, which
// collectInfos() fills with increasing visitation numbers -- TODO confirm.
43 typedef DenseMap<BasicBlock *, unsigned> VisitedMap;
45 // The name for newly created blocks.
// Passed to BasicBlock::Create() in getNextFlow() and to splitBasicBlock()
// in createFlow().
47 static const char *FlowBlockName = "Flow";
49 /// @brief Transforms the control flow graph on one single entry/exit region
52 /// After the transform all "If"/"Then"/"Else" style control flow looks like
64 /// | | 1 = "If" block, calculates the condition
65 /// 4 | 2 = "Then" subregion, runs if the condition is true
66 /// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
67 /// |/ 4 = "Else" optional subregion, runs if the condition is false
68 /// 5 5 = "End" block, also rejoins the control flow
71 /// Control flow is expressed as a branch where the true exit goes into the
72 /// "Then"/"Else" region, while the false exit skips the region
73 /// The condition for the optional "Else" region is expressed as a PHI node.
74 /// The incoming values of the PHI node are true for the "If" edge and false
75 /// for the "Then" edge.
77 /// Additionally to that even complicated loops look like this:
84 /// | / 1 = "Entry" block
85 /// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block
86 /// 3 3 = "Flow" block, with back edge to entry block
90 /// The back edge of the "Flow" block is always on the false side of the branch
91 /// while the true side continues the general flow. So the loop condition
92 /// consists of a network of PHI nodes where the true incoming values express
93 /// breaks and the false values express continue states.
// Region pass that restructures the control flow of a region.
// NOTE(review): several member declarations that the member functions rely on
// (e.g. ID, Boolean, Func, DT, Order, Visited, Predicates, LoopEnd) are not
// visible in this part of the class; confirm against the full declaration.
94 class AMDGPUStructurizeCFG : public RegionPass {
// i1 constants (true / false / undef); assigned in doInitialization().
99 ConstantInt *BoolTrue;
100 ConstantInt *BoolFalse;
101 UndefValue *BoolUndef;
// The region the helper functions operate on.
104 Region *ParentRegion;
// PHI operands removed by delPhiValues(), keyed by the block owning the PHI;
// addPhiValues() consumes and erases the entries again.
111 BBPhiMap DeletedPhis;
// Blocks whose branch condition is filled in by insertConditions();
// getNextFlow() appends every newly created flow block here.
112 BBVector FlowsInserted;
// Entry block of the loop handled in this region, or null (reset in
// collectInfos()).
114 BasicBlock *LoopStart;
// Predicates for the loop back edge; analyzeLoop() fills it through
// buildPredicate() with Invert == true.
116 BBPredicates LoopPred;
// Record in Pred the predicate contributed by successor Idx of Term.
120 void buildPredicate(BranchInst *Term, unsigned Idx,
121 BBPredicates &Pred, bool Invert);
// Collect the predicates of BB from the terminators of its predecessors.
123 void analyzeBlock(BasicBlock *BB);
// Collect loop predicates from the back edges leaving BB and lower LoopIdx.
125 void analyzeLoop(BasicBlock *BB, unsigned &LoopIdx);
// Does A dominate every block that contributes a predicate to B?
129 bool dominatesPredicates(BasicBlock *A, BasicBlock *B);
// Drop the PHI operands coming from BB in its successors and erase BB's
// terminator.
131 void killTerminator(BasicBlock *BB);
// Skip over linearly chained region nodes starting at Node.
133 RegionNode *skipChained(RegionNode *Node);
// Remove / restore the PHI operands for the edge From -> To.
135 void delPhiValues(BasicBlock *From, BasicBlock *To);
137 void addPhiValues(BasicBlock *From, BasicBlock *To);
// Create a new flow block that follows Prev.
139 BasicBlock *getNextFlow(BasicBlock *Prev);
// Can Node be predicted to always execute when coming from Prev?
141 bool isPredictableTrue(BasicBlock *Prev, BasicBlock *Node);
// Connect Prev to Node and route the exits of Node to a new flow block.
143 BasicBlock *wireFlowBlock(BasicBlock *Prev, RegionNode *Node);
// Replace the undefined branch conditions of the inserted flow blocks.
147 void insertConditions();
// Constructor; its initializer list is not visible in this excerpt.
152 AMDGPUStructurizeCFG():
155 initializeRegionInfoPass(*PassRegistry::getPassRegistry());
158 virtual bool doInitialization(Region *R, RGPassManager &RGM);
160 virtual bool runOnRegion(Region *R, RGPassManager &RGM);
// Human readable pass name.
162 virtual const char *getPassName() const {
163 return "AMDGPU simplify control flow";
// The pass requires the dominator tree and declares it preserved (the
// transformation updates it in place through DT->addNewBlock() and
// DT->changeImmediateDominator()).
166 void getAnalysisUsage(AnalysisUsage &AU) const {
168 AU.addRequired<DominatorTree>();
169 AU.addPreserved<DominatorTree>();
170 RegionPass::getAnalysisUsage(AU);
175 } // end anonymous namespace
// Definition of the static pass ID member.
// NOTE(review): presumably only its address is used to identify the pass,
// as is conventional for LLVM passes -- confirm against the constructor.
177 char AMDGPUStructurizeCFG::ID = 0;
179 /// \brief Initialize the types and constants used in the pass
///
/// Caches the i1 type and its true, false and undef constants, all taken
/// from the LLVMContext of the entry block of \p R. \p RGM is not used in the
/// visible statements.
/// NOTE(review): the return statement is not visible in this excerpt.
180 bool AMDGPUStructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
181 LLVMContext &Context = R->getEntry()->getContext();
183 Boolean = Type::getInt1Ty(Context);
184 BoolTrue = ConstantInt::getTrue(Context);
185 BoolFalse = ConstantInt::getFalse(Context);
186 BoolUndef = UndefValue::get(Boolean);
191 /// \brief Build up the general order of nodes
///
/// Clears Order and then appends the region nodes of every strongly
/// connected component of ParentRegion, in the sequence the SCC iterator
/// yields them. collectInfos() walks Order back to front and createFlow()
/// pops nodes from its back.
192 void AMDGPUStructurizeCFG::orderNodes() {
193 scc_iterator<Region *> I = scc_begin(ParentRegion),
194 E = scc_end(ParentRegion);
// Order.clear() runs once, as the init statement of the loop.
195 for (Order.clear(); I != E; ++I) {
196 std::vector<RegionNode *> &Nodes = *I;
197 Order.append(Nodes.begin(), Nodes.end());
201 /// \brief Build blocks and loop predicates
///
/// Records in \p Pred the predicate under which successor \p Idx of \p Term
/// is taken. With \p Invert set, the roles of the true and false constants
/// are swapped (analyzeLoop() uses this for back edges).
/// NOTE(review): several statements of the branches below are not visible in
/// this excerpt; the inline notes only describe what is shown.
202 void AMDGPUStructurizeCFG::buildPredicate(BranchInst *Term, unsigned Idx,
203 BBPredicates &Pred, bool Invert) {
204 Value *True = Invert ? BoolFalse : BoolTrue;
205 Value *False = Invert ? BoolTrue : BoolFalse;
207 RegionInfo *RI = ParentRegion->getRegionInfo();
208 BasicBlock *BB = Term->getParent();
210 // Handle the case where multiple regions start at the same block
// The entry block of ParentRegion is always attributed to ParentRegion
// itself instead of asking RegionInfo.
211 Region *R = BB != ParentRegion->getEntry() ?
212 RI->getRegionFor(BB) : ParentRegion;
214 if (R == ParentRegion) {
215 // It's a top level block in our region
217 if (Term->isConditional()) {
// The other successor of the two-way branch (index !Idx).
218 BasicBlock *Other = Term->getSuccessor(!Idx);
220 if (Visited.count(Other)) {
221 if (!Pred.count(Other))
228 Cond = Term->getCondition();
// Creates a "not" instruction in front of Term; presumably guarded by a
// check on Idx / Invert that is not visible here -- TODO confirm.
231 Cond = BinaryOperator::CreateNot(Cond, "", Term);
236 } else if (ParentRegion->contains(R)) {
237 // It's a block in a sub region
// Climb up to the sub region that is a direct child of ParentRegion.
238 while(R->getParent() != ParentRegion)
241 Pred[R->getEntry()] = True;
244 // It's a branch from outside into our parent region
249 /// \brief Analyze the successors of each block and build up predicates
///
/// Walks all predecessors of \p BB and feeds the successors of each
/// predecessor's terminator to buildPredicate(), collecting the results in
/// Predicates[BB].
250 void AMDGPUStructurizeCFG::analyzeBlock(BasicBlock *BB) {
251 pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
252 BBPredicates &Pred = Predicates[BB];
254 for (; PI != PE; ++PI) {
// cast<> asserts that every predecessor ends in a BranchInst.
255 BranchInst *Term = cast<BranchInst>((*PI)->getTerminator());
257 for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
258 BasicBlock *Succ = Term->getSuccessor(i);
// NOTE(review): presumably only successors equal to BB reach this call; the
// filtering statement is not visible in this excerpt.
261 buildPredicate(Term, i, Pred, false);
266 /// \brief Analyze the conditions leading to loop to a previous block
///
/// For every successor of \p BB that is already in Visited (a back edge),
/// builds an inverted predicate in LoopPred and lowers \p LoopIdx to the
/// smallest visitation number seen among the back edge targets.
267 void AMDGPUStructurizeCFG::analyzeLoop(BasicBlock *BB, unsigned &LoopIdx) {
// cast<> asserts that BB ends in a BranchInst.
268 BranchInst *Term = cast<BranchInst>(BB->getTerminator());
270 for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
271 BasicBlock *Succ = Term->getSuccessor(i);
273 // Ignore it if it's not a back edge
274 if (!Visited.count(Succ))
277 buildPredicate(Term, i, LoopPred, true);
// Track the earliest visited back edge target.
280 if (Visited[Succ] < LoopIdx) {
281 LoopIdx = Visited[Succ];
287 /// \brief Collect various loop and predicate infos
///
/// Resets the loop state, then walks Order from back to front. Each node's
/// entry block is analyzed with analyzeBlock() and afterwards receives the
/// next visitation number in Visited.
288 void AMDGPUStructurizeCFG::collectInfos() {
// LoopIdx starts at the maximum unsigned value so that any visitation number
// found by analyzeLoop() is smaller.
289 unsigned Number = 0, LoopIdx = ~0;
295 LoopStart = LoopEnd = 0;
298 RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend();
// Visited is cleared once up front; the increment expression numbers the
// entry block of the node just processed (starting at 1) and advances OI.
299 for (Visited.clear(); OI != OE; Visited[(*OI++)->getEntry()] = ++Number) {
301 // Analyze all the conditions leading to a node
302 analyzeBlock((*OI)->getEntry());
// NOTE(review): the statement controlled by this check is not visible here;
// presumably sub regions skip the loop analysis below.
304 if ((*OI)->isSubRegion())
307 // Find the first/last loop nodes and loop predicates
308 analyzeLoop((*OI)->getNodeAs<BasicBlock>(), LoopIdx);
312 /// \brief Does A dominate all the predicates of B ?
///
/// Checks, for every entry of Predicates[B], whether \p A dominates the block
/// the predicate is keyed on.
/// NOTE(review): the return statements are not visible in this excerpt.
313 bool AMDGPUStructurizeCFG::dominatesPredicates(BasicBlock *A, BasicBlock *B) {
// operator[] creates an empty predicate set for B if none exists yet.
314 BBPredicates &Preds = Predicates[B];
315 for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
318 if (!DT->dominates(A, PI->first))
324 /// \brief Remove phi values from all successors and then remove the terminator.
///
/// The removed PHI operands are remembered by delPhiValues() in DeletedPhis
/// so that addPhiValues() can restore them later.
325 void AMDGPUStructurizeCFG::killTerminator(BasicBlock *BB) {
326 TerminatorInst *Term = BB->getTerminator();
330 for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
333 delPhiValues(BB, *SI);
336 Term->eraseFromParent();
339 /// First: Skip forward to the first region node that either isn't a subregion or not
340 /// dominating its exit, remove all the skipped nodes from the node order.
342 /// Second: Handle the first successor directly if the resulting nodes successor
343 /// predicates are still dominated by the original entry
///
/// NOTE(review): loop headers, break/return statements and closing braces are
/// not visible in this excerpt; the inline notes only describe what is shown.
344 RegionNode *AMDGPUStructurizeCFG::skipChained(RegionNode *Node) {
// Entry block of the node we started from; used by the dominance checks in
// the second part.
345 BasicBlock *Entry = Node->getEntry();
347 // Skip forward as long as it is just a linear flow
// This second "Entry" is presumably declared inside a loop body and shadows
// the outer one -- TODO confirm against the full source.
349 BasicBlock *Entry = Node->getEntry();
// The exit is the region exit for a sub region, otherwise the single
// successor of the block.
352 if (Node->isSubRegion()) {
353 Exit = Node->getNodeAs<Region>()->getExit();
355 TerminatorInst *Term = Entry->getTerminator();
356 if (Term->getNumSuccessors() != 1)
358 Exit = Term->getSuccessor(0);
361 // It's a back edge, break here so we can insert a loop node
362 if (!Visited.count(Exit))
365 // More than node edges are pointing to exit
366 if (!DT->dominates(Entry, Exit))
369 RegionNode *Next = ParentRegion->getNode(Exit);
370 RNVector::iterator I = std::find(Order.begin(), Order.end(), Next);
371 assert(I != Order.end());
373 Visited.erase(Next->getEntry());
378 BasicBlock *BB = Node->getEntry();
379 TerminatorInst *Term = BB->getTerminator();
380 if (Term->getNumSuccessors() != 2)
383 // Our node has exactly two successors, check if we can handle
384 // any of them directly
// Try successor 0 first and fall back to successor 1; a candidate must still
// be in Visited and have all its predicates dominated by the original entry.
385 BasicBlock *Succ = Term->getSuccessor(0);
386 if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ)) {
387 Succ = Term->getSuccessor(1);
388 if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ))
// Both successors qualify: compare their visitation numbers to choose.
391 BasicBlock *Succ2 = Term->getSuccessor(1);
392 if (Visited.count(Succ2) && Visited[Succ] > Visited[Succ2] &&
393 dominatesPredicates(Entry, Succ2))
397 RegionNode *Next = ParentRegion->getNode(Succ);
398 RNVector::iterator E = Order.end();
399 RNVector::iterator I = std::find(Order.begin(), E, Next);
// BB keeps a conditional terminator, so insertConditions() must visit it.
403 FlowsInserted.push_back(BB);
// Mutual recursion: wireFlowBlock() itself calls skipChained().
406 return ParentRegion->getNode(wireFlowBlock(BB, Next));
409 /// \brief Remove all PHI values coming from "From" into "To" and remember
410 /// them in DeletedPhis
///
/// Only the PHI nodes at the start of \p To are inspected; every operand
/// whose incoming block is \p From is removed and recorded as a
/// (From, value) pair under DeletedPhis[To][Phi].
411 void AMDGPUStructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
412 PhiMap &Map = DeletedPhis[To];
// Stop at the first instruction that is not a PHI node.
413 for (BasicBlock::iterator I = To->begin(), E = To->end();
414 I != E && isa<PHINode>(*I);) {
// Advance the iterator before the PHI is modified.
416 PHINode &Phi = cast<PHINode>(*I++);
// A PHI may list the same incoming block more than once, hence the loop.
417 while (Phi.getBasicBlockIndex(From) != -1) {
// "false": keep the PHI node even when it ends up without operands.
418 Value *Deleted = Phi.removeIncomingValue(From, false);
419 Map[&Phi].push_back(std::make_pair(From, Deleted));
424 /// \brief Add the PHI values back once we know the new predecessor
///
/// For every PHI of \p To that delPhiValues() recorded, computes the value
/// that reaches the end of \p From and adds it as the incoming value for
/// \p From. The DeletedPhis entry of \p To is erased afterwards.
/// NOTE(review): the declaration of Updater and parts of the fallback logic
/// are not visible in this excerpt.
425 void AMDGPUStructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
// Nothing was deleted for this block.
426 if (!DeletedPhis.count(To))
429 PhiMap &Map = DeletedPhis[To];
432 for (PhiMap::iterator I = Map.begin(), E = Map.end(); I != E; ++I) {
434 PHINode *Phi = I->first;
435 Updater.Initialize(Phi->getType(), "");
// Fallback tracks a block related to the recorded incoming blocks through
// nearest-common-dominator queries; it receives an undef value below.
436 BasicBlock *Fallback = To;
437 bool HaveFallback = false;
439 for (BBValueVector::iterator VI = I->second.begin(), VE = I->second.end();
// Each recorded (block, value) pair becomes an available definition.
442 Updater.AddAvailableValue(VI->first, VI->second);
443 BasicBlock *Dom = DT->findNearestCommonDominator(Fallback, VI->first);
444 if (Dom == VI->first)
446 else if (Dom != Fallback)
447 HaveFallback = false;
// Presumably guarded by HaveFallback -- TODO confirm.
451 Value *Undef = UndefValue::get(Phi->getType());
452 Updater.AddAvailableValue(Fallback, Undef);
455 Phi->addIncoming(Updater.GetValueAtEndOfBlock(From), From);
457 DeletedPhis.erase(To);
460 /// \brief Create a new flow node and update dominator tree and region info
///
/// The new block is named FlowBlockName, registered in the dominator tree
/// with \p Prev as its immediate dominator, assigned to ParentRegion and
/// appended to FlowsInserted.
/// NOTE(review): the remaining BasicBlock::Create() arguments and the return
/// statement are not visible in this excerpt.
461 BasicBlock *AMDGPUStructurizeCFG::getNextFlow(BasicBlock *Prev) {
462 LLVMContext &Context = Func->getContext();
// Reference block for the placement of the new block: the entry of the last
// node still in Order, or the region exit once Order is empty.
463 BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
464 Order.back()->getEntry();
465 BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
467 DT->addNewBlock(Flow, Prev);
468 ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
469 FlowsInserted.push_back(Flow);
473 /// \brief Can we predict that this node will always be called?
///
/// Inspects the predicates recorded for \p Node: they are compared against
/// BoolTrue, and the blocks they are keyed on are tested for dominating
/// \p Prev.
/// NOTE(review): the second parameter line, the loop condition and all return
/// statements are not visible in this excerpt.
474 bool AMDGPUStructurizeCFG::isPredictableTrue(BasicBlock *Prev,
476 BBPredicates &Preds = Predicates[Node];
477 bool Dominated = false;
479 for (BBPredicates::iterator I = Preds.begin(), E = Preds.end();
// A predicate other than the constant true is handled by this check.
482 if (I->second != BoolTrue)
// Dominance is only queried while no dominating predicate block was found.
485 if (!Dominated && DT->dominates(I->first, Prev))
491 /// \brief Wire up the new control flow by inserting or updating the branch
492 /// instructions at node exits
///
/// Connects \p Prev to the entry of \p Node, lets skipChained() extend the
/// node, creates the next flow block and redirects the exits of the node to
/// it.
/// NOTE(review): the second parameter line, some statements and the return
/// are not visible in this excerpt.
493 BasicBlock *AMDGPUStructurizeCFG::wireFlowBlock(BasicBlock *Prev,
495 BasicBlock *Entry = Node->getEntry();
// Wiring the loop header: Prev gets a constant-true loop predicate.
497 if (LoopStart == Entry) {
499 LoopPred[Prev] = BoolTrue;
502 // Wire it up temporary, skipChained may recurse into us
503 BranchInst::Create(Entry, Prev);
504 DT->changeImmediateDominator(Entry, Prev);
505 addPhiValues(Prev, Entry);
507 Node = skipChained(Node);
509 BasicBlock *Next = getNextFlow(Prev);
510 if (!isPredictableTrue(Prev, Entry)) {
511 // Let Prev point to entry and next block
// The temporary unconditional branch is replaced by a conditional one whose
// undef condition is filled in later by insertConditions().
512 Prev->getTerminator()->eraseFromParent();
513 BranchInst::Create(Entry, Next, BoolUndef, Prev);
515 DT->changeImmediateDominator(Next, Entry);
518 // Let node exit(s) point to next block
519 if (Node->isSubRegion()) {
520 Region *SubRegion = Node->getNodeAs<Region>();
521 BasicBlock *Exit = SubRegion->getExit();
523 // Find all the edges from the sub region to the exit
// The edges are collected before anything is modified; the second loop
// below rewrites them.
525 for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) {
526 if (SubRegion->contains(*I))
530 // Modify the edges to point to the new flow block
531 for (BBVector::iterator I = ToDo.begin(), E = ToDo.end(); I != E; ++I) {
// Remember the PHI operands of the old edge before retargeting it.
532 delPhiValues(*I, Exit);
533 TerminatorInst *Term = (*I)->getTerminator();
534 Term->replaceUsesOfWith(Exit, Next);
537 // Update the region info
538 SubRegion->replaceExit(Next);
// Plain basic block node: it gets an unconditional branch to Next.
541 BasicBlock *BB = Node->getNodeAs<BasicBlock>();
543 BranchInst::Create(Next, BB);
552 /// Destroy node order and visited map, build up flow order instead.
553 /// After this function control flow looks like it should be, but
554 /// branches only have undefined conditions.
///
/// Order, Visited and DeletedPhis are expected to be empty on return (see the
/// assertions at the end).
/// NOTE(review): some statements and closing braces are not visible in this
/// excerpt; the inline notes only describe what is shown.
555 void AMDGPUStructurizeCFG::createFlow() {
// The last node in Order must be the region entry.
558 BasicBlock *Prev = Order.pop_back_val()->getEntry();
559 assert(Prev == ParentRegion->getEntry() && "Incorrect node order!");
562 if (LoopStart == Prev) {
563 // Loop starts at entry, split entry so that we can predicate it
564 BasicBlock::iterator Insert = Prev->getFirstInsertionPt();
565 BasicBlock *Split = Prev->splitBasicBlock(Insert, FlowBlockName);
566 DT->addNewBlock(Split, Prev);
567 ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion);
// The split-off block takes a copy of the entry's predicates and is queued
// as the next node to process.
568 Predicates[Split] = Predicates[Prev];
569 Order.push_back(ParentRegion->getBBNode(Split));
570 LoopPred[Prev] = BoolTrue;
572 } else if (LoopStart == Order.back()->getEntry()) {
573 // Loop starts behind entry, split entry so that we can jump to it
574 Instruction *Term = Prev->getTerminator();
575 BasicBlock *Split = Prev->splitBasicBlock(Term, FlowBlockName);
576 DT->addNewBlock(Split, Prev);
577 ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion);
// Start from a clean state: Prev loses its terminator and becomes the first
// recorded flow block.
581 killTerminator(Prev);
582 FlowsInserted.clear();
583 FlowsInserted.push_back(Prev);
585 while (!Order.empty()) {
586 RegionNode *Node = Order.pop_back_val();
587 Visited.erase(Node->getEntry());
588 Prev = wireFlowBlock(Prev, Node);
589 if (LoopStart && !LoopEnd) {
590 // Create an extra loop end node
// The loop end branches to the new flow block on its first successor and
// back to LoopStart on its second; the condition is filled in later.
592 Prev = getNextFlow(LoopEnd);
593 BranchInst::Create(Prev, LoopStart, BoolUndef, LoopEnd);
594 addPhiValues(LoopEnd, LoopStart);
// Connect the last flow block to the region exit.
598 BasicBlock *Exit = ParentRegion->getExit();
599 BranchInst::Create(Exit, Prev);
600 addPhiValues(Prev, Exit);
601 if (DT->dominates(ParentRegion->getEntry(), Exit))
602 DT->changeImmediateDominator(Exit, Prev);
// Restore PHI operands along the flow blocks up to (not including) LoopEnd.
604 if (LoopStart && LoopEnd) {
605 BBVector::iterator FI = std::find(FlowsInserted.begin(),
608 for (; *FI != LoopEnd; ++FI) {
609 addPhiValues(*FI, (*FI)->getTerminator()->getSuccessor(0));
613 assert(Order.empty());
614 assert(Visited.empty());
615 assert(DeletedPhis.empty());
618 /// \brief Insert the missing branch conditions
///
/// For the blocks in FlowsInserted, builds the branch condition with an
/// SSAUpdater: false is available at the function entry, and every recorded
/// predicate is available in the block it is keyed on. The value reaching
/// the end of the flow block becomes the condition of its terminator.
619 void AMDGPUStructurizeCFG::insertConditions() {
620 SSAUpdater PhiInserter;
622 for (BBVector::iterator FI = FlowsInserted.begin(), FE = FlowsInserted.end();
// cast<> asserts that every recorded block ends in a BranchInst.
625 BranchInst *Term = cast<BranchInst>((*FI)->getTerminator());
// NOTE(review): presumably unconditional branches are skipped; the controlled
// statement is not visible in this excerpt.
626 if (Term->isUnconditional())
629 PhiInserter.Initialize(Boolean, "");
630 PhiInserter.AddAvailableValue(&Func->getEntryBlock(), BoolFalse);
// The loop end block uses the loop predicates; every other block uses the
// predicates of its first successor.
632 BasicBlock *Succ = Term->getSuccessor(0);
633 BBPredicates &Preds = (*FI == LoopEnd) ? LoopPred : Predicates[Succ];
634 for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
637 PhiInserter.AddAvailableValue(PI->first, PI->second);
640 Term->setCondition(PhiInserter.GetValueAtEndOfBlock(*FI));
644 /// Handle a rare case where the disintegrated nodes instructions
645 /// no longer dominate all their uses. Not sure if this is really necessary
///
/// Visits the uses of the instructions in the blocks of ParentRegion. Uses
/// reach the final rewrite through an SSAUpdater seeded with undef at the
/// function entry and the instruction itself in its own block.
/// NOTE(review): loop conditions, the statements controlled by the checks
/// below and the Updater declaration are not visible in this excerpt.
646 void AMDGPUStructurizeCFG::rebuildSSA() {
648 for (Region::block_iterator I = ParentRegion->block_begin(),
649 E = ParentRegion->block_end();
653 for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
// The updater is presumably set up lazily, once per instruction.
656 bool Initialized = false;
// "Next" suggests the following use is fetched before the current one is
// rewritten -- TODO confirm.
657 for (Use *I = &II->use_begin().getUse(), *Next; I; I = Next) {
661 Instruction *User = cast<Instruction>(I->getUser());
// Check for a user in the defining block itself.
662 if (User->getParent() == BB) {
665 } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
// Check for a PHI user whose incoming block for this use is BB.
666 if (UserPN->getIncomingBlock(*I) == BB)
670 if (DT->dominates(II, User))
674 Value *Undef = UndefValue::get(II->getType());
675 Updater.Initialize(II->getType(), "");
676 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
677 Updater.AddAvailableValue(BB, II);
680 Updater.RewriteUseAfterInsertions(*I);
686 /// \brief Run the transformation for each region found
///
/// Looks up the enclosing function and the dominator tree for \p R and clears
/// FlowsInserted at the end.
/// NOTE(review): most of the body (including the calls that drive the
/// transformation and the return statements) is not visible in this excerpt.
687 bool AMDGPUStructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
// The statement controlled by this top-level-region check is not visible.
688 if (R->isTopLevelRegion())
691 Func = R->getEntry()->getParent();
694 DT = &getAnalysis<DominatorTree>();
706 FlowsInserted.clear();
711 /// \brief Create the pass
///
/// \returns a newly heap-allocated AMDGPUStructurizeCFG instance; the caller
/// takes over the raw pointer (presumably handing it to a pass manager).
712 Pass *llvm::createAMDGPUStructurizeCFGPass() {
713 return new AMDGPUStructurizeCFG();