1 //===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
9 //==-----------------------------------------------------------------------===//
12 #define DEBUG_TYPE "structcfg"
14 #include "AMDGPUInstrInfo.h"
16 #include "llvm/ADT/SCCIterator.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/Analysis/DominatorInternals.h"
20 #include "llvm/Analysis/Dominators.h"
21 #include "llvm/CodeGen/MachineDominators.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstrBuilder.h"
26 #include "llvm/CodeGen/MachineJumpTableInfo.h"
27 #include "llvm/CodeGen/MachineLoopInfo.h"
28 #include "llvm/CodeGen/MachinePostDominators.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/Target/TargetInstrInfo.h"
36 //===----------------------------------------------------------------------===//
38 // Statistics for CFGStructurizer.
40 //===----------------------------------------------------------------------===//
42 STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
44 STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
46 STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
48 STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue "
50 STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern "
52 STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
53 STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
55 //===----------------------------------------------------------------------===//
57 // Miscellaneous utility for CFGStructurizer.
59 //===----------------------------------------------------------------------===//
60 namespace llvmCFGStruct {
61 #define SHOWNEWINSTR(i) \
62 if (DEBUGME) errs() << "New instr: " << *i << "\n"
64 #define SHOWNEWBLK(b, msg) \
66 errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
70 #define SHOWBLK_DETAIL(b, msg) \
73 errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
79 #define INVALIDSCCNUM -1
80 #define INVALIDREGNUM 0
82 template<class LoopinfoT>
83 void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
84 for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
85 iterEnd = LoopInfo.end();
86 iter != iterEnd; ++iter) {
87 (*iter)->print(OS, 0);
92 void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
93 size_t sz = Src.size();
94 for (size_t i = 0; i < sz/2; ++i) {
96 Src[i] = Src[sz - i - 1];
101 } //end namespace llvmCFGStruct
103 //===----------------------------------------------------------------------===//
105 // supporting data structure for CFGStructurizer
107 //===----------------------------------------------------------------------===//
109 namespace llvmCFGStruct {
110 template<class PassT>
111 struct CFGStructTraits {
114 template <class InstrT>
115 class BlockInformation {
119 //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
120 //Instructions defining the corresponding successor.
121 BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
124 template <class BlockT, class InstrT, class RegiT>
125 class LandInformation {
128 std::set<RegiT> breakInitRegs; //Registers that need to "reg = 0", before
129 //WHILELOOP(thisloop) init before entering
131 std::set<RegiT> contInitRegs; //Registers that need to "reg = 0", after
132 //WHILELOOP(thisloop) init after entering
134 std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop
135 //land block, branch cond on this reg.
136 std::set<RegiT> breakOnRegs; //registers that need to "if (reg) break
137 //endif" after ENDLOOP(thisloop) break
138 //outerLoopOf(thisLoop).
139 std::set<RegiT> contOnRegs; //registers that need to "if (reg) continue
140 //endif" after ENDLOOP(thisloop) continue on
141 //outerLoopOf(thisLoop).
142 LandInformation() : landBlk(NULL) {}
145 } //end of namespace llvmCFGStruct
147 //===----------------------------------------------------------------------===//
151 //===----------------------------------------------------------------------===//
153 namespace llvmCFGStruct {
154 // bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
155 template<class PassT>
156 class CFGStructurizer {
160 SinglePath_InPath = 1,
161 SinglePath_NotInPath = 2
165 typedef typename PassT::InstructionType InstrT;
166 typedef typename PassT::FunctionType FuncT;
167 typedef typename PassT::DominatortreeType DomTreeT;
168 typedef typename PassT::PostDominatortreeType PostDomTreeT;
169 typedef typename PassT::DomTreeNodeType DomTreeNodeT;
170 typedef typename PassT::LoopinfoType LoopInfoT;
172 typedef GraphTraits<FuncT *> FuncGTraits;
173 //typedef FuncGTraits::nodes_iterator BlockIterator;
174 typedef typename FuncT::iterator BlockIterator;
176 typedef typename FuncGTraits::NodeType BlockT;
177 typedef GraphTraits<BlockT *> BlockGTraits;
178 typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits;
179 //typedef BlockGTraits::succ_iterator InstructionIterator;
180 typedef typename BlockT::iterator InstrIterator;
182 typedef CFGStructTraits<PassT> CFGTraits;
183 typedef BlockInformation<InstrT> BlockInfo;
184 typedef std::map<BlockT *, BlockInfo *> BlockInfoMap;
187 typedef typename PassT::LoopType LoopT;
188 typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo;
189 typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
190 //landing info for loop break
191 typedef SmallVector<BlockT *, 32> BlockTSmallerVector;
197 /// Perform the CFG structurization
198 bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
200 /// Perform the CFG preparation
201 bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
204 void reversePredicateSetter(typename BlockT::iterator);
206 void printOrderedBlocks(llvm::raw_ostream &OS);
207 int patternMatch(BlockT *CurBlock);
208 int patternMatchGroup(BlockT *CurBlock);
210 int serialPatternMatch(BlockT *CurBlock);
211 int ifPatternMatch(BlockT *CurBlock);
212 int switchPatternMatch(BlockT *CurBlock);
213 int loopendPatternMatch(BlockT *CurBlock);
214 int loopPatternMatch(BlockT *CurBlock);
216 int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
217 int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
218 //int loopWithoutBreak(BlockT *);
220 void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
221 BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
222 void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
223 BlockT *ContBlock, LoopT *contLoop);
224 bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
225 int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
227 int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
229 int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
230 BlockT *FalseBlock, BlockT **LandBlockPtr);
231 void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
232 BlockT *FalseBlock, BlockT *LandBlock,
233 bool Detail = false);
234 PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
235 bool AllowSideEntry = true);
236 BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
237 bool AllowSideEntry = true);
238 int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
239 void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
241 void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
242 BlockT *TrueBlock, BlockT *FalseBlock,
244 void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
245 void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
246 BlockT *ExitLandBlock, RegiT SetReg);
247 void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
249 BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
250 std::set<BlockT*> &ExitBlockSet,
251 BlockT *ExitLandBlk);
252 BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
253 BlockTSmallerVector &ExitingBlocks,
254 BlockTSmallerVector &ExitBlocks);
255 BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
256 void removeUnconditionalBranch(BlockT *SrcBlock);
257 void removeRedundantConditionalBranch(BlockT *SrcBlock);
258 void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
260 void removeSuccessor(BlockT *SrcBlock);
261 BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
262 BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
264 void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
265 InstrIterator InsertPos);
267 void recordSccnum(BlockT *SrcBlock, int SCCNum);
268 int getSCCNum(BlockT *srcBlk);
270 void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
271 bool isRetiredBlock(BlockT *SrcBlock);
272 bool isActiveLoophead(BlockT *CurBlock);
273 bool needMigrateBlock(BlockT *Block);
275 BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
276 BlockTSmallerVector &exitBlocks,
277 std::set<BlockT*> &ExitBlockSet);
278 void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
279 BlockT *getLoopLandBlock(LoopT *LoopRep);
280 LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
282 void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
283 void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
284 void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
285 void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
286 void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
288 bool hasBackEdge(BlockT *curBlock);
289 unsigned getLoopDepth (LoopT *LoopRep);
290 int countActiveBlock(
291 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
292 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
293 BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
294 BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
298 PostDomTreeT *postDomTree;
303 BlockInfoMap blockInfoMap;
304 LoopLandInfoMap loopLandInfoMap;
305 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
306 const AMDGPURegisterInfo *TRI;
308 }; //template class CFGStructurizer
310 template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
311 : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
314 template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
315 for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
316 E = blockInfoMap.end(); I != E; ++I) {
321 template<class PassT>
322 bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
323 const AMDGPURegisterInfo * tri) {
328 bool changed = false;
330 //FIXME: if not reducible flow graph, make it so ???
333 errs() << "AMDGPUCFGStructurizer::prepare\n";
336 loopInfo = CFGTraits::getLoopInfo(pass);
338 errs() << "LoopInfo:\n";
339 PrintLoopinfo(*loopInfo, errs());
344 errs() << "Ordered blocks:\n";
345 printOrderedBlocks(errs());
348 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
350 for (typename LoopInfoT::iterator iter = loopInfo->begin(),
351 iterEnd = loopInfo->end();
352 iter != iterEnd; ++iter) {
353 LoopT* loopRep = (*iter);
354 BlockTSmallerVector exitingBlks;
355 loopRep->getExitingBlocks(exitingBlks);
357 if (exitingBlks.size() == 0) {
358 BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
359 if (dummyExitBlk != NULL)
360 retBlks.push_back(dummyExitBlk);
364 // Remove unconditional branch instr.
365 // Add dummy exit block iff there are multiple returns.
367 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
368 iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
369 iterBlk != iterEndBlk;
371 BlockT *curBlk = *iterBlk;
372 removeUnconditionalBranch(curBlk);
373 removeRedundantConditionalBranch(curBlk);
374 if (CFGTraits::isReturnBlock(curBlk)) {
375 retBlks.push_back(curBlk);
377 assert(curBlk->succ_size() <= 2);
380 if (retBlks.size() >= 2) {
381 addDummyExitBlock(retBlks);
386 } //CFGStructurizer::prepare
388 template<class PassT>
389 bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
390 const AMDGPURegisterInfo * tri) {
395 //Assume reducible CFG...
397 errs() << "AMDGPUCFGStructurizer::run\n";
401 domTree = CFGTraits::getDominatorTree(pass);
403 domTree->print(errs(), (const llvm::Module*)0);
406 postDomTree = CFGTraits::getPostDominatorTree(pass);
408 postDomTree->print(errs());
411 loopInfo = CFGTraits::getLoopInfo(pass);
413 errs() << "LoopInfo:\n";
414 PrintLoopinfo(*loopInfo, errs());
419 //Use the worse block ordering to test the algorithm.
420 ReverseVector(orderedBlks);
424 errs() << "Ordered blocks:\n";
425 printOrderedBlocks(errs());
430 bool makeProgress = false;
431 int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
437 errs() << "numIter = " << numIter
438 << ", numRemaintedBlk = " << numRemainedBlk << "\n";
441 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
442 iterBlk = orderedBlks.begin();
443 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
444 iterBlkEnd = orderedBlks.end();
446 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
447 sccBeginIter = iterBlk;
448 BlockT *sccBeginBlk = NULL;
449 int sccNumBlk = 0; // The number of active blocks, init to a
450 // maximum possible number.
451 int sccNumIter; // Number of iteration in this SCC.
453 while (iterBlk != iterBlkEnd) {
456 if (sccBeginBlk == NULL) {
457 sccBeginIter = iterBlk;
458 sccBeginBlk = curBlk;
460 sccNumBlk = numRemainedBlk; // Init to maximum possible number.
462 errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
467 if (!isRetiredBlock(curBlk)) {
468 patternMatch(curBlk);
473 bool contNextScc = true;
474 if (iterBlk == iterBlkEnd
475 || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
476 // Just finish one scc.
478 int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
479 if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
481 errs() << "Can't reduce SCC " << getSCCNum(curBlk)
482 << ", sccNumIter = " << sccNumIter;
483 errs() << "doesn't make any progress\n";
486 } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
487 sccNumBlk = sccRemainedNumBlk;
488 iterBlk = sccBeginIter;
491 errs() << "repeat processing SCC" << getSCCNum(curBlk)
492 << "sccNumIter = " << sccNumIter << "\n";
496 // Finish the current scc.
500 // Continue on next component in the current scc.
507 } //while, "one iteration" over the function.
509 BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
510 if (entryBlk->succ_size() == 0) {
513 errs() << "Reduce to one block\n";
516 int newnumRemainedBlk
517 = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
518 // consider cloned blocks ??
519 if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
521 numRemainedBlk = newnumRemainedBlk;
523 makeProgress = false;
525 errs() << "No progress\n";
529 } while (!finish && makeProgress);
531 // Misc wrap up to maintain the consistency of the Function representation.
532 CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
534 // Detach retired Block, release memory.
535 for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
536 iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
537 if ((*iterMap).second && (*iterMap).second->isRetired) {
538 assert(((*iterMap).first)->getNumber() != -1);
540 errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
542 (*iterMap).first->eraseFromParent(); //Remove from the parent Function.
544 delete (*iterMap).second;
546 blockInfoMap.clear();
548 // clear loopLandInfoMap
549 for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
550 iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
551 delete (*iterMap).second;
553 loopLandInfoMap.clear();
560 assert(!"IRREDUCIBL_CF");
564 } //CFGStructurizer::run
566 /// Print the ordered Blocks.
568 template<class PassT>
569 void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) {
571 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
572 iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
573 iterBlk != iterBlkEnd;
575 os << "BB" << (*iterBlk)->getNumber();
576 os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
577 if (i != 0 && i % 10 == 0) {
583 } //printOrderedBlocks
585 /// Compute the reversed DFS post order of Blocks
587 template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
590 for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep),
591 sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) {
592 std::vector<BlockT *> &sccNext = *sccIter;
593 for (typename std::vector<BlockT *>::const_iterator
594 blockIter = sccNext.begin(), blockEnd = sccNext.end();
595 blockIter != blockEnd; ++blockIter) {
597 orderedBlks.push_back(bb);
598 recordSccnum(bb, sccNum);
602 //walk through all the block in func to check for unreachable
603 for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep),
604 blockEnd1 = FuncGTraits::nodes_end(funcRep);
605 blockIter1 != blockEnd1; ++blockIter1) {
606 BlockT *bb = &(*blockIter1);
607 sccNum = getSCCNum(bb);
608 if (sccNum == INVALIDSCCNUM) {
609 errs() << "unreachable block BB" << bb->getNumber() << "\n";
614 template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
619 errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
622 while ((curMatch = patternMatchGroup(curBlk)) > 0) {
623 numMatch += curMatch;
627 errs() << "End patternMatch BB" << curBlk->getNumber()
628 << ", numMatch = " << numMatch << "\n";
634 template<class PassT>
635 int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) {
637 numMatch += serialPatternMatch(curBlk);
638 numMatch += ifPatternMatch(curBlk);
639 numMatch += loopendPatternMatch(curBlk);
640 numMatch += loopPatternMatch(curBlk);
644 template<class PassT>
645 int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) {
646 if (curBlk->succ_size() != 1) {
650 BlockT *childBlk = *curBlk->succ_begin();
651 if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) {
655 mergeSerialBlock(curBlk, childBlk);
656 ++numSerialPatternMatch;
658 } //serialPatternMatch
660 template<class PassT>
661 int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
663 if (curBlk->succ_size() != 2) {
667 if (hasBackEdge(curBlk)) {
671 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
672 if (branchInstr == NULL) {
676 assert(CFGTraits::isCondBranch(branchInstr));
678 BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
679 BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
684 if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
685 && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
686 landBlk = *trueBlk->succ_begin();
687 } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
689 } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
692 } else if (falseBlk->succ_size() == 1
693 && *falseBlk->succ_begin() == trueBlk) {
696 } else if (falseBlk->succ_size() == 1
697 && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
698 landBlk = *falseBlk->succ_begin();
699 } else if (trueBlk->succ_size() == 1
700 && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
701 landBlk = *trueBlk->succ_begin();
703 return handleJumpintoIf(curBlk, trueBlk, falseBlk);
706 // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
707 // new BB created for landBlk==NULL may introduce new challenge to the
708 // reduction process.
709 if (landBlk != NULL &&
710 ((trueBlk && trueBlk->pred_size() > 1)
711 || (falseBlk && falseBlk->pred_size() > 1))) {
712 cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
715 if (trueBlk && trueBlk->pred_size() > 1) {
716 trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
720 if (falseBlk && falseBlk->pred_size() > 1) {
721 falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
725 mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
729 numClonedBlock += cloned;
734 template<class PassT>
735 int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
737 } //switchPatternMatch
739 template<class PassT>
740 int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) {
741 LoopT *loopRep = loopInfo->getLoopFor(curBlk);
742 typename std::vector<LoopT *> nestedLoops;
744 nestedLoops.push_back(loopRep);
745 loopRep = loopRep->getParentLoop();
748 if (nestedLoops.size() == 0) {
752 // Process nested loop outside->inside, so "continue" to a outside loop won't
753 // be mistaken as "break" of the current loop.
755 for (typename std::vector<LoopT *>::reverse_iterator
756 iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend();
757 iter != iterEnd; ++iter) {
760 if (getLoopLandBlock(loopRep) != NULL) {
764 BlockT *loopHeader = loopRep->getHeader();
766 int numBreak = loopbreakPatternMatch(loopRep, loopHeader);
768 if (numBreak == -1) {
772 int numCont = loopcontPatternMatch(loopRep, loopHeader);
773 num += numBreak + numCont;
777 } //loopendPatternMatch
779 template<class PassT>
780 int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) {
781 if (curBlk->succ_size() != 0) {
786 LoopT *loopRep = loopInfo->getLoopFor(curBlk);
787 while (loopRep && loopRep->getHeader() == curBlk) {
788 LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
790 BlockT *landBlk = loopLand->landBlk;
792 if (!isRetiredBlock(landBlk)) {
793 mergeLooplandBlock(curBlk, loopLand);
797 loopRep = loopRep->getParentLoop();
800 numLoopPatternMatch += numLoop;
805 template<class PassT>
806 int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
807 BlockT *loopHeader) {
808 BlockTSmallerVector exitingBlks;
809 loopRep->getExitingBlocks(exitingBlks);
812 errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
815 if (exitingBlks.size() == 0) {
816 setLoopLandBlock(loopRep);
820 // Compute the corresponding exitBlks and exit block set.
821 BlockTSmallerVector exitBlks;
822 std::set<BlockT *> exitBlkSet;
823 for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
824 iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
825 BlockT *exitingBlk = *iter;
826 BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
827 exitBlks.push_back(exitBlk);
828 exitBlkSet.insert(exitBlk); //non-duplicate insert
831 assert(exitBlkSet.size() > 0);
832 assert(exitBlks.size() == exitingBlks.size());
835 errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
839 BlockT *exitLandBlk = NULL;
843 if (exitBlkSet.size() == 1) {
844 exitLandBlk = *exitBlkSet.begin();
846 exitLandBlk = findNearestCommonPostDom(exitBlkSet);
848 if (exitLandBlk == NULL) {
852 bool allInPath = true;
853 bool allNotInPath = true;
854 for (typename std::set<BlockT*>::const_iterator
855 iter = exitBlkSet.begin(),
856 iterEnd = exitBlkSet.end();
857 iter != iterEnd; ++iter) {
858 BlockT *exitBlk = *iter;
860 PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
862 errs() << "BB" << exitBlk->getNumber()
863 << " to BB" << exitLandBlk->getNumber() << " PathToKind="
867 allInPath = allInPath && (pathKind == SinglePath_InPath);
868 allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
870 if (!allInPath && !allNotInPath) {
872 errs() << "singlePath check fail\n";
876 } // check all exit blocks
880 // TODO: Simplify, maybe separate function?
881 LoopT *parentLoopRep = loopRep->getParentLoop();
882 BlockT *parentLoopHeader = NULL;
884 parentLoopHeader = parentLoopRep->getHeader();
886 if (exitLandBlk == parentLoopHeader &&
887 (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
890 exitLandBlk)) != NULL) {
892 errs() << "relocateLoopcontBlock success\n";
894 } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
896 exitBlks)) != NULL) {
898 errs() << "insertEndbranchBlock success\n";
902 errs() << "loop exit fail\n";
908 // Handle side entry to exit path.
911 for (typename BlockTSmallerVector::iterator iterExiting =
913 iterExitingEnd = exitingBlks.end();
914 iterExiting != iterExitingEnd; ++iterExiting) {
915 BlockT *exitingBlk = *iterExiting;
916 BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
917 BlockT *newExitBlk = exitBlk;
919 if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
920 newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
924 numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);
926 exitBlks.push_back(newExitBlk);
927 exitBlkSet.insert(newExitBlk);
930 for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
931 iterExitEnd = exitBlks.end();
932 iterExit != iterExitEnd; ++iterExit) {
933 BlockT *exitBlk = *iterExit;
934 numSerial += serialPatternMatch(exitBlk);
937 for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
938 iterExitEnd = exitBlks.end();
939 iterExit != iterExitEnd; ++iterExit) {
940 BlockT *exitBlk = *iterExit;
941 if (exitBlk->pred_size() > 1) {
942 if (exitBlk != exitLandBlk) {
946 if (exitBlk != exitLandBlk &&
947 (exitBlk->succ_size() != 1 ||
948 *exitBlk->succ_begin() != exitLandBlk)) {
955 exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);
957 // Fold break into the breaking block. Leverage across level breaks.
958 assert(exitingBlks.size() == exitBlks.size());
959 for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
960 iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
961 iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
962 BlockT *exitBlk = *iterExit;
963 BlockT *exitingBlk = *iterExiting;
964 assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
965 LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
966 handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
969 int numBreak = static_cast<int>(exitingBlks.size());
970 numLoopbreakPatternMatch += numBreak;
971 numClonedBlock += numCloned;
972 return numBreak + numSerial + numCloned;
973 } //loopbreakPatternMatch
975 template<class PassT>
976 int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
977 BlockT *loopHeader) {
979 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk;
980 for (typename InvBlockGTraits::ChildIteratorType iter =
981 InvBlockGTraits::child_begin(loopHeader),
982 iterEnd = InvBlockGTraits::child_end(loopHeader);
983 iter != iterEnd; ++iter) {
984 BlockT *curBlk = *iter;
985 if (loopRep->contains(curBlk)) {
986 handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk),
987 loopHeader, loopRep);
988 contBlk.push_back(curBlk);
993 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator
994 iter = contBlk.begin(), iterEnd = contBlk.end();
995 iter != iterEnd; ++iter) {
996 (*iter)->removeSuccessor(loopHeader);
999 numLoopcontPatternMatch += numCont;
1002 } //loopcontPatternMatch
1005 template<class PassT>
1006 bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
1008 // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the
1009 // same loop with LoopLandInfo without explicitly keeping track of
1010 // loopContBlks and loopBreakBlks, this is a method to get the information.
1012 if (src1Blk->succ_size() == 0) {
1013 LoopT *loopRep = loopInfo->getLoopFor(src1Blk);
1014 if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) {
1015 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
1016 if (theEntry != NULL) {
1018 errs() << "isLoopContBreakBlock yes src1 = BB"
1019 << src1Blk->getNumber()
1020 << " src2 = BB" << src2Blk->getNumber() << "\n";
1027 } //isSameloopDetachedContbreak
1029 template<class PassT>
1030 int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
1033 int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
1036 errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
1038 num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
1043 template<class PassT>
1044 int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
1050 //trueBlk could be the common post dominator
1054 errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
1055 << " true = BB" << trueBlk->getNumber()
1056 << ", numSucc=" << trueBlk->succ_size()
1057 << " false = BB" << falseBlk->getNumber() << "\n";
1062 errs() << "check down = BB" << downBlk->getNumber();
1065 if (singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
1067 errs() << " working\n";
1070 num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
1071 num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);
1073 numClonedBlock += num;
1074 num += serialPatternMatch(*headBlk->succ_begin());
1075 num += serialPatternMatch(*(++headBlk->succ_begin()));
1076 num += ifPatternMatch(headBlk);
1082 errs() << " not working\n";
1084 downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
1085 } // walk down the postDomTree
1088 } //handleJumpintoIf
1090 template<class PassT>
1091 void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
1096 errs() << "head = BB" << headBlk->getNumber()
1097 << " size = " << headBlk->size();
1100 headBlk->print(errs());
1105 errs() << ", true = BB" << trueBlk->getNumber() << " size = "
1106 << trueBlk->size() << " numPred = " << trueBlk->pred_size();
1109 trueBlk->print(errs());
1114 errs() << ", false = BB" << falseBlk->getNumber() << " size = "
1115 << falseBlk->size() << " numPred = " << falseBlk->pred_size();
1118 falseBlk->print(errs());
1123 errs() << ", land = BB" << landBlk->getNumber() << " size = "
1124 << landBlk->size() << " numPred = " << landBlk->pred_size();
1127 landBlk->print(errs());
1133 } //showImproveSimpleJumpintoIf
1135 template<class PassT>
1136 int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
1139 BlockT **plandBlk) {
1140 bool migrateTrue = false;
1141 bool migrateFalse = false;
1143 BlockT *landBlk = *plandBlk;
1145 assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
1146 && (falseBlk == NULL || falseBlk->succ_size() <= 1));
1148 if (trueBlk == falseBlk) {
1152 migrateTrue = needMigrateBlock(trueBlk);
1153 migrateFalse = needMigrateBlock(falseBlk);
1155 if (!migrateTrue && !migrateFalse) {
1159 // If we need to migrate either trueBlk and falseBlk, migrate the rest that
1160 // have more than one predecessors. without doing this, its predecessor
1161 // rather than headBlk will have undefined value in initReg.
1162 if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
1165 if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
1166 migrateFalse = true;
1170 errs() << "before improveSimpleJumpintoIf: ";
1171 showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
1174 // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
1176 // new: headBlk => if () {initReg = 1; org trueBlk branch} else
1177 // {initReg = 0; org falseBlk branch }
1178 // => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
1180 // if landBlk->pred_size() > 2, put the about if-else inside
1181 // if (initReg !=2) {...}
1183 // add initReg = initVal to headBlk
1185 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1187 funcRep->getRegInfo().createVirtualRegister(I32RC);
1188 if (!migrateTrue || !migrateFalse) {
1189 int initVal = migrateTrue ? 0 : 1;
1190 CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
1195 if (landBlk == NULL) {
1196 landBlk = funcRep->CreateMachineBasicBlock();
1197 funcRep->push_back(landBlk); //insert to function
1200 trueBlk->addSuccessor(landBlk);
1202 headBlk->addSuccessor(landBlk);
1206 falseBlk->addSuccessor(landBlk);
1208 headBlk->addSuccessor(landBlk);
1214 bool landBlkHasOtherPred = (landBlk->pred_size() > 2);
1216 //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
1217 typename BlockT::iterator insertPos =
1218 CFGTraits::getInstrPos
1219 (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep));
1221 if (landBlkHasOtherPred) {
1223 funcRep->getRegInfo().createVirtualRegister(I32RC);
1224 CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
1225 unsigned cmpResReg =
1226 funcRep->getRegInfo().createVirtualRegister(I32RC);
1228 CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
1230 CFGTraits::insertCondBranchBefore(landBlk, insertPos,
1231 AMDGPU::IF_PREDICATE_SET, passRep,
1232 cmpResReg, DebugLoc());
1235 CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_PREDICATE_SET,
1236 passRep, initReg, DebugLoc());
1239 migrateInstruction(trueBlk, landBlk, insertPos);
1240 // need to uncondionally insert the assignment to ensure a path from its
1241 // predecessor rather than headBlk has valid value in initReg if
1243 CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
1245 CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep);
1248 migrateInstruction(falseBlk, landBlk, insertPos);
1249 // need to uncondionally insert the assignment to ensure a path from its
1250 // predecessor rather than headBlk has valid value in initReg if
1252 CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
1255 if (landBlkHasOtherPred) {
1257 CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
1259 // put initReg = 2 to other predecessors of landBlk
1260 for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
1261 predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
1263 BlockT *curBlk = *predIter;
1264 if (curBlk != trueBlk && curBlk != falseBlk) {
1265 CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
1270 errs() << "result from improveSimpleJumpintoIf: ";
1271 showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
1275 *plandBlk = landBlk;
1278 } //improveSimpleJumpintoIf
1280 template<class PassT>
1281 void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
1287 errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
1288 << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
1290 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1292 RegiT initReg = INVALIDREGNUM;
1293 if (exitingLoop != exitLoop) {
1294 initReg = static_cast<int>
1295 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1296 assert(initReg != INVALIDREGNUM);
1297 addLoopBreakInitReg(exitLoop, initReg);
1298 while (exitingLoop != exitLoop && exitingLoop) {
1299 addLoopBreakOnReg(exitingLoop, initReg);
1300 exitingLoop = exitingLoop->getParentLoop();
1302 assert(exitingLoop == exitLoop);
1305 mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg);
1309 template<class PassT>
1310 void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
1315 errs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
1316 << " header = BB" << contBlk->getNumber() << "\n";
1318 errs() << "Trying to continue loop-depth = "
1319 << getLoopDepth(contLoop)
1320 << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
1323 RegiT initReg = INVALIDREGNUM;
1324 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1325 if (contingLoop != contLoop) {
1326 initReg = static_cast<int>
1327 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1328 assert(initReg != INVALIDREGNUM);
1329 addLoopContInitReg(contLoop, initReg);
1330 while (contingLoop && contingLoop->getParentLoop() != contLoop) {
1331 addLoopBreakOnReg(contingLoop, initReg); //not addLoopContOnReg
1332 contingLoop = contingLoop->getParentLoop();
1334 assert(contingLoop && contingLoop->getParentLoop() == contLoop);
1335 addLoopContOnReg(contingLoop, initReg);
1338 settleLoopcontBlock(contingBlk, contBlk, initReg);
1339 } //handleLoopcontBlock
1341 template<class PassT>
1342 void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
1344 errs() << "serialPattern BB" << dstBlk->getNumber()
1345 << " <= BB" << srcBlk->getNumber() << "\n";
1347 dstBlk->splice(dstBlk->end(), srcBlk, srcBlk->begin(), srcBlk->end());
1349 dstBlk->removeSuccessor(srcBlk);
1350 CFGTraits::cloneSuccessorList(dstBlk, srcBlk);
1352 removeSuccessor(srcBlk);
1353 retireBlock(dstBlk, srcBlk);
1354 } //mergeSerialBlock
1356 template<class PassT>
1357 void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
1363 errs() << "ifPattern BB" << curBlk->getNumber();
1366 errs() << "BB" << trueBlk->getNumber();
1368 errs() << " } else ";
1371 errs() << "BB" << falseBlk->getNumber();
1374 errs() << "landBlock: ";
1375 if (landBlk == NULL) {
1378 errs() << "BB" << landBlk->getNumber();
1383 int oldOpcode = branchInstr->getOpcode();
1384 DebugLoc branchDL = branchInstr->getDebugLoc();
1394 typename BlockT::iterator branchInstrPos =
1395 CFGTraits::getInstrPos(curBlk, branchInstr);
1396 CFGTraits::insertCondBranchBefore(branchInstrPos,
1397 CFGTraits::getBranchNzeroOpcode(oldOpcode),
1402 curBlk->splice(branchInstrPos, trueBlk, trueBlk->begin(), trueBlk->end());
1403 curBlk->removeSuccessor(trueBlk);
1404 if (landBlk && trueBlk->succ_size()!=0) {
1405 trueBlk->removeSuccessor(landBlk);
1407 retireBlock(curBlk, trueBlk);
1409 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep);
1412 curBlk->splice(branchInstrPos, falseBlk, falseBlk->begin(),
1414 curBlk->removeSuccessor(falseBlk);
1415 if (landBlk && falseBlk->succ_size() != 0) {
1416 falseBlk->removeSuccessor(landBlk);
1418 retireBlock(curBlk, falseBlk);
1420 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
1422 branchInstr->eraseFromParent();
1424 if (landBlk && trueBlk && falseBlk) {
1425 curBlk->addSuccessor(landBlk);
1428 } //mergeIfthenelseBlock
1430 template<class PassT>
1431 void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
1432 LoopLandInfo *loopLand) {
1433 BlockT *landBlk = loopLand->landBlk;
1436 errs() << "loopPattern header = BB" << dstBlk->getNumber()
1437 << " land = BB" << landBlk->getNumber() << "\n";
1440 // Loop contInitRegs are init at the beginning of the loop.
1441 for (typename std::set<RegiT>::const_iterator iter =
1442 loopLand->contInitRegs.begin(),
1443 iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
1444 CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1447 /* we last inserterd the DebugLoc in the
1448 * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk.
1449 * search for the DebugLoc in the that statement.
1450 * if not found, we have to insert the empty/default DebugLoc */
1451 InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
1452 DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
1454 CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak);
1455 // Loop breakInitRegs are init before entering the loop.
1456 for (typename std::set<RegiT>::const_iterator iter =
1457 loopLand->breakInitRegs.begin(),
1458 iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter) {
1459 CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1461 // Loop endbranchInitRegs are init before entering the loop.
1462 for (typename std::set<RegiT>::const_iterator iter =
1463 loopLand->endbranchInitRegs.begin(),
1464 iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
1465 CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1468 /* we last inserterd the DebugLoc in the continue statement in the current dstBlk
1469 * search for the DebugLoc in the continue statement.
1470 * if not found, we have to insert the empty/default DebugLoc */
1471 InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
1472 DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
1474 CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue);
1475 // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this
1477 for (typename std::set<RegiT>::const_iterator iter =
1478 loopLand->breakOnRegs.begin(),
1479 iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
1480 CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::PREDICATED_BREAK, passRep,
1484 // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this
1486 for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
1487 iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
1488 CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32,
1492 dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
1494 for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
1495 iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
1496 dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of.
1499 removeSuccessor(landBlk);
1500 retireBlock(dstBlk, landBlk);
1501 } //mergeLooplandBlock
1503 template<class PassT>
1504 void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I) {
1506 if (I->getOpcode() == AMDGPU::PRED_X) {
1507 switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
1508 case OPCODE_IS_ZERO_INT:
1509 static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT);
1511 case OPCODE_IS_NOT_ZERO_INT:
1512 static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT);
1514 case OPCODE_IS_ZERO:
1515 static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO);
1517 case OPCODE_IS_NOT_ZERO:
1518 static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO);
1521 assert(0 && "PRED_X Opcode invalid!");
1527 template<class PassT>
1528 void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
1530 BlockT *exitLandBlk,
1533 errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
1534 << " exit = BB" << exitBlk->getNumber()
1535 << " land = BB" << exitLandBlk->getNumber() << "\n";
1538 InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
1539 assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
1541 DebugLoc DL = branchInstr->getDebugLoc();
1543 BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
1545 // transform exitingBlk to
1547 // exitBlk (if exitBlk != exitLandBlk)
1551 // successor = {orgSuccessor(exitingBlk) - exitBlk}
1553 typename BlockT::iterator branchInstrPos =
1554 CFGTraits::getInstrPos(exitingBlk, branchInstr);
1556 if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
1559 if (trueBranch != exitBlk) {
1560 reversePredicateSetter(branchInstrPos);
1562 CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL);
1564 if (trueBranch != exitBlk) {
1565 reversePredicateSetter(branchInstr);
1567 CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL);
1568 if (exitBlk != exitLandBlk) {
1569 //splice is insert-before ...
1570 exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(),
1573 if (setReg != INVALIDREGNUM) {
1574 CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
1576 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep);
1579 //now branchInst can be erase safely
1580 branchInstr->eraseFromParent();
1582 //now take care of successors, retire blocks
1583 exitingBlk->removeSuccessor(exitBlk);
1584 if (exitBlk != exitLandBlk) {
1585 //splice is insert-before ...
1586 exitBlk->removeSuccessor(exitLandBlk);
1587 retireBlock(exitingBlk, exitBlk);
1590 } //mergeLoopbreakBlock
1592 template<class PassT>
1593 void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
1597 errs() << "settleLoopcontBlock conting = BB"
1598 << contingBlk->getNumber()
1599 << ", cont = BB" << contBlk->getNumber() << "\n";
1602 InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
1604 assert(CFGTraits::isCondBranch(branchInstr));
1605 typename BlockT::iterator branchInstrPos =
1606 CFGTraits::getInstrPos(contingBlk, branchInstr);
1607 BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
1608 int oldOpcode = branchInstr->getOpcode();
1609 DebugLoc DL = branchInstr->getDebugLoc();
1611 // transform contingBlk to
1613 // move instr after branchInstr
1619 // successor = {orgSuccessor(contingBlk) - loopHeader}
1621 bool useContinueLogical =
1622 (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr);
1624 if (useContinueLogical == false) {
1626 trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
1627 : CFGTraits::getBranchZeroOpcode(oldOpcode);
1629 CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
1631 if (setReg != INVALIDREGNUM) {
1632 CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
1633 // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1634 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL);
1636 // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1637 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL);
1640 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL);
1643 trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
1644 : CFGTraits::getContinueZeroOpcode(oldOpcode);
1646 CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
1649 branchInstr->eraseFromParent();
1651 // if we've arrived here then we've already erased the branch instruction
1652 // travel back up the basic block to see the last reference of our debug location
1653 // we've just inserted that reference here so it should be representative
1654 if (setReg != INVALIDREGNUM) {
1655 CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
1656 // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1657 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
1659 // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1660 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
1664 } //settleLoopcontBlock
1666 // BBs in exitBlkSet are determined as in break-path for loopRep,
1667 // before we can put code for BBs as inside loop-body for loopRep
1668 // check whether those BBs are determined as cont-BB for parentLoopRep
1670 // If so, generate a new BB newBlk
1671 // (1) set newBlk common successor of BBs in exitBlkSet
1672 // (2) change the continue-instr in BBs in exitBlkSet to break-instr
1673 // (3) generate continue-instr in newBlk
1675 template<class PassT>
1676 typename CFGStructurizer<PassT>::BlockT *
1677 CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
1679 std::set<BlockT *> &exitBlkSet,
1680 BlockT *exitLandBlk) {
1681 std::set<BlockT *> endBlkSet;
1685 for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(),
1686 iterEnd = exitBlkSet.end();
1687 iter != iterEnd; ++iter) {
1688 BlockT *exitBlk = *iter;
1689 BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk);
1691 if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL)
1694 endBlkSet.insert(endBlk);
1697 BlockT *newBlk = funcRep->CreateMachineBasicBlock();
1698 funcRep->push_back(newBlk); //insert to function
1699 CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep);
1700 SHOWNEWBLK(newBlk, "New continue block: ");
1702 for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(),
1703 iterEnd = endBlkSet.end();
1704 iter != iterEnd; ++iter) {
1705 BlockT *endBlk = *iter;
1706 InstrT *contInstr = CFGTraits::getContinueInstr(endBlk);
1708 contInstr->eraseFromParent();
1710 endBlk->addSuccessor(newBlk);
1712 errs() << "Add new continue Block to BB"
1713 << endBlk->getNumber() << " successors\n";
1718 } //relocateLoopcontBlock
1721 // LoopEndbranchBlock is a BB created by the CFGStructurizer to use as
1722 // LoopLandBlock. This BB branch on the loop endBranchInit register to the
1723 // pathes corresponding to the loop exiting branches.
1725 template<class PassT>
1726 typename CFGStructurizer<PassT>::BlockT *
1727 CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
1728 BlockTSmallerVector &exitingBlks,
1729 BlockTSmallerVector &exitBlks) {
1730 const AMDGPUInstrInfo *tii =
1731 static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
1732 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1734 RegiT endBranchReg = static_cast<int>
1735 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1736 assert(endBranchReg >= 0);
1738 // reg = 0 before entering the loop
1739 addLoopEndbranchInitReg(loopRep, endBranchReg);
1741 uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
1742 assert(numBlks >=2 && numBlks == exitBlks.size());
1744 BlockT *preExitingBlk = exitingBlks[0];
1745 BlockT *preExitBlk = exitBlks[0];
1746 BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
1747 funcRep->push_back(preBranchBlk); //insert to function
1748 SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");
1750 BlockT *newLandBlk = preBranchBlk;
1752 CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
1754 preExitingBlk->removeSuccessor(preExitBlk);
1755 preExitingBlk->addSuccessor(newLandBlk);
1757 //it is redundant to add reg = 0 to exitingBlks[0]
1759 // For 1..n th exiting path (the last iteration handles two pathes) create the
1760 // branch to the previous path and the current path.
1761 for (uint32_t i = 1; i < numBlks; ++i) {
1762 BlockT *curExitingBlk = exitingBlks[i];
1763 BlockT *curExitBlk = exitBlks[i];
1764 BlockT *curBranchBlk;
1766 if (i == numBlks - 1) {
1767 curBranchBlk = curExitBlk;
1769 curBranchBlk = funcRep->CreateMachineBasicBlock();
1770 funcRep->push_back(curBranchBlk); //insert to function
1771 SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
1774 // Add reg = i to exitingBlks[i].
1775 CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep,
1778 // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge
1779 // (exitingBlks[i], newLandBlk).
1780 CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
1782 curExitingBlk->removeSuccessor(curExitBlk);
1783 curExitingBlk->addSuccessor(newLandBlk);
1785 // add to preBranchBlk the branch instruction:
1786 // if (endBranchReg == preVal)
1791 // preValReg = i - 1
1794 RegiT preValReg = static_cast<int>
1795 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1797 preBranchBlk->insert(preBranchBlk->begin(),
1798 tii->getMovImmInstr(preBranchBlk->getParent(), preValReg,
1801 // condResReg = (endBranchReg == preValReg)
1802 RegiT condResReg = static_cast<int>
1803 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1804 BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg)
1805 .addReg(endBranchReg).addReg(preValReg);
1807 BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32))
1808 .addMBB(preExitBlk).addReg(condResReg);
1810 preBranchBlk->addSuccessor(preExitBlk);
1811 preBranchBlk->addSuccessor(curBranchBlk);
1813 // Update preExitingBlk, preExitBlk, preBranchBlk.
1814 preExitingBlk = curExitingBlk;
1815 preExitBlk = curExitBlk;
1816 preBranchBlk = curBranchBlk;
1818 } //end for 1 .. n blocks
1821 } //addLoopEndbranchBlock
1823 template<class PassT>
1824 typename CFGStructurizer<PassT>::PathToKind
1825 CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
1826 bool allowSideEntry) {
1829 if (srcBlk == dstBlk) {
1830 return SinglePath_InPath;
1833 while (srcBlk && srcBlk->succ_size() == 1) {
1834 srcBlk = *srcBlk->succ_begin();
1835 if (srcBlk == dstBlk) {
1836 return SinglePath_InPath;
1839 if (!allowSideEntry && srcBlk->pred_size() > 1) {
1840 return Not_SinglePath;
1844 if (srcBlk && srcBlk->succ_size()==0) {
1845 return SinglePath_NotInPath;
1848 return Not_SinglePath;
1851 // If there is a single path from srcBlk to dstBlk, return the last block before
1852 // dstBlk If there is a single path from srcBlk->end without dstBlk, return the
1853 // last block in the path Otherwise, return NULL
1854 template<class PassT>
1855 typename CFGStructurizer<PassT>::BlockT *
1856 CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
1857 bool allowSideEntry) {
1860 if (srcBlk == dstBlk) {
1864 if (srcBlk->succ_size() == 0) {
1868 while (srcBlk && srcBlk->succ_size() == 1) {
1869 BlockT *preBlk = srcBlk;
1871 srcBlk = *srcBlk->succ_begin();
1872 if (srcBlk == NULL) {
1876 if (!allowSideEntry && srcBlk->pred_size() > 1) {
1881 if (srcBlk && srcBlk->succ_size()==0) {
1889 template<class PassT>
1890 int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
1893 assert(preBlk->isSuccessor(srcBlk));
1894 while (srcBlk && srcBlk != dstBlk) {
1895 assert(srcBlk->succ_size() == 1);
1896 if (srcBlk->pred_size() > 1) {
1897 srcBlk = cloneBlockForPredecessor(srcBlk, preBlk);
1902 srcBlk = *srcBlk->succ_begin();
1906 } //cloneOnSideEntryTo
1908 template<class PassT>
1909 typename CFGStructurizer<PassT>::BlockT *
1910 CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
1912 assert(predBlk->isSuccessor(curBlk) &&
1913 "succBlk is not a prececessor of curBlk");
1915 BlockT *cloneBlk = CFGTraits::clone(curBlk); //clone instructions
1916 CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
1917 //srcBlk, oldBlk, newBlk
1919 predBlk->removeSuccessor(curBlk);
1920 predBlk->addSuccessor(cloneBlk);
1922 // add all successor to cloneBlk
1923 CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
1925 numClonedInstr += curBlk->size();
1928 errs() << "Cloned block: " << "BB"
1929 << curBlk->getNumber() << "size " << curBlk->size() << "\n";
1932 SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
1935 } //cloneBlockForPredecessor
1937 template<class PassT>
1938 typename CFGStructurizer<PassT>::BlockT *
1939 CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
1940 BlockT *exitingBlk) {
1941 BlockT *exitBlk = NULL;
1943 for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(),
1944 iterSuccEnd = exitingBlk->succ_end();
1945 iterSucc != iterSuccEnd; ++iterSucc) {
1946 BlockT *curBlk = *iterSucc;
1947 if (!loopRep->contains(curBlk)) {
1948 assert(exitBlk == NULL);
1953 assert(exitBlk != NULL);
1956 } //exitingBlock2ExitBlock
1958 template<class PassT>
1959 void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
1961 InstrIterator insertPos) {
1962 InstrIterator spliceEnd;
1963 //look for the input branchinstr, not the AMDGPU branchinstr
1964 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
1965 if (branchInstr == NULL) {
1967 errs() << "migrateInstruction don't see branch instr\n" ;
1969 spliceEnd = srcBlk->end();
1972 errs() << "migrateInstruction see branch instr\n" ;
1973 branchInstr->dump();
1975 spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
1978 errs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
1979 << "srcSize = " << srcBlk->size() << "\n";
1982 //splice insert before insertPos
1983 dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
1986 errs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
1987 << "srcSize = " << srcBlk->size() << "\n";
1989 } //migrateInstruction
1991 // normalizeInfiniteLoopExit change
1993 // uncond_br LoopHeader
1997 // cond_br 1 LoopHeader dummyExit
1998 // and return the newly added dummy exit block
2000 template<class PassT>
2001 typename CFGStructurizer<PassT>::BlockT *
2002 CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
2005 loopHeader = LoopRep->getHeader();
2006 loopLatch = LoopRep->getLoopLatch();
2007 BlockT *dummyExitBlk = NULL;
2008 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
2009 if (loopHeader!=NULL && loopLatch!=NULL) {
2010 InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
2011 if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) {
2012 dummyExitBlk = funcRep->CreateMachineBasicBlock();
2013 funcRep->push_back(dummyExitBlk); //insert to function
2014 SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
2016 if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n";
2018 typename BlockT::iterator insertPos =
2019 CFGTraits::getInstrPos(loopLatch, branchInstr);
2021 funcRep->getRegInfo().createVirtualRegister(I32RC);
2022 CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
2024 CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep);
2025 MachineInstrBuilder MIB(*funcRep, newInstr);
2026 MIB.addMBB(loopHeader);
2027 MIB.addReg(immReg, false);
2029 SHOWNEWINSTR(newInstr);
2031 branchInstr->eraseFromParent();
2032 loopLatch->addSuccessor(dummyExitBlk);
2036 return dummyExitBlk;
2037 } //normalizeInfiniteLoopExit
2039 template<class PassT>
2040 void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
2041 InstrT *branchInstr;
2043 // I saw two unconditional branch in one basic block in example
2044 // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
2045 while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk))
2046 && CFGTraits::isUncondBranch(branchInstr)) {
2048 errs() << "Removing unconditional branch instruction" ;
2049 branchInstr->dump();
2051 branchInstr->eraseFromParent();
2053 } //removeUnconditionalBranch
2055 template<class PassT>
2056 void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
2057 if (srcBlk->succ_size() == 2) {
2058 BlockT *blk1 = *srcBlk->succ_begin();
2059 BlockT *blk2 = *(++srcBlk->succ_begin());
2062 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
2063 assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
2065 errs() << "Removing unneeded conditional branch instruction" ;
2066 branchInstr->dump();
2068 branchInstr->eraseFromParent();
2069 SHOWNEWBLK(blk1, "Removing redundant successor");
2070 srcBlk->removeSuccessor(blk1);
2073 } //removeRedundantConditionalBranch
2075 template<class PassT>
2076 void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
2077 DEFAULT_VEC_SLOTS> &retBlks) {
2078 BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
2079 funcRep->push_back(dummyExitBlk); //insert to function
2080 CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep);
2082 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter =
2084 iterEnd = retBlks.end(); iter != iterEnd; ++iter) {
2085 BlockT *curBlk = *iter;
2086 InstrT *curInstr = CFGTraits::getReturnInstr(curBlk);
2088 curInstr->eraseFromParent();
2090 curBlk->addSuccessor(dummyExitBlk);
2092 errs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
2097 SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
2098 } //addDummyExitBlock
2100 template<class PassT>
2101 void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) {
2102 while (srcBlk->succ_size()) {
2103 srcBlk->removeSuccessor(*srcBlk->succ_begin());
2107 template<class PassT>
2108 void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) {
2109 BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
2111 if (srcBlkInfo == NULL) {
2112 srcBlkInfo = new BlockInfo();
2115 srcBlkInfo->sccNum = sccNum;
2118 template<class PassT>
2119 int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
2120 BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
2121 return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM;
2124 template<class PassT>
2125 void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
2127 errs() << "Retiring BB" << srcBlk->getNumber() << "\n";
2130 BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
2132 if (srcBlkInfo == NULL) {
2133 srcBlkInfo = new BlockInfo();
2136 srcBlkInfo->isRetired = true;
2137 assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
2138 && "can't retire block yet");
2141 template<class PassT>
2142 bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) {
2143 BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
2144 return (srcBlkInfo && srcBlkInfo->isRetired);
2147 template<class PassT>
2148 bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) {
2149 LoopT *loopRep = loopInfo->getLoopFor(curBlk);
2150 while (loopRep && loopRep->getHeader() == curBlk) {
2151 LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
2153 if(loopLand == NULL)
2156 BlockT *landBlk = loopLand->landBlk;
2158 if (!isRetiredBlock(landBlk)) {
2162 loopRep = loopRep->getParentLoop();
2166 } //isActiveLoophead
2168 template<class PassT>
2169 bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) {
2170 const unsigned blockSizeThreshold = 30;
2171 const unsigned cloneInstrThreshold = 100;
2173 bool multiplePreds = blk && (blk->pred_size() > 1);
2178 unsigned blkSize = blk->size();
2179 return ((blkSize > blockSizeThreshold)
2180 && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold));
2181 } //needMigrateBlock
2183 template<class PassT>
2184 typename CFGStructurizer<PassT>::BlockT *
2185 CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
2186 BlockTSmallerVector &exitBlks,
2187 std::set<BlockT *> &exitBlkSet) {
2188 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks; //in exit path blocks
2190 for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
2191 predIterEnd = landBlk->pred_end();
2192 predIter != predIterEnd; ++predIter) {
2193 BlockT *curBlk = *predIter;
2194 if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
2195 inpathBlks.push_back(curBlk);
2199 //if landBlk has predecessors that are not in the given loop,
2200 //create a new block
2201 BlockT *newLandBlk = landBlk;
2202 if (inpathBlks.size() != landBlk->pred_size()) {
2203 newLandBlk = funcRep->CreateMachineBasicBlock();
2204 funcRep->push_back(newLandBlk); //insert to function
2205 newLandBlk->addSuccessor(landBlk);
2206 for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter =
2208 iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) {
2209 BlockT *curBlk = *iter;
2210 CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk);
2211 //srcBlk, oldBlk, newBlk
2212 curBlk->removeSuccessor(landBlk);
2213 curBlk->addSuccessor(newLandBlk);
2215 for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) {
2216 if (exitBlks[i] == landBlk) {
2217 exitBlks[i] = newLandBlk;
2220 SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
2223 setLoopLandBlock(loopRep, newLandBlk);
2226 } // recordLoopbreakLand
2228 template<class PassT>
2229 void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
2230 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2232 if (theEntry == NULL) {
2233 theEntry = new LoopLandInfo();
2235 assert(theEntry->landBlk == NULL);
2238 blk = funcRep->CreateMachineBasicBlock();
2239 funcRep->push_back(blk); //insert to function
2240 SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: ");
2243 theEntry->landBlk = blk;
2246 errs() << "setLoopLandBlock loop-header = BB"
2247 << loopRep->getHeader()->getNumber()
2248 << " landing-block = BB" << blk->getNumber() << "\n";
2250 } // setLoopLandBlock
2252 template<class PassT>
2253 void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
2254 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2256 if (theEntry == NULL) {
2257 theEntry = new LoopLandInfo();
2260 theEntry->breakOnRegs.insert(regNum);
2263 errs() << "addLoopBreakOnReg loop-header = BB"
2264 << loopRep->getHeader()->getNumber()
2265 << " regNum = " << regNum << "\n";
2267 } // addLoopBreakOnReg
2269 template<class PassT>
2270 void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
2271 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2273 if (theEntry == NULL) {
2274 theEntry = new LoopLandInfo();
2276 theEntry->contOnRegs.insert(regNum);
2279 errs() << "addLoopContOnReg loop-header = BB"
2280 << loopRep->getHeader()->getNumber()
2281 << " regNum = " << regNum << "\n";
2283 } // addLoopContOnReg
2285 template<class PassT>
2286 void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) {
2287 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2289 if (theEntry == NULL) {
2290 theEntry = new LoopLandInfo();
2292 theEntry->breakInitRegs.insert(regNum);
2295 errs() << "addLoopBreakInitReg loop-header = BB"
2296 << loopRep->getHeader()->getNumber()
2297 << " regNum = " << regNum << "\n";
2299 } // addLoopBreakInitReg
2301 template<class PassT>
2302 void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
2303 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2305 if (theEntry == NULL) {
2306 theEntry = new LoopLandInfo();
2308 theEntry->contInitRegs.insert(regNum);
2311 errs() << "addLoopContInitReg loop-header = BB"
2312 << loopRep->getHeader()->getNumber()
2313 << " regNum = " << regNum << "\n";
2315 } // addLoopContInitReg
2317 template<class PassT>
2318 void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
2320 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2322 if (theEntry == NULL) {
2323 theEntry = new LoopLandInfo();
2325 theEntry->endbranchInitRegs.insert(regNum);
2328 errs() << "addLoopEndbranchInitReg loop-header = BB"
2329 << loopRep->getHeader()->getNumber()
2330 << " regNum = " << regNum << "\n";
2332 } // addLoopEndbranchInitReg
2334 template<class PassT>
2335 typename CFGStructurizer<PassT>::LoopLandInfo *
2336 CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) {
2337 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2340 } // getLoopLandInfo
2342 template<class PassT>
2343 typename CFGStructurizer<PassT>::BlockT *
2344 CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) {
2345 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2347 return theEntry ? theEntry->landBlk : NULL;
2348 } // getLoopLandBlock
2351 template<class PassT>
2352 bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) {
2353 LoopT *loopRep = loopInfo->getLoopFor(curBlk);
2354 if (loopRep == NULL)
2357 BlockT *loopHeader = loopRep->getHeader();
2359 return curBlk->isSuccessor(loopHeader);
2363 template<class PassT>
2364 unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) {
2365 return loopRep ? loopRep->getLoopDepth() : 0;
2368 template<class PassT>
2369 int CFGStructurizer<PassT>::countActiveBlock
2370 (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart,
2371 typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) {
2373 while (iterStart != iterEnd) {
2374 if (!isRetiredBlock(*iterStart)) {
2381 } //countActiveBlock
2383 // This is work around solution for findNearestCommonDominator not avaiable to
2384 // post dom a proper fix should go to Dominators.h.
2386 template<class PassT>
2387 typename CFGStructurizer<PassT>::BlockT*
2388 CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {
2390 if (postDomTree->dominates(blk1, blk2)) {
2393 if (postDomTree->dominates(blk2, blk1)) {
2397 DomTreeNodeT *node1 = postDomTree->getNode(blk1);
2398 DomTreeNodeT *node2 = postDomTree->getNode(blk2);
2400 // Handle newly cloned node.
2401 if (node1 == NULL && blk1->succ_size() == 1) {
2402 return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
2404 if (node2 == NULL && blk2->succ_size() == 1) {
2405 return findNearestCommonPostDom(blk1, *blk2->succ_begin());
2408 if (node1 == NULL || node2 == NULL) {
2412 node1 = node1->getIDom();
2414 if (postDomTree->dominates(node1, node2)) {
2415 return node1->getBlock();
2417 node1 = node1->getIDom();
2423 template<class PassT>
2424 typename CFGStructurizer<PassT>::BlockT *
2425 CFGStructurizer<PassT>::findNearestCommonPostDom
2426 (typename std::set<BlockT *> &blks) {
2428 typename std::set<BlockT *>::const_iterator iter = blks.begin();
2429 typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
2430 for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) {
2431 BlockT *curBlk = *iter;
2432 if (curBlk != commonDom) {
2433 commonDom = findNearestCommonPostDom(curBlk, commonDom);
2438 errs() << "Common post dominator for exit blocks is ";
2440 errs() << "BB" << commonDom->getNumber() << "\n";
2447 } //findNearestCommonPostDom
2449 } //end namespace llvm
2454 //===----------------------------------------------------------------------===//
2456 // CFGStructurizer for AMDGPU
2458 //===----------------------------------------------------------------------===//
2461 using namespace llvmCFGStruct;
2464 class AMDGPUCFGStructurizer : public MachineFunctionPass {
2466 typedef MachineInstr InstructionType;
2467 typedef MachineFunction FunctionType;
2468 typedef MachineBasicBlock BlockType;
2469 typedef MachineLoopInfo LoopinfoType;
2470 typedef MachineDominatorTree DominatortreeType;
2471 typedef MachinePostDominatorTree PostDominatortreeType;
2472 typedef MachineDomTreeNode DomTreeNodeType;
2473 typedef MachineLoop LoopType;
2477 const TargetInstrInfo *TII;
2478 const AMDGPURegisterInfo *TRI;
2481 AMDGPUCFGStructurizer(char &pid, TargetMachine &tm);
2482 const TargetInstrInfo *getTargetInstrInfo() const;
2488 } //end of namespace llvm
2489 AMDGPUCFGStructurizer::AMDGPUCFGStructurizer(char &pid, TargetMachine &tm)
2490 : MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()),
2491 TRI(static_cast<const AMDGPURegisterInfo *>(tm.getRegisterInfo())) {
2494 const TargetInstrInfo *AMDGPUCFGStructurizer::getTargetInstrInfo() const {
2497 //===----------------------------------------------------------------------===//
2501 //===----------------------------------------------------------------------===//
2504 using namespace llvmCFGStruct;
2507 class AMDGPUCFGPrepare : public AMDGPUCFGStructurizer {
2512 AMDGPUCFGPrepare(TargetMachine &tm);
2514 virtual const char *getPassName() const;
2515 virtual void getAnalysisUsage(AnalysisUsage &AU) const;
2517 bool runOnMachineFunction(MachineFunction &F);
2523 char AMDGPUCFGPrepare::ID = 0;
2524 } //end of namespace llvm
2526 AMDGPUCFGPrepare::AMDGPUCFGPrepare(TargetMachine &tm)
2527 : AMDGPUCFGStructurizer(ID, tm ) {
2529 const char *AMDGPUCFGPrepare::getPassName() const {
2530 return "AMD IL Control Flow Graph Preparation Pass";
2533 void AMDGPUCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
2534 AU.addPreserved<MachineFunctionAnalysis>();
2535 AU.addRequired<MachineFunctionAnalysis>();
2536 AU.addRequired<MachineDominatorTree>();
2537 AU.addRequired<MachinePostDominatorTree>();
2538 AU.addRequired<MachineLoopInfo>();
2541 //===----------------------------------------------------------------------===//
2545 //===----------------------------------------------------------------------===//
2548 using namespace llvmCFGStruct;
2551 class AMDGPUCFGPerform : public AMDGPUCFGStructurizer {
2556 AMDGPUCFGPerform(TargetMachine &tm);
2557 virtual const char *getPassName() const;
2558 virtual void getAnalysisUsage(AnalysisUsage &AU) const;
2559 bool runOnMachineFunction(MachineFunction &F);
2565 char AMDGPUCFGPerform::ID = 0;
2566 } //end of namespace llvm
2568 AMDGPUCFGPerform::AMDGPUCFGPerform(TargetMachine &tm)
2569 : AMDGPUCFGStructurizer(ID, tm) {
2572 const char *AMDGPUCFGPerform::getPassName() const {
2573 return "AMD IL Control Flow Graph structurizer Pass";
2576 void AMDGPUCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const {
2577 AU.addPreserved<MachineFunctionAnalysis>();
2578 AU.addRequired<MachineFunctionAnalysis>();
2579 AU.addRequired<MachineDominatorTree>();
2580 AU.addRequired<MachinePostDominatorTree>();
2581 AU.addRequired<MachineLoopInfo>();
2584 //===----------------------------------------------------------------------===//
2586 // CFGStructTraits<AMDGPUCFGStructurizer>
2588 //===----------------------------------------------------------------------===//
2590 namespace llvmCFGStruct {
2591 // this class is tailor to the AMDGPU backend
2593 struct CFGStructTraits<AMDGPUCFGStructurizer> {
2596 static int getBranchNzeroOpcode(int oldOpcode) {
2598 case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
2599 case AMDGPU::BRANCH_COND_i32:
2600 case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
2602 assert(0 && "internal error");
2607 static int getBranchZeroOpcode(int oldOpcode) {
2609 case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
2610 case AMDGPU::BRANCH_COND_i32:
2611 case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
2613 assert(0 && "internal error");
2618 static int getContinueNzeroOpcode(int oldOpcode) {
2620 case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
2622 assert(0 && "internal error");
2627 static int getContinueZeroOpcode(int oldOpcode) {
2629 case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
2631 assert(0 && "internal error");
2636 static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
2637 return instr->getOperand(0).getMBB();
2640 static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
2641 instr->getOperand(0).setMBB(blk);
2644 static MachineBasicBlock *
2645 getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
2646 assert(blk->succ_size() == 2);
2647 MachineBasicBlock *trueBranch = getTrueBranch(instr);
2648 MachineBasicBlock::succ_iterator iter = blk->succ_begin();
2649 MachineBasicBlock::succ_iterator iterNext = iter;
2652 return (*iter == trueBranch) ? *iterNext : *iter;
2655 static bool isCondBranch(MachineInstr *instr) {
2656 switch (instr->getOpcode()) {
2658 return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() != 0;
2659 case AMDGPU::BRANCH_COND_i32:
2660 case AMDGPU::BRANCH_COND_f32:
2668 static bool isUncondBranch(MachineInstr *instr) {
2669 switch (instr->getOpcode()) {
2671 return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() == 0;
2672 case AMDGPU::BRANCH:
2680 static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
2681 //get DebugLoc from the first MachineBasicBlock instruction with debug info
2683 for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) {
2684 MachineInstr *instr = &(*iter);
2685 if (instr->getDebugLoc().isUnknown() == false) {
2686 DL = instr->getDebugLoc();
2692 static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
2693 MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2694 MachineInstr *instr = &*iter;
2695 if (instr && (isCondBranch(instr) || isUncondBranch(instr))) {
2701 // The correct naming for this is getPossibleLoopendBlockBranchInstr.
2703 // BB with backward-edge could have move instructions after the branch
2704 // instruction. Such move instruction "belong to" the loop backward-edge.
2706 static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) {
2707 const AMDGPUInstrInfo * TII = static_cast<const AMDGPUInstrInfo *>(
2708 blk->getParent()->getTarget().getInstrInfo());
2710 for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(),
2711 iterEnd = blk->rend(); iter != iterEnd; ++iter) {
2713 MachineInstr *instr = &*iter;
2715 if (isCondBranch(instr) || isUncondBranch(instr)) {
2717 } else if (!TII->isMov(instr->getOpcode())) {
2725 static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
2726 MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2727 if (iter != blk->rend()) {
2728 MachineInstr *instr = &(*iter);
2729 if (instr->getOpcode() == AMDGPU::RETURN) {
2736 static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
2737 MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2738 if (iter != blk->rend()) {
2739 MachineInstr *instr = &(*iter);
2740 if (instr->getOpcode() == AMDGPU::CONTINUE) {
2747 static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
2748 for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) {
2749 MachineInstr *instr = &(*iter);
2750 if (instr->getOpcode() == AMDGPU::PREDICATED_BREAK) {
2757 static bool isReturnBlock(MachineBasicBlock *blk) {
2758 MachineInstr *instr = getReturnInstr(blk);
2759 bool isReturn = (blk->succ_size() == 0);
2762 } else if (isReturn) {
2764 errs() << "BB" << blk->getNumber()
2765 <<" is return block without RETURN instr\n";
2772 static MachineBasicBlock::iterator
2773 getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
2774 assert(instr->getParent() == blk && "instruction doesn't belong to block");
2775 MachineBasicBlock::iterator iter = blk->begin();
2776 MachineBasicBlock::iterator iterEnd = blk->end();
2777 while (&(*iter) != instr && iter != iterEnd) {
2781 assert(iter != iterEnd);
2785 static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
2786 AMDGPUCFGStructurizer *passRep) {
2787 return insertInstrBefore(blk,newOpcode,passRep,DebugLoc());
2788 } //insertInstrBefore
2790 static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
2791 AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
2792 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2793 MachineInstr *newInstr =
2794 blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
2796 MachineBasicBlock::iterator res;
2797 if (blk->begin() != blk->end()) {
2798 blk->insert(blk->begin(), newInstr);
2800 blk->push_back(newInstr);
2803 SHOWNEWINSTR(newInstr);
2806 } //insertInstrBefore
2808 static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
2809 AMDGPUCFGStructurizer *passRep) {
2810 insertInstrEnd(blk,newOpcode,passRep,DebugLoc());
2813 static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
2814 AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
2815 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2816 MachineInstr *newInstr = blk->getParent()
2817 ->CreateMachineInstr(tii->get(newOpcode), DL);
2819 blk->push_back(newInstr);
2820 //assume the instruction doesn't take any reg operand ...
2822 SHOWNEWINSTR(newInstr);
2825 static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
2827 AMDGPUCFGStructurizer *passRep) {
2828 MachineInstr *oldInstr = &(*instrPos);
2829 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2830 MachineBasicBlock *blk = oldInstr->getParent();
2831 MachineInstr *newInstr =
2832 blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
2835 blk->insert(instrPos, newInstr);
2836 //assume the instruction doesn't take any reg operand ...
2838 SHOWNEWINSTR(newInstr);
2840 } //insertInstrBefore
2842 static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
2844 AMDGPUCFGStructurizer *passRep,
2846 MachineInstr *oldInstr = &(*instrPos);
2847 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2848 MachineBasicBlock *blk = oldInstr->getParent();
2849 MachineFunction *MF = blk->getParent();
2850 MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL);
2852 blk->insert(instrPos, newInstr);
2853 MachineInstrBuilder MIB(*MF, newInstr);
2854 MIB.addReg(oldInstr->getOperand(1).getReg(), false);
2856 SHOWNEWINSTR(newInstr);
2857 //erase later oldInstr->eraseFromParent();
2858 } //insertCondBranchBefore
2860 static void insertCondBranchBefore(MachineBasicBlock *blk,
2861 MachineBasicBlock::iterator insertPos,
2863 AMDGPUCFGStructurizer *passRep,
2866 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2867 MachineFunction *MF = blk->getParent();
2869 MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL);
2872 blk->insert(insertPos, newInstr);
2873 MachineInstrBuilder(*MF, newInstr).addReg(regNum, false);
2875 SHOWNEWINSTR(newInstr);
2876 } //insertCondBranchBefore
2878 static void insertCondBranchEnd(MachineBasicBlock *blk,
2880 AMDGPUCFGStructurizer *passRep,
2882 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2883 MachineFunction *MF = blk->getParent();
2884 MachineInstr *newInstr =
2885 MF->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
2887 blk->push_back(newInstr);
2888 MachineInstrBuilder(*MF, newInstr).addReg(regNum, false);
2890 SHOWNEWINSTR(newInstr);
2891 } //insertCondBranchEnd
2894 static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
2895 AMDGPUCFGStructurizer *passRep,
2896 RegiT regNum, int regVal) {
2897 MachineInstr *oldInstr = &(*instrPos);
2898 const AMDGPUInstrInfo *tii =
2899 static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
2900 MachineBasicBlock *blk = oldInstr->getParent();
2901 MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
2903 blk->insert(instrPos, newInstr);
2905 SHOWNEWINSTR(newInstr);
2906 } //insertAssignInstrBefore
2908 static void insertAssignInstrBefore(MachineBasicBlock *blk,
2909 AMDGPUCFGStructurizer *passRep,
2910 RegiT regNum, int regVal) {
2911 const AMDGPUInstrInfo *tii =
2912 static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
2914 MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
2916 if (blk->begin() != blk->end()) {
2917 blk->insert(blk->begin(), newInstr);
2919 blk->push_back(newInstr);
2922 SHOWNEWINSTR(newInstr);
2924 } //insertInstrBefore
2926 static void insertCompareInstrBefore(MachineBasicBlock *blk,
2927 MachineBasicBlock::iterator instrPos,
2928 AMDGPUCFGStructurizer *passRep,
2929 RegiT dstReg, RegiT src1Reg,
2931 const AMDGPUInstrInfo *tii =
2932 static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
2933 MachineFunction *MF = blk->getParent();
2934 MachineInstr *newInstr =
2935 MF->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc());
2937 MachineInstrBuilder MIB(*MF, newInstr);
2938 MIB.addReg(dstReg, RegState::Define); //set target
2939 MIB.addReg(src1Reg); //set src value
2940 MIB.addReg(src2Reg); //set src value
2942 blk->insert(instrPos, newInstr);
2943 SHOWNEWINSTR(newInstr);
2945 } //insertCompareInstrBefore
2947 static void cloneSuccessorList(MachineBasicBlock *dstBlk,
2948 MachineBasicBlock *srcBlk) {
2949 for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(),
2950 iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) {
2951 dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of
2953 } //cloneSuccessorList
2955 static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
2956 MachineFunction *func = srcBlk->getParent();
2957 MachineBasicBlock *newBlk = func->CreateMachineBasicBlock();
2958 func->push_back(newBlk); //insert to function
2959 for (MachineBasicBlock::iterator iter = srcBlk->begin(),
2960 iterEnd = srcBlk->end();
2961 iter != iterEnd; ++iter) {
2962 MachineInstr *instr = func->CloneMachineInstr(iter);
2963 newBlk->push_back(instr);
2968 //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because
2969 //the AMDGPU instruction is not recognized as terminator fix this and retire
2971 static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk,
2972 MachineBasicBlock *oldBlk,
2973 MachineBasicBlock *newBlk) {
2974 MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk);
2975 if (branchInstr && isCondBranch(branchInstr) &&
2976 getTrueBranch(branchInstr) == oldBlk) {
2977 setTrueBranch(branchInstr, newBlk);
2981 static void wrapup(MachineBasicBlock *entryBlk) {
2982 assert((!entryBlk->getParent()->getJumpTableInfo()
2983 || entryBlk->getParent()->getJumpTableInfo()->isEmpty())
2984 && "found a jump table");
2986 //collect continue right before endloop
2987 SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr;
2988 MachineBasicBlock::iterator pre = entryBlk->begin();
2989 MachineBasicBlock::iterator iterEnd = entryBlk->end();
2990 MachineBasicBlock::iterator iter = pre;
2991 while (iter != iterEnd) {
2992 if (pre->getOpcode() == AMDGPU::CONTINUE
2993 && iter->getOpcode() == AMDGPU::ENDLOOP) {
2994 contInstr.push_back(pre);
3000 //delete continue right before endloop
3001 for (unsigned i = 0; i < contInstr.size(); ++i) {
3002 contInstr[i]->eraseFromParent();
3005 // TODO to fix up jump table so later phase won't be confused. if
3006 // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
3007 // there isn't such an interface yet. alternatively, replace all the other
3008 // blocks in the jump table with the entryBlk //}
3012 static MachineDominatorTree *getDominatorTree(AMDGPUCFGStructurizer &pass) {
3013 return &pass.getAnalysis<MachineDominatorTree>();
3016 static MachinePostDominatorTree*
3017 getPostDominatorTree(AMDGPUCFGStructurizer &pass) {
3018 return &pass.getAnalysis<MachinePostDominatorTree>();
3021 static MachineLoopInfo *getLoopInfo(AMDGPUCFGStructurizer &pass) {
3022 return &pass.getAnalysis<MachineLoopInfo>();
3024 }; // template class CFGStructTraits
3025 } //end of namespace llvm
3027 // createAMDGPUCFGPreparationPass- Returns a pass
3028 FunctionPass *llvm::createAMDGPUCFGPreparationPass(TargetMachine &tm
3030 return new AMDGPUCFGPrepare(tm );
3033 bool AMDGPUCFGPrepare::runOnMachineFunction(MachineFunction &func) {
3034 return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().prepare(func,
3039 // createAMDGPUCFGStructurizerPass- Returns a pass
3040 FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm
3042 return new AMDGPUCFGPerform(tm );
3045 bool AMDGPUCFGPerform::runOnMachineFunction(MachineFunction &func) {
3046 return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().run(func,