SmallSetVector<MachineBasicBlock*, 8> &Succs);
bool TailDuplicateBlocks(MachineFunction &MF);
bool shouldTailDuplicate(const MachineFunction &MF,
- MachineBasicBlock &TailBB);
+ bool IsSimple, MachineBasicBlock &TailBB);
+ bool isSimpleBB(MachineBasicBlock *TailBB);
+ bool canCompletelyDuplicateBB(MachineBasicBlock &BB);
+ bool duplicateSimpleBB(MachineBasicBlock *TailBB,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ const DenseSet<unsigned> &RegsUsedByPhi,
+ SmallVector<MachineInstr*, 16> &Copies);
bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
SmallVector<MachineBasicBlock*, 8> &TDBBs,
SmallVector<MachineInstr*, 16> &Copies);
// TailBB's immediate successors are now successors of those predecessors
// which duplicated TailBB. Add the predecessors as sources to the PHI
// instructions.
- bool isDead = MBB->pred_empty();
+ bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken();
if (PreRegAlloc)
UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);
MachineOperand &UseMO = UI.getOperand();
MachineInstr *UseMI = &*UI;
++UI;
+ if (UseMI->isDebugValue()) {
+ // SSAUpdate can replace the use with an undef. That creates
+ // a debug instruction that is a kill.
+ // FIXME: Should it SSAUpdate job to delete debug instructions
+ // instead of replacing the use with undef?
+ UseMI->eraseFromParent();
+ continue;
+ }
if (UseMI->getParent() == DefBB && !UseMI->isPHI())
continue;
SSAUpdate.RewriteUse(UseMO);
for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
UE = MRI->use_end(); UI != UE; ++UI) {
MachineInstr *UseMI = &*UI;
+ if (UseMI->isDebugValue())
+ continue;
if (UseMI->getParent() != BB)
return true;
}
// used to determine which registers are liveout while modifying the
// block (which is why we need to copy the information).
static void getRegsUsedByPHIs(const MachineBasicBlock &BB,
- DenseSet<unsigned> *UsedByPhi) {
+ DenseSet<unsigned> *UsedByPhi) {
for(MachineBasicBlock::const_iterator I = BB.begin(), E = BB.end();
I != E; ++I) {
const MachineInstr &MI = *I;
MachineBasicBlock *PredBB,
DenseMap<unsigned, unsigned> &LocalVRMap,
SmallVector<std::pair<unsigned,unsigned>, 4> &Copies,
- const DenseSet<unsigned> &RegsUsedByPhi,
+ const DenseSet<unsigned> &RegsUsedByPhi,
bool Remove) {
unsigned DefReg = MI->getOperand(0).getReg();
unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB);
/// shouldTailDuplicate - Determine if it is profitable to duplicate this block.
bool
TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
+ bool IsSimple,
MachineBasicBlock &TailBB) {
// Only duplicate blocks that end with unconditional branches.
if (TailBB.canFallThrough())
return false;
+ // Don't try to tail-duplicate single-block loops.
+ if (TailBB.isSuccessor(&TailBB))
+ return false;
+
// Set the limit on the cost to duplicate. When optimizing for size,
// duplicate only one, because one branch instruction can be eliminated to
// compensate for the duplication.
else
MaxDuplicateCount = TailDuplicateSize;
- if (PreRegAlloc) {
- if (TailBB.empty())
- return false;
- const TargetInstrDesc &TID = TailBB.back().getDesc();
- // Pre-regalloc tail duplication hurts compile time and doesn't help
- // much except for indirect branches.
- if (!TID.isIndirectBranch())
- return false;
- // If the target has hardware branch prediction that can handle indirect
- // branches, duplicating them can often make them predictable when there
- // are common paths through the code. The limit needs to be high enough
- // to allow undoing the effects of tail merging and other optimizations
- // that rearrange the predecessors of the indirect branch.
- MaxDuplicateCount = 20;
+ // If the target has hardware branch prediction that can handle indirect
+ // branches, duplicating them can often make them predictable when there
+ // are common paths through the code. The limit needs to be high enough
+ // to allow undoing the effects of tail merging and other optimizations
+ // that rearrange the predecessors of the indirect branch.
+
+ bool hasIndirectBR = false;
+ if (PreRegAlloc && !TailBB.empty()) {
+ const MCInstrDesc &MCID = TailBB.back().getDesc();
+ if (MCID.isIndirectBranch()) {
+ MaxDuplicateCount = 20;
+ hasIndirectBR = true;
+ }
}
- // Don't try to tail-duplicate single-block loops.
- if (TailBB.isSuccessor(&TailBB))
- return false;
-
// Check the instructions in the block to determine whether tail-duplication
// is invalid or unlikely to be profitable.
unsigned InstrCount = 0;
- bool HasCall = false;
for (MachineBasicBlock::const_iterator I = TailBB.begin(); I != TailBB.end();
++I) {
// Non-duplicable things shouldn't be tail-duplicated.
- if (I->getDesc().isNotDuplicable()) return false;
+ if (I->getDesc().isNotDuplicable())
+ return false;
+
// Do not duplicate 'return' instructions if this is a pre-regalloc run.
// A return may expand into a lot more instructions (e.g. reload of callee
// saved registers) after PEI.
- if (PreRegAlloc && I->getDesc().isReturn()) return false;
- // Don't duplicate more than the threshold.
- if (InstrCount == MaxDuplicateCount) return false;
- // Remember if we saw a call.
- if (I->getDesc().isCall()) HasCall = true;
+ if (PreRegAlloc && I->getDesc().isReturn())
+ return false;
+
+ // Avoid duplicating calls before register allocation. Calls presents a
+ // barrier to register allocation so duplicating them may end up increasing
+ // spills.
+ if (PreRegAlloc && I->getDesc().isCall())
+ return false;
+
if (!I->isPHI() && !I->isDebugValue())
InstrCount += 1;
+
+ if (InstrCount > MaxDuplicateCount)
+ return false;
}
- // Don't tail-duplicate calls before register allocation. Calls presents a
- // barrier to register allocation so duplicating them may end up increasing
- // spills.
- if (InstrCount > 1 && (PreRegAlloc && HasCall))
+
+ if (hasIndirectBR)
+ return true;
+
+ if (IsSimple)
+ return true;
+
+ if (!PreRegAlloc)
+ return true;
+
+ return canCompletelyDuplicateBB(TailBB);
+}
+
+/// isSimpleBB - True if this BB has only one unconditional jump.
+bool
+TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) {
+ if (TailBB->succ_size() != 1)
return false;
+ if (TailBB->pred_empty())
+ return false;
+ MachineBasicBlock::iterator I = TailBB->begin();
+ MachineBasicBlock::iterator E = TailBB->end();
+ while (I != E && I->isDebugValue())
+ ++I;
+ if (I == E)
+ return true;
+ return I->getDesc().isUnconditionalBranch();
+}
+
+static bool
+bothUsedInPHI(const MachineBasicBlock &A,
+ SmallPtrSet<MachineBasicBlock*, 8> SuccsB) {
+ for (MachineBasicBlock::const_succ_iterator SI = A.succ_begin(),
+ SE = A.succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *BB = *SI;
+ if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI())
+ return true;
+ }
+ return false;
+}
+
+bool
+TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) {
+ SmallPtrSet<MachineBasicBlock*, 8> Succs(BB.succ_begin(), BB.succ_end());
+
+ for (MachineBasicBlock::pred_iterator PI = BB.pred_begin(),
+ PE = BB.pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+
+ if (PredBB->succ_size() > 1)
+ return false;
+
+ MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ return false;
+
+ if (!PredCond.empty())
+ return false;
+ }
return true;
}
+bool
+TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ const DenseSet<unsigned> &UsedByPhi,
+ SmallVector<MachineInstr*, 16> &Copies) {
+ SmallPtrSet<MachineBasicBlock*, 8> Succs(TailBB->succ_begin(),
+ TailBB->succ_end());
+ SmallVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
+ TailBB->pred_end());
+ bool Changed = false;
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+
+ if (PredBB->getLandingPadSuccessor())
+ continue;
+
+ if (bothUsedInPHI(*PredBB, Succs))
+ continue;
+
+ MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ continue;
+
+ Changed = true;
+ DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
+ << "From simple Succ: " << *TailBB);
+
+ MachineBasicBlock *NewTarget = *TailBB->succ_begin();
+ MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(PredBB));
+
+ // Make PredFBB explicit.
+ if (PredCond.empty())
+ PredFBB = PredTBB;
+
+ // Make fall through explicit.
+ if (!PredTBB)
+ PredTBB = NextBB;
+ if (!PredFBB)
+ PredFBB = NextBB;
+
+ // Redirect
+ if (PredFBB == TailBB)
+ PredFBB = NewTarget;
+ if (PredTBB == TailBB)
+ PredTBB = NewTarget;
+
+ // Make the branch unconditional if possible
+ if (PredTBB == PredFBB) {
+ PredCond.clear();
+ PredFBB = NULL;
+ }
+
+ // Avoid adding fall through branches.
+ if (PredFBB == NextBB)
+ PredFBB = NULL;
+ if (PredTBB == NextBB && PredFBB == NULL)
+ PredTBB = NULL;
+
+ TII->RemoveBranch(*PredBB);
+
+ if (PredTBB)
+ TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc());
+
+ PredBB->removeSuccessor(TailBB);
+ unsigned NumSuccessors = PredBB->succ_size();
+ assert(NumSuccessors <= 1);
+ if (NumSuccessors == 0 || *PredBB->succ_begin() != NewTarget)
+ PredBB->addSuccessor(NewTarget);
+
+ TDBBs.push_back(PredBB);
+ }
+ return Changed;
+}
+
/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
/// of its predecessors.
bool
TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
SmallVector<MachineBasicBlock*, 8> &TDBBs,
SmallVector<MachineInstr*, 16> &Copies) {
- if (!shouldTailDuplicate(MF, *TailBB))
+ bool IsSimple = isSimpleBB(TailBB);
+
+ if (!shouldTailDuplicate(MF, IsSimple, *TailBB))
return false;
DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
+ DenseSet<unsigned> UsedByPhi;
+ getRegsUsedByPHIs(*TailBB, &UsedByPhi);
+
+ if (IsSimple)
+ return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies);
+
// Iterate through all the unique predecessors and tail-duplicate this
// block into them, if possible. Copying the list ahead of time also
// avoids trouble with the predecessor list reallocating.
bool Changed = false;
SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
TailBB->pred_end());
- DenseSet<unsigned> UsedByPhi;
- getRegsUsedByPHIs(*TailBB, &UsedByPhi);
for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
PE = Preds.end(); PI != PE; ++PI) {
MachineBasicBlock *PredBB = *PI;
TII->get(TargetOpcode::COPY),
CopyInfos[i].first).addReg(CopyInfos[i].second));
}
+
+ // Simplify
+ TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true);
+
NumInstrDups += TailBB->size() - 1; // subtract one for removed branch
// Update the CFG.
// Remove the block.
MBB->eraseFromParent();
}
-