Reduce indentation and fix the count of how many PHIs we have inserted.
[oota-llvm.git] / lib / CodeGen / TailDuplication.cpp
index 7aeef6ad16e3b4d0dc396516ad1386ed1087edbe..95ce2a664892b8f8b98aaf8b08c10f5c88a287a2 100644 (file)
@@ -95,7 +95,13 @@ namespace {
                               SmallSetVector<MachineBasicBlock*, 8> &Succs);
     bool TailDuplicateBlocks(MachineFunction &MF);
     bool shouldTailDuplicate(const MachineFunction &MF,
-                             MachineBasicBlock &TailBB);
+                             bool IsSimple, MachineBasicBlock &TailBB);
+    bool isSimpleBB(MachineBasicBlock *TailBB);
+    bool canCompletelyDuplicateBB(MachineBasicBlock &BB);
+    bool duplicateSimpleBB(MachineBasicBlock *TailBB,
+                           SmallVector<MachineBasicBlock*, 8> &TDBBs,
+                           const DenseSet<unsigned> &RegsUsedByPhi,
+                           SmallVector<MachineInstr*, 16> &Copies);
     bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
                        SmallVector<MachineBasicBlock*, 8> &TDBBs,
                        SmallVector<MachineInstr*, 16> &Copies);
@@ -195,85 +201,98 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
 
     SmallVector<MachineBasicBlock*, 8> TDBBs;
     SmallVector<MachineInstr*, 16> Copies;
-    if (TailDuplicate(MBB, MF, TDBBs, Copies)) {
-      ++NumTails;
-
-      // TailBB's immediate successors are now successors of those predecessors
-      // which duplicated TailBB. Add the predecessors as sources to the PHI
-      // instructions.
-      bool isDead = MBB->pred_empty();
-      if (PreRegAlloc)
-        UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);
+    if (!TailDuplicate(MBB, MF, TDBBs, Copies))
+      continue;
 
-      // If it is dead, remove it.
-      if (isDead) {
-        NumInstrDups -= MBB->size();
-        RemoveDeadBlock(MBB);
-        ++NumDeadBlocks;
-      }
+    ++NumTails;
 
-      // Update SSA form.
-      if (!SSAUpdateVRs.empty()) {
-        for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
-          unsigned VReg = SSAUpdateVRs[i];
-          SSAUpdate.Initialize(VReg);
-
-          // If the original definition is still around, add it as an available
-          // value.
-          MachineInstr *DefMI = MRI->getVRegDef(VReg);
-          MachineBasicBlock *DefBB = 0;
-          if (DefMI) {
-            DefBB = DefMI->getParent();
-            SSAUpdate.AddAvailableValue(DefBB, VReg);
-          }
+    // TailBB's immediate successors are now successors of those predecessors
+    // which duplicated TailBB. Add the predecessors as sources to the PHI
+    // instructions.
+    bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken();
+    if (PreRegAlloc)
+      UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);
 
-          // Add the new vregs as available values.
-          DenseMap<unsigned, AvailableValsTy>::iterator LI =
-            SSAUpdateVals.find(VReg);  
-          for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
-            MachineBasicBlock *SrcBB = LI->second[j].first;
-            unsigned SrcReg = LI->second[j].second;
-            SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
-          }
+    // If it is dead, remove it.
+    if (isDead) {
+      NumInstrDups -= MBB->size();
+      RemoveDeadBlock(MBB);
+      ++NumDeadBlocks;
+    }
 
-          // Rewrite uses that are outside of the original def's block.
-          MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
-          while (UI != MRI->use_end()) {
-            MachineOperand &UseMO = UI.getOperand();
-            MachineInstr *UseMI = &*UI;
-            ++UI;
-            if (UseMI->getParent() == DefBB && !UseMI->isPHI())
-              continue;
-            SSAUpdate.RewriteUse(UseMO);
-          }
+    // Update SSA form.
+    if (!SSAUpdateVRs.empty()) {
+      NewPHIs.clear();
+      for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
+        unsigned VReg = SSAUpdateVRs[i];
+        SSAUpdate.Initialize(VReg);
+
+        // If the original definition is still around, add it as an available
+        // value.
+        MachineInstr *DefMI = MRI->getVRegDef(VReg);
+        MachineBasicBlock *DefBB = 0;
+        if (DefMI) {
+          DefBB = DefMI->getParent();
+          SSAUpdate.AddAvailableValue(DefBB, VReg);
         }
 
-        SSAUpdateVRs.clear();
-        SSAUpdateVals.clear();
-      }
+        // Add the new vregs as available values.
+        DenseMap<unsigned, AvailableValsTy>::iterator LI =
+          SSAUpdateVals.find(VReg);  
+        for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
+          MachineBasicBlock *SrcBB = LI->second[j].first;
+          unsigned SrcReg = LI->second[j].second;
+          SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
+        }
 
-      // Eliminate some of the copies inserted by tail duplication to maintain
-      // SSA form.
-      for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
-        MachineInstr *Copy = Copies[i];
-        if (!Copy->isCopy())
-          continue;
-        unsigned Dst = Copy->getOperand(0).getReg();
-        unsigned Src = Copy->getOperand(1).getReg();
-        MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src);
-        if (++UI == MRI->use_end()) {
-          // Copy is the only use. Do trivial copy propagation here.
-          MRI->replaceRegWith(Dst, Src);
-          Copy->eraseFromParent();
+        // Rewrite uses that are outside of the original def's block.
+        MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
+        while (UI != MRI->use_end()) {
+          MachineOperand &UseMO = UI.getOperand();
+          MachineInstr *UseMI = &*UI;
+          ++UI;
+          if (UseMI->isDebugValue()) {
+            // SSAUpdate can replace the use with an undef. That creates
+            // a debug instruction that is a kill.
+            // FIXME: Should it SSAUpdate job to delete debug instructions
+            // instead of replacing the use with undef?
+            UseMI->eraseFromParent();
+            continue;
+          }
+          if (UseMI->getParent() == DefBB && !UseMI->isPHI())
+            continue;
+          SSAUpdate.RewriteUse(UseMO);
         }
       }
 
-      if (PreRegAlloc && TailDupVerify)
-        VerifyPHIs(MF, false);
-      MadeChange = true;
+      SSAUpdateVRs.clear();
+      SSAUpdateVals.clear();
+    }
+
+    // Eliminate some of the copies inserted by tail duplication to maintain
+    // SSA form.
+    for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
+      MachineInstr *Copy = Copies[i];
+      if (!Copy->isCopy())
+        continue;
+      unsigned Dst = Copy->getOperand(0).getReg();
+      unsigned Src = Copy->getOperand(1).getReg();
+      MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src);
+      if (++UI == MRI->use_end()) {
+        // Copy is the only use. Do trivial copy propagation here.
+        MRI->replaceRegWith(Dst, Src);
+        Copy->eraseFromParent();
+      }
     }
+
+    if (PreRegAlloc && TailDupVerify)
+      VerifyPHIs(MF, false);
+    MadeChange = true;
+
+    if (NewPHIs.size())
+      NumAddedPHIs += NewPHIs.size();
   }
-  NumAddedPHIs += NewPHIs.size();
+
 
   return MadeChange;
 }
@@ -283,6 +302,8 @@ static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB,
   for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
          UE = MRI->use_end(); UI != UE; ++UI) {
     MachineInstr *UseMI = &*UI;
+    if (UseMI->isDebugValue())
+      continue;
     if (UseMI->getParent() != BB)
       return true;
   }
@@ -301,7 +322,7 @@ static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) {
 // used to determine which registers are liveout while modifying the
 // block (which is why we need to copy the information).
 static void getRegsUsedByPHIs(const MachineBasicBlock &BB,
-                             DenseSet<unsigned> *UsedByPhi) {
+                              DenseSet<unsigned> *UsedByPhi) {
   for(MachineBasicBlock::const_iterator I = BB.begin(), E = BB.end();
       I != E; ++I) {
     const MachineInstr &MI = *I;
@@ -337,7 +358,7 @@ void TailDuplicatePass::ProcessPHI(MachineInstr *MI,
                                    MachineBasicBlock *PredBB,
                                    DenseMap<unsigned, unsigned> &LocalVRMap,
                            SmallVector<std::pair<unsigned,unsigned>, 4> &Copies,
-                                  const DenseSet<unsigned> &RegsUsedByPhi,
+                                   const DenseSet<unsigned> &RegsUsedByPhi,
                                    bool Remove) {
   unsigned DefReg = MI->getOperand(0).getReg();
   unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB);
@@ -485,11 +506,16 @@ TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
 /// shouldTailDuplicate - Determine if it is profitable to duplicate this block.
 bool
 TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
+                                       bool IsSimple,
                                        MachineBasicBlock &TailBB) {
   // Only duplicate blocks that end with unconditional branches.
   if (TailBB.canFallThrough())
     return false;
 
+  // Don't try to tail-duplicate single-block loops.
+  if (TailBB.isSuccessor(&TailBB))
+    return false;
+
   // Set the limit on the cost to duplicate. When optimizing for size,
   // duplicate only one, because one branch instruction can be eliminated to
   // compensate for the duplication.
@@ -500,86 +526,225 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
   else
     MaxDuplicateCount = TailDuplicateSize;
 
-  if (PreRegAlloc) {
-    if (TailBB.empty())
-      return false;
-    const TargetInstrDesc &TID = TailBB.back().getDesc();
-    // Pre-regalloc tail duplication hurts compile time and doesn't help
-    // much except for indirect branches.
-    if (!TID.isIndirectBranch())
-      return false;
-    // If the target has hardware branch prediction that can handle indirect
-    // branches, duplicating them can often make them predictable when there
-    // are common paths through the code.  The limit needs to be high enough
-    // to allow undoing the effects of tail merging and other optimizations
-    // that rearrange the predecessors of the indirect branch.
-    MaxDuplicateCount = 20;
+  // If the target has hardware branch prediction that can handle indirect
+  // branches, duplicating them can often make them predictable when there
+  // are common paths through the code.  The limit needs to be high enough
+  // to allow undoing the effects of tail merging and other optimizations
+  // that rearrange the predecessors of the indirect branch.
+
+  bool hasIndirectBR = false;
+  if (PreRegAlloc && !TailBB.empty()) {
+    const MCInstrDesc &MCID = TailBB.back().getDesc();
+    if (MCID.isIndirectBranch()) {
+      MaxDuplicateCount = 20;
+      hasIndirectBR = true;
+    }
   }
 
-  // Don't try to tail-duplicate single-block loops.
-  if (TailBB.isSuccessor(&TailBB))
-    return false;
-
   // Check the instructions in the block to determine whether tail-duplication
   // is invalid or unlikely to be profitable.
   unsigned InstrCount = 0;
-  bool HasCall = false;
   for (MachineBasicBlock::const_iterator I = TailBB.begin(); I != TailBB.end();
        ++I) {
     // Non-duplicable things shouldn't be tail-duplicated.
-    if (I->getDesc().isNotDuplicable()) return false;
+    if (I->getDesc().isNotDuplicable())
+      return false;
+
     // Do not duplicate 'return' instructions if this is a pre-regalloc run.
     // A return may expand into a lot more instructions (e.g. reload of callee
     // saved registers) after PEI.
-    if (PreRegAlloc && I->getDesc().isReturn()) return false;
-    // Don't duplicate more than the threshold.
-    if (InstrCount == MaxDuplicateCount) return false;
-    // Remember if we saw a call.
-    if (I->getDesc().isCall()) HasCall = true;
+    if (PreRegAlloc && I->getDesc().isReturn())
+      return false;
+
+    // Avoid duplicating calls before register allocation. Calls presents a
+    // barrier to register allocation so duplicating them may end up increasing
+    // spills.
+    if (PreRegAlloc && I->getDesc().isCall())
+      return false;
+
     if (!I->isPHI() && !I->isDebugValue())
       InstrCount += 1;
+
+    if (InstrCount > MaxDuplicateCount)
+      return false;
   }
-  // Don't tail-duplicate calls before register allocation. Calls presents a
-  // barrier to register allocation so duplicating them may end up increasing
-  // spills.
-  if (InstrCount > 1 && (PreRegAlloc && HasCall))
+
+  if (hasIndirectBR)
+    return true;
+
+  if (IsSimple)
+    return true;
+
+  if (!PreRegAlloc)
+    return true;
+
+  return canCompletelyDuplicateBB(TailBB);
+}
+
+/// isSimpleBB - True if this BB has only one unconditional jump.
+bool
+TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) {
+  if (TailBB->succ_size() != 1)
     return false;
+  if (TailBB->pred_empty())
+    return false;
+  MachineBasicBlock::iterator I = TailBB->begin();
+  MachineBasicBlock::iterator E = TailBB->end();
+  while (I != E && I->isDebugValue())
+    ++I;
+  if (I == E)
+    return true;
+  return I->getDesc().isUnconditionalBranch();
+}
 
+static bool
+bothUsedInPHI(const MachineBasicBlock &A,
+              SmallPtrSet<MachineBasicBlock*, 8> SuccsB) {
+  for (MachineBasicBlock::const_succ_iterator SI = A.succ_begin(),
+         SE = A.succ_end(); SI != SE; ++SI) {
+    MachineBasicBlock *BB = *SI;
+    if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI())
+      return true;
+  }
+
+  return false;
+}
+
+bool
+TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) {
+  SmallPtrSet<MachineBasicBlock*, 8> Succs(BB.succ_begin(), BB.succ_end());
+
+  for (MachineBasicBlock::pred_iterator PI = BB.pred_begin(),
+       PE = BB.pred_end(); PI != PE; ++PI) {
+    MachineBasicBlock *PredBB = *PI;
+
+    if (PredBB->succ_size() > 1)
+      return false;
+
+    MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL;
+    SmallVector<MachineOperand, 4> PredCond;
+    if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+      return false;
+
+    if (!PredCond.empty())
+      return false;
+  }
   return true;
 }
 
+bool
+TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB,
+                                     SmallVector<MachineBasicBlock*, 8> &TDBBs,
+                                     const DenseSet<unsigned> &UsedByPhi,
+                                     SmallVector<MachineInstr*, 16> &Copies) {
+  SmallPtrSet<MachineBasicBlock*, 8> Succs(TailBB->succ_begin(),
+                                           TailBB->succ_end());
+  SmallVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
+                                           TailBB->pred_end());
+  bool Changed = false;
+  for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+       PE = Preds.end(); PI != PE; ++PI) {
+    MachineBasicBlock *PredBB = *PI;
+
+    if (PredBB->getLandingPadSuccessor())
+      continue;
+
+    if (bothUsedInPHI(*PredBB, Succs))
+      continue;
+
+    MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL;
+    SmallVector<MachineOperand, 4> PredCond;
+    if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+      continue;
+
+    Changed = true;
+    DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
+                 << "From simple Succ: " << *TailBB);
+
+    MachineBasicBlock *NewTarget = *TailBB->succ_begin();
+    MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(PredBB));
+
+    // Make PredFBB explicit.
+    if (PredCond.empty())
+      PredFBB = PredTBB;
+
+    // Make fall through explicit.
+    if (!PredTBB)
+      PredTBB = NextBB;
+    if (!PredFBB)
+      PredFBB = NextBB;
+
+    // Redirect
+    if (PredFBB == TailBB)
+      PredFBB = NewTarget;
+    if (PredTBB == TailBB)
+      PredTBB = NewTarget;
+
+    // Make the branch unconditional if possible
+    if (PredTBB == PredFBB) {
+      PredCond.clear();
+      PredFBB = NULL;
+    }
+
+    // Avoid adding fall through branches.
+    if (PredFBB == NextBB)
+      PredFBB = NULL;
+    if (PredTBB == NextBB && PredFBB == NULL)
+      PredTBB = NULL;
+
+    TII->RemoveBranch(*PredBB);
+
+    if (PredTBB)
+      TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc());
+
+    PredBB->removeSuccessor(TailBB);
+    unsigned NumSuccessors = PredBB->succ_size();
+    assert(NumSuccessors <= 1);
+    if (NumSuccessors == 0 || *PredBB->succ_begin() != NewTarget)
+      PredBB->addSuccessor(NewTarget);
+
+    TDBBs.push_back(PredBB);
+  }
+  return Changed;
+}
+
 /// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
 /// of its predecessors.
 bool
 TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
                                  SmallVector<MachineBasicBlock*, 8> &TDBBs,
                                  SmallVector<MachineInstr*, 16> &Copies) {
-  if (!shouldTailDuplicate(MF, *TailBB))
+  bool IsSimple = isSimpleBB(TailBB);
+
+  if (!shouldTailDuplicate(MF, IsSimple, *TailBB))
     return false;
 
   DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
 
+  DenseSet<unsigned> UsedByPhi;
+  getRegsUsedByPHIs(*TailBB, &UsedByPhi);
+
+  if (IsSimple)
+    return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies);
+
   // Iterate through all the unique predecessors and tail-duplicate this
   // block into them, if possible. Copying the list ahead of time also
   // avoids trouble with the predecessor list reallocating.
   bool Changed = false;
   SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
                                               TailBB->pred_end());
-  DenseSet<unsigned> UsedByPhi;
-  getRegsUsedByPHIs(*TailBB, &UsedByPhi);
   for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
        PE = Preds.end(); PI != PE; ++PI) {
     MachineBasicBlock *PredBB = *PI;
 
     assert(TailBB != PredBB &&
            "Single-block loop should have been rejected earlier!");
-    if (PredBB->succ_size() > 1) continue;
+    // EH edges are ignored by AnalyzeBranch.
+    if (PredBB->succ_size() > 1)
+      continue;
 
     MachineBasicBlock *PredTBB, *PredFBB;
     SmallVector<MachineOperand, 4> PredCond;
-    // EH edges are ignored by AnalyzeBranch.
-    if (PredBB->succ_size() != 1)
-      continue;
     if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
       continue;
     if (!PredCond.empty())
@@ -619,6 +784,10 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
                                TII->get(TargetOpcode::COPY),
                                CopyInfos[i].first).addReg(CopyInfos[i].second));
     }
+
+    // Simplify
+    TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true);
+
     NumInstrDups += TailBB->size() - 1; // subtract one for removed branch
 
     // Update the CFG.
@@ -754,4 +923,3 @@ void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) {
   // Remove the block.
   MBB->eraseFromParent();
 }
-