#include "llvm/Function.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetInstrItineraries.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;
/// ClobbersPred - True if BB could modify predicates (e.g. has
/// cmp, call, etc.)
/// NonPredSize - Number of non-predicated instructions.
+ /// ExtraCost - Extra cost for multi-cycle instructions.
+ /// ExtraCost2 - Some instructions are slower when predicated
/// BB - Corresponding MachineBasicBlock.
/// TrueBB / FalseBB- See AnalyzeBranch().
/// BrCond - Conditions for end of block conditional branches.
bool CannotBeCopied : 1;
bool ClobbersPred : 1;
unsigned NonPredSize;
+ unsigned ExtraCost;
+ unsigned ExtraCost2;
MachineBasicBlock *BB;
MachineBasicBlock *TrueBB;
MachineBasicBlock *FalseBB;
IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
HasFallThrough(false), IsUnpredicable(false),
CannotBeCopied(false), ClobbersPred(false), NonPredSize(0),
- BB(0), TrueBB(0), FalseBB(0) {}
+ ExtraCost(0), ExtraCost2(0), BB(0), TrueBB(0), FalseBB(0) {}
};
/// IfcvtToken - Record information about pending if-conversions to attempt:
: BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {}
};
- /// Roots - Basic blocks that do not have successors. These are the starting
- /// points of Graph traversal.
- std::vector<MachineBasicBlock*> Roots;
-
/// BBAnalysis - Results of if-conversion feasibility analysis indexed by
/// basic block number.
std::vector<BBInfo> BBAnalysis;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
const InstrItineraryData *InstrItins;
- const MachineLoopInfo *MLI;
+ const MachineBranchProbabilityInfo *MBPI;
+
bool MadeChange;
int FnNum;
public:
static char ID;
- IfConverter() : MachineFunctionPass(ID), FnNum(-1) {}
-
+ IfConverter() : MachineFunctionPass(ID), FnNum(-1) {
+ initializeIfConverterPass(*PassRegistry::getPassRegistry());
+ }
+
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineBranchProbabilityInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
private:
bool ReverseBranchCondition(BBInfo &BBI);
bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
- float Prediction, float Confidence) const;
+ const BranchProbability &Prediction) const;
bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
bool FalseBranch, unsigned &Dups,
- float Prediction, float Confidence) const;
+ const BranchProbability &Prediction) const;
bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
unsigned &Dups1, unsigned &Dups2) const;
void ScanInstructions(BBInfo &BBI);
bool IgnoreBr = false);
void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true);
- bool MeetIfcvtSizeLimit(MachineBasicBlock &BB, unsigned Size,
- float Prediction, float Confidence) const {
- return Size > 0 && TII->isProfitableToIfCvt(BB, Size,
- Prediction, Confidence);
+ bool MeetIfcvtSizeLimit(MachineBasicBlock &BB,
+ unsigned Cycle, unsigned Extra,
+ const BranchProbability &Prediction) const {
+ return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra,
+ Prediction);
}
- bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, unsigned TSize,
- MachineBasicBlock &FBB, unsigned FSize,
- float Prediction, float Confidence) const {
- return TSize > 0 && FSize > 0 &&
- TII->isProfitableToIfCvt(TBB, TSize, FBB, FSize,
- Prediction, Confidence);
+ bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB,
+ unsigned TCycle, unsigned TExtra,
+ MachineBasicBlock &FBB,
+ unsigned FCycle, unsigned FExtra,
+ const BranchProbability &Prediction) const {
+ return TCycle > 0 && FCycle > 0 &&
+ TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra,
+ Prediction);
}
// blockAlwaysFallThrough - Block ends without a terminator.
char IfConverter::ID = 0;
}
-INITIALIZE_PASS(IfConverter, "if-converter", "If Converter", false, false);
+INITIALIZE_PASS_BEGIN(IfConverter, "if-converter", "If Converter", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false)
FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); }
TLI = MF.getTarget().getTargetLowering();
TII = MF.getTarget().getInstrInfo();
TRI = MF.getTarget().getRegisterInfo();
- MLI = &getAnalysis<MachineLoopInfo>();
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
InstrItins = MF.getTarget().getInstrItineraryData();
if (!TII) return false;
// Tail merge tend to expose more if-conversion opportunities.
- BranchFolder BF(true);
+ BranchFolder BF(true, false);
bool BFChange = BF.OptimizeFunction(MF, TII,
MF.getTarget().getRegisterInfo(),
getAnalysisIfAvailable<MachineModuleInfo>());
MF.RenumberBlocks();
BBAnalysis.resize(MF.getNumBlockIDs());
- // Look for root nodes, i.e. blocks without successors.
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
- if (I->succ_empty())
- Roots.push_back(I);
-
std::vector<IfcvtToken*> Tokens;
MadeChange = false;
unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle +
}
Tokens.clear();
- Roots.clear();
BBAnalysis.clear();
if (MadeChange && IfCvtBranchFold) {
- BranchFolder BF(false);
+ BranchFolder BF(false, false);
BF.OptimizeFunction(MF, TII,
MF.getTarget().getRegisterInfo(),
getAnalysisIfAvailable<MachineModuleInfo>());
/// number of instructions that the ifcvt would need to duplicate if performed
/// in Dups.
bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
- float Prediction, float Confidence) const {
+ const BranchProbability &Prediction) const {
Dups = 0;
if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
return false;
if (TrueBBI.BB->pred_size() > 1) {
if (TrueBBI.CannotBeCopied ||
!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize,
- Prediction, Confidence))
+ Prediction))
return false;
Dups = TrueBBI.NonPredSize;
}
/// if performed in 'Dups'.
bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
bool FalseBranch, unsigned &Dups,
- float Prediction, float Confidence) const {
+ const BranchProbability &Prediction) const {
Dups = 0;
if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
return false;
++Size;
}
}
- if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size,
- Prediction, Confidence))
+ if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size, Prediction))
return false;
Dups = Size;
}
return TExit && TExit == FalseBBI.BB;
}
-static
-MachineBasicBlock::iterator firstNonBranchInst(MachineBasicBlock *BB,
- const TargetInstrInfo *TII) {
- MachineBasicBlock::iterator I = BB->end();
- while (I != BB->begin()) {
- --I;
- if (!I->getDesc().isBranch())
- break;
- }
- return I;
-}
-
/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along
/// with their common predecessor) forms a valid diamond shape for ifcvt.
bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
(TrueBBI.ClobbersPred && FalseBBI.ClobbersPred))
return false;
- MachineBasicBlock::iterator TI = TrueBBI.BB->begin();
- MachineBasicBlock::iterator FI = FalseBBI.BB->begin();
+ // Count duplicate instructions at the beginning of the true and false blocks.
+ MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
+ MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
MachineBasicBlock::iterator TIE = TrueBBI.BB->end();
MachineBasicBlock::iterator FIE = FalseBBI.BB->end();
- // Skip dbg_value instructions
- while (TI != TIE && TI->isDebugValue())
- ++TI;
- while (FI != FIE && FI->isDebugValue())
- ++FI;
- while (TI != TIE && FI != FIE) {
+ while (TIB != TIE && FIB != FIE) {
// Skip dbg_value instructions. These do not count.
- if (TI->isDebugValue()) {
- while (TI != TIE && TI->isDebugValue())
- ++TI;
- if (TI == TIE)
+ if (TIB->isDebugValue()) {
+ while (TIB != TIE && TIB->isDebugValue())
+ ++TIB;
+ if (TIB == TIE)
break;
}
- if (FI->isDebugValue()) {
- while (FI != FIE && FI->isDebugValue())
- ++FI;
- if (FI == FIE)
+ if (FIB->isDebugValue()) {
+ while (FIB != FIE && FIB->isDebugValue())
+ ++FIB;
+ if (FIB == FIE)
break;
}
- if (!TI->isIdenticalTo(FI))
+ if (!TIB->isIdenticalTo(FIB))
break;
++Dups1;
- ++TI;
- ++FI;
+ ++TIB;
+ ++FIB;
}
- TI = firstNonBranchInst(TrueBBI.BB, TII);
- FI = firstNonBranchInst(FalseBBI.BB, TII);
- MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
- MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
- // Skip dbg_value instructions at end of the bb's.
- while (TI != TIB && TI->isDebugValue())
- --TI;
- while (FI != FIB && FI->isDebugValue())
- --FI;
- while (TI != TIB && FI != FIB) {
+ // Now, in preparation for counting duplicate instructions at the ends of the
+ // blocks, move the end iterators up past any branch instructions.
+ while (TIE != TIB) {
+ --TIE;
+ if (!TIE->isBranch())
+ break;
+ }
+ while (FIE != FIB) {
+ --FIE;
+ if (!FIE->isBranch())
+ break;
+ }
+
+ // If Dups1 includes all of a block, then don't count duplicate
+ // instructions at the end of the blocks.
+ if (TIB == TIE || FIB == FIE)
+ return true;
+
+ // Count duplicate instructions at the ends of the blocks.
+ while (TIE != TIB && FIE != FIB) {
// Skip dbg_value instructions. These do not count.
- if (TI->isDebugValue()) {
- while (TI != TIB && TI->isDebugValue())
- --TI;
- if (TI == TIB)
+ if (TIE->isDebugValue()) {
+ while (TIE != TIB && TIE->isDebugValue())
+ --TIE;
+ if (TIE == TIB)
break;
}
- if (FI->isDebugValue()) {
- while (FI != FIB && FI->isDebugValue())
- --FI;
- if (FI == FIB)
+ if (FIE->isDebugValue()) {
+ while (FIE != FIB && FIE->isDebugValue())
+ --FIE;
+ if (FIE == FIB)
break;
}
- if (!TI->isIdenticalTo(FI))
+ if (!TIE->isIdenticalTo(FIE))
break;
++Dups2;
- --TI;
- --FI;
+ --TIE;
+ --FIE;
}
return true;
// Then scan all the instructions.
BBI.NonPredSize = 0;
+ BBI.ExtraCost = 0;
+ BBI.ExtraCost2 = 0;
BBI.ClobbersPred = false;
for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end();
I != E; ++I) {
if (I->isDebugValue())
continue;
- const TargetInstrDesc &TID = I->getDesc();
- if (TID.isNotDuplicable())
+ if (I->isNotDuplicable())
BBI.CannotBeCopied = true;
bool isPredicated = TII->isPredicated(I);
- bool isCondBr = BBI.IsBrAnalyzable && TID.isConditionalBranch();
+ bool isCondBr = BBI.IsBrAnalyzable && I->isConditionalBranch();
if (!isCondBr) {
if (!isPredicated) {
- unsigned NumOps = TII->getNumMicroOps(&*I, InstrItins);
- BBI.NonPredSize += NumOps;
+ BBI.NonPredSize++;
+ unsigned ExtraPredCost = 0;
+ unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I,
+ &ExtraPredCost);
+ if (NumCycles > 1)
+ BBI.ExtraCost += NumCycles-1;
+ BBI.ExtraCost2 += ExtraPredCost;
} else if (!AlreadyPredicated) {
// FIXME: This instruction is already predicated before the
// if-conversion pass. It's probably something like a conditional move.
ScanInstructions(BBI);
- // Unanalyzable or ends with fallthrough or unconditional branch.
- if (!BBI.IsBrAnalyzable || BBI.BrCond.empty()) {
+ // Unanalyzable or ends with fallthrough or unconditional branch, or if is not
+ // considered for ifcvt anymore.
+ if (!BBI.IsBrAnalyzable || BBI.BrCond.empty() || BBI.IsDone) {
BBI.IsBeingAnalyzed = false;
BBI.IsAnalyzed = true;
return BBI;
bool TNeedSub = TrueBBI.Predicate.size() > 0;
bool FNeedSub = FalseBBI.Predicate.size() > 0;
bool Enqueued = false;
-
- // Try to predict the branch, using loop info to guide us.
- // General heuristics are:
- // - backedge -> 90% taken
- // - early exit -> 20% taken
- // - branch predictor confidence -> 90%
- float Prediction = 0.5f;
- float Confidence = 0.9f;
- MachineLoop *Loop = MLI->getLoopFor(BB);
- if (Loop) {
- if (TrueBBI.BB == Loop->getHeader())
- Prediction = 0.9f;
- else if (FalseBBI.BB == Loop->getHeader())
- Prediction = 0.1f;
-
- MachineLoop *TrueLoop = MLI->getLoopFor(TrueBBI.BB);
- MachineLoop *FalseLoop = MLI->getLoopFor(FalseBBI.BB);
- if (!TrueLoop || TrueLoop->getParentLoop() == Loop)
- Prediction = 0.2f;
- else if (!FalseLoop || FalseLoop->getParentLoop() == Loop)
- Prediction = 0.8f;
- }
-
+
+ BranchProbability Prediction = MBPI->getEdgeProbability(BB, TrueBBI.BB);
+
if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
- MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize - (Dups + Dups2),
- *FalseBBI.BB, FalseBBI.NonPredSize - (Dups + Dups2),
- Prediction, Confidence) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) +
+ TrueBBI.ExtraCost), TrueBBI.ExtraCost2,
+ *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) +
+ FalseBBI.ExtraCost),FalseBBI.ExtraCost2,
+ Prediction) &&
FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
FeasibilityAnalysis(FalseBBI, RevCond)) {
// Diamond:
Enqueued = true;
}
- if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction, Confidence) &&
- MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize,
- Prediction, Confidence) &&
+ if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+ TrueBBI.ExtraCost2, Prediction) &&
FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
// Triangle:
// EBB
Enqueued = true;
}
- if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction, Confidence) &&
- MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize,
- Prediction, Confidence) &&
+ if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+ TrueBBI.ExtraCost2, Prediction) &&
FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups));
Enqueued = true;
}
- if (ValidSimple(TrueBBI, Dups, Prediction, Confidence) &&
- MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize,
- Prediction, Confidence) &&
+ if (ValidSimple(TrueBBI, Dups, Prediction) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+ TrueBBI.ExtraCost2, Prediction) &&
FeasibilityAnalysis(TrueBBI, BBI.BrCond)) {
// Simple (split, no rejoin):
// EBB
if (CanRevCond) {
// Try the other path...
if (ValidTriangle(FalseBBI, TrueBBI, false, Dups,
- 1.0-Prediction, Confidence) &&
- MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize,
- 1.0-Prediction, Confidence) &&
+ Prediction.getCompl()) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB,
+ FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+ FalseBBI.ExtraCost2, Prediction.getCompl()) &&
FeasibilityAnalysis(FalseBBI, RevCond, true)) {
Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups));
Enqueued = true;
}
if (ValidTriangle(FalseBBI, TrueBBI, true, Dups,
- 1.0-Prediction, Confidence) &&
- MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize,
- 1.0-Prediction, Confidence) &&
+ Prediction.getCompl()) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB,
+ FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+ FalseBBI.ExtraCost2, Prediction.getCompl()) &&
FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups));
Enqueued = true;
}
- if (ValidSimple(FalseBBI, Dups, 1.0-Prediction, Confidence) &&
- MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize,
- 1.0-Prediction, Confidence) &&
+ if (ValidSimple(FalseBBI, Dups, Prediction.getCompl()) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB,
+ FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+ FalseBBI.ExtraCost2, Prediction.getCompl()) &&
FeasibilityAnalysis(FalseBBI, RevCond)) {
Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups));
Enqueued = true;
/// candidates.
void IfConverter::AnalyzeBlocks(MachineFunction &MF,
std::vector<IfcvtToken*> &Tokens) {
- std::set<MachineBasicBlock*> Visited;
- for (unsigned i = 0, e = Roots.size(); i != e; ++i) {
- for (idf_ext_iterator<MachineBasicBlock*> I=idf_ext_begin(Roots[i],Visited),
- E = idf_ext_end(Roots[i], Visited); I != E; ++I) {
- MachineBasicBlock *BB = *I;
- AnalyzeBlock(BB, Tokens);
- }
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *BB = I;
+ AnalyzeBlock(BB, Tokens);
}
// Sort to favor more complex ifcvt scheme.
// fold the tail block in as well. Otherwise, unless it falls through to the
// tail, add a unconditional branch to it.
if (TailBB) {
- BBInfo TailBBI = BBAnalysis[TailBB->getNumber()];
+ BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()];
bool CanMergeTail = !TailBBI.HasFallThrough;
// There may still be a fall-through edge from BBI1 or BBI2 to TailBB;
// check if there are any other predecessors besides those.
for (MachineBasicBlock::iterator I = FromBBI.BB->begin(),
E = FromBBI.BB->end(); I != E; ++I) {
- const TargetInstrDesc &TID = I->getDesc();
// Do not copy the end of the block branches.
- if (IgnoreBr && TID.isBranch())
+ if (IgnoreBr && I->isBranch())
break;
MachineInstr *MI = MF.CloneMachineInstr(I);
ToBBI.BB->insert(ToBBI.BB->end(), MI);
- unsigned NumOps = TII->getNumMicroOps(MI, InstrItins);
- ToBBI.NonPredSize += NumOps;
+ ToBBI.NonPredSize++;
+ unsigned ExtraPredCost = 0;
+ unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, &ExtraPredCost);
+ if (NumCycles > 1)
+ ToBBI.ExtraCost += NumCycles-1;
+ ToBBI.ExtraCost2 += ExtraPredCost;
if (!TII->isPredicated(I) && !MI->isDebugValue()) {
if (!TII->PredicateInstruction(MI, Cond)) {
FromBBI.Predicate.clear();
ToBBI.NonPredSize += FromBBI.NonPredSize;
+ ToBBI.ExtraCost += FromBBI.ExtraCost;
+ ToBBI.ExtraCost2 += FromBBI.ExtraCost2;
FromBBI.NonPredSize = 0;
+ FromBBI.ExtraCost = 0;
+ FromBBI.ExtraCost2 = 0;
ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
ToBBI.HasFallThrough = FromBBI.HasFallThrough;