X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FIfConversion.cpp;h=db53b0473a9a9dfbb1b52a0f2c1b2d1ba3d699ad;hb=806562cc59ad35e6c742abe9109e9b8090b3f820;hp=d73cd538f32ee23c6e2014abd4b129ab6a684e69;hpb=3ef1c8759a20167457eb7fd82ebcaffe7ccaa1d1;p=oota-llvm.git diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index d73cd538f32..db53b0473a9 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetInstrItineraries.h" #include "llvm/Target/TargetLowering.h" @@ -27,6 +28,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" using namespace llvm; @@ -92,6 +94,8 @@ namespace { /// ClobbersPred - True if BB could modify predicates (e.g. has /// cmp, call, etc.) /// NonPredSize - Number of non-predicated instructions. + /// ExtraCost - Extra cost for multi-cycle instructions. + /// ExtraCost2 - Some instructions are slower when predicated /// BB - Corresponding MachineBasicBlock. /// TrueBB / FalseBB- See AnalyzeBranch(). /// BrCond - Conditions for end of block conditional branches. 
@@ -107,6 +111,8 @@ namespace { bool CannotBeCopied : 1; bool ClobbersPred : 1; unsigned NonPredSize; + unsigned ExtraCost; + unsigned ExtraCost2; MachineBasicBlock *BB; MachineBasicBlock *TrueBB; MachineBasicBlock *FalseBB; @@ -116,7 +122,7 @@ namespace { IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false), HasFallThrough(false), IsUnpredicable(false), CannotBeCopied(false), ClobbersPred(false), NonPredSize(0), - BB(0), TrueBB(0), FalseBB(0) {} + ExtraCost(0), ExtraCost2(0), BB(0), TrueBB(0), FalseBB(0) {} }; /// IfcvtToken - Record information about pending if-conversions to attempt: @@ -152,20 +158,30 @@ namespace { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const InstrItineraryData *InstrItins; + const MachineLoopInfo *MLI; bool MadeChange; int FnNum; public: static char ID; - IfConverter() : MachineFunctionPass(ID), FnNum(-1) {} + IfConverter() : MachineFunctionPass(ID), FnNum(-1) { + initializeIfConverterPass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } virtual bool runOnMachineFunction(MachineFunction &MF); virtual const char *getPassName() const { return "If Converter"; } private: bool ReverseBranchCondition(BBInfo &BBI); - bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const; + bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups, + float Prediction, float Confidence) const; bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, - bool FalseBranch, unsigned &Dups) const; + bool FalseBranch, unsigned &Dups, + float Prediction, float Confidence) const; bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, unsigned &Dups1, unsigned &Dups2) const; void ScanInstructions(BBInfo &BBI); @@ -190,14 +206,21 @@ namespace { bool IgnoreBr = false); void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true); - bool MeetIfcvtSizeLimit(MachineBasicBlock &BB, unsigned Size) const { - return Size > 0 && 
TII->isProfitableToIfCvt(BB, Size); + bool MeetIfcvtSizeLimit(MachineBasicBlock &BB, + unsigned Cycle, unsigned Extra, + float Prediction, float Confidence) const { + return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra, + Prediction, Confidence); } - bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, unsigned TSize, - MachineBasicBlock &FBB, unsigned FSize) const { - return TSize > 0 && FSize > 0 && - TII->isProfitableToIfCvt(TBB, TSize, FBB, FSize); + bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, + unsigned TCycle, unsigned TExtra, + MachineBasicBlock &FBB, + unsigned FCycle, unsigned FExtra, + float Prediction, float Confidence) const { + return TCycle > 0 && FCycle > 0 && + TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra, + Prediction, Confidence); } // blockAlwaysFallThrough - Block ends without a terminator. @@ -232,7 +255,9 @@ namespace { char IfConverter::ID = 0; } -INITIALIZE_PASS(IfConverter, "if-converter", "If Converter", false, false); +INITIALIZE_PASS_BEGIN(IfConverter, "if-converter", "If Converter", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false) FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); } @@ -240,6 +265,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { TLI = MF.getTarget().getTargetLowering(); TII = MF.getTarget().getInstrInfo(); TRI = MF.getTarget().getRegisterInfo(); + MLI = &getAnalysis<MachineLoopInfo>(); InstrItins = MF.getTarget().getInstrItineraryData(); if (!TII) return false; @@ -434,7 +460,8 @@ static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) { /// predecessor) forms a valid simple shape for ifcvt. It also returns the /// number of instructions that the ifcvt would need to duplicate if performed /// in Dups. 
-bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const { +bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups, + float Prediction, float Confidence) const { Dups = 0; if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) return false; @@ -444,7 +471,8 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const { if (TrueBBI.BB->pred_size() > 1) { if (TrueBBI.CannotBeCopied || - !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize)) + !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize, + Prediction, Confidence)) return false; Dups = TrueBBI.NonPredSize; } @@ -459,7 +487,8 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const { /// returns the number of instructions that the ifcvt would need to duplicate /// if performed in 'Dups'. bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, - bool FalseBranch, unsigned &Dups) const { + bool FalseBranch, unsigned &Dups, + float Prediction, float Confidence) const { Dups = 0; if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) return false; @@ -481,7 +510,8 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, ++Size; } } - if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size)) + if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size, + Prediction, Confidence)) return false; Dups = Size; } @@ -496,18 +526,6 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, return TExit && TExit == FalseBBI.BB; } -static -MachineBasicBlock::iterator firstNonBranchInst(MachineBasicBlock *BB, - const TargetInstrInfo *TII) { - MachineBasicBlock::iterator I = BB->end(); - while (I != BB->begin()) { - --I; - if (!I->getDesc().isBranch()) - break; - } - return I; -} - /// ValidDiamond - Returns true if the 'true' and 'false' blocks (along /// with their common predecessor) forms a valid diamond shape for ifcvt. 
bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, @@ -536,64 +554,70 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred)) return false; - MachineBasicBlock::iterator TI = TrueBBI.BB->begin(); - MachineBasicBlock::iterator FI = FalseBBI.BB->begin(); + // Count duplicate instructions at the beginning of the true and false blocks. + MachineBasicBlock::iterator TIB = TrueBBI.BB->begin(); + MachineBasicBlock::iterator FIB = FalseBBI.BB->begin(); MachineBasicBlock::iterator TIE = TrueBBI.BB->end(); MachineBasicBlock::iterator FIE = FalseBBI.BB->end(); - // Skip dbg_value instructions - while (TI != TIE && TI->isDebugValue()) - ++TI; - while (FI != FIE && FI->isDebugValue()) - ++FI; - while (TI != TIE && FI != FIE) { + while (TIB != TIE && FIB != FIE) { // Skip dbg_value instructions. These do not count. - if (TI->isDebugValue()) { - while (TI != TIE && TI->isDebugValue()) - ++TI; - if (TI == TIE) + if (TIB->isDebugValue()) { + while (TIB != TIE && TIB->isDebugValue()) + ++TIB; + if (TIB == TIE) break; } - if (FI->isDebugValue()) { - while (FI != FIE && FI->isDebugValue()) - ++FI; - if (FI == FIE) + if (FIB->isDebugValue()) { + while (FIB != FIE && FIB->isDebugValue()) + ++FIB; + if (FIB == FIE) break; } - if (!TI->isIdenticalTo(FI)) + if (!TIB->isIdenticalTo(FIB)) break; ++Dups1; - ++TI; - ++FI; + ++TIB; + ++FIB; } - TI = firstNonBranchInst(TrueBBI.BB, TII); - FI = firstNonBranchInst(FalseBBI.BB, TII); - MachineBasicBlock::iterator TIB = TrueBBI.BB->begin(); - MachineBasicBlock::iterator FIB = FalseBBI.BB->begin(); - // Skip dbg_value instructions at end of the bb's. - while (TI != TIB && TI->isDebugValue()) - --TI; - while (FI != FIB && FI->isDebugValue()) - --FI; - while (TI != TIB && FI != FIB) { + // Now, in preparation for counting duplicate instructions at the ends of the + // blocks, move the end iterators up past any branch instructions. 
+ while (TIE != TIB) { + --TIE; + if (!TIE->getDesc().isBranch()) + break; + } + while (FIE != FIB) { + --FIE; + if (!FIE->getDesc().isBranch()) + break; + } + + // If Dups1 includes all of a block, then don't count duplicate + // instructions at the end of the blocks. + if (TIB == TIE || FIB == FIE) + return true; + + // Count duplicate instructions at the ends of the blocks. + while (TIE != TIB && FIE != FIB) { // Skip dbg_value instructions. These do not count. - if (TI->isDebugValue()) { - while (TI != TIB && TI->isDebugValue()) - --TI; - if (TI == TIB) + if (TIE->isDebugValue()) { + while (TIE != TIB && TIE->isDebugValue()) + --TIE; + if (TIE == TIB) break; } - if (FI->isDebugValue()) { - while (FI != FIB && FI->isDebugValue()) - --FI; - if (FI == FIB) + if (FIE->isDebugValue()) { + while (FIE != FIB && FIE->isDebugValue()) + --FIE; + if (FIE == FIB) break; } - if (!TI->isIdenticalTo(FI)) + if (!TIE->isIdenticalTo(FIE)) break; ++Dups2; - --TI; - --FI; + --TIE; + --FIE; } return true; @@ -630,6 +654,8 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { // Then scan all the instructions. BBI.NonPredSize = 0; + BBI.ExtraCost = 0; + BBI.ExtraCost2 = 0; BBI.ClobbersPred = false; for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end(); I != E; ++I) { @@ -645,8 +671,13 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { if (!isCondBr) { if (!isPredicated) { - unsigned NumOps = TII->getNumMicroOps(&*I, InstrItins); - BBI.NonPredSize += NumOps; + BBI.NonPredSize++; + unsigned ExtraPredCost = 0; + unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, + &ExtraPredCost); + if (NumCycles > 1) + BBI.ExtraCost += NumCycles-1; + BBI.ExtraCost2 += ExtraPredCost; } else if (!AlreadyPredicated) { // FIXME: This instruction is already predicated before the // if-conversion pass. It's probably something like a conditional move. 
@@ -769,9 +800,35 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, bool TNeedSub = TrueBBI.Predicate.size() > 0; bool FNeedSub = FalseBBI.Predicate.size() > 0; bool Enqueued = false; + + // Try to predict the branch, using loop info to guide us. + // General heuristics are: + // - backedge -> 90% taken + // - early exit -> 20% taken + // - branch predictor confidence -> 90% + float Prediction = 0.5f; + float Confidence = 0.9f; + MachineLoop *Loop = MLI->getLoopFor(BB); + if (Loop) { + if (TrueBBI.BB == Loop->getHeader()) + Prediction = 0.9f; + else if (FalseBBI.BB == Loop->getHeader()) + Prediction = 0.1f; + + MachineLoop *TrueLoop = MLI->getLoopFor(TrueBBI.BB); + MachineLoop *FalseLoop = MLI->getLoopFor(FalseBBI.BB); + if (!TrueLoop || TrueLoop->getParentLoop() == Loop) + Prediction = 0.2f; + else if (!FalseLoop || FalseLoop->getParentLoop() == Loop) + Prediction = 0.8f; + } + if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) && - MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize - (Dups + Dups2), - *FalseBBI.BB, FalseBBI.NonPredSize - (Dups + Dups2)) && + MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) + + TrueBBI.ExtraCost), TrueBBI.ExtraCost2, + *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) + + FalseBBI.ExtraCost),FalseBBI.ExtraCost2, + Prediction, Confidence) && FeasibilityAnalysis(TrueBBI, BBI.BrCond) && FeasibilityAnalysis(FalseBBI, RevCond)) { // Diamond: @@ -787,8 +844,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, Enqueued = true; } - if (ValidTriangle(TrueBBI, FalseBBI, false, Dups) && - MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) && + if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction, Confidence) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, + TrueBBI.ExtraCost2, Prediction, Confidence) && FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) { // Triangle: // EBB @@ -801,15 +859,17 @@ IfConverter::BBInfo 
&IfConverter::AnalyzeBlock(MachineBasicBlock *BB, Enqueued = true; } - if (ValidTriangle(TrueBBI, FalseBBI, true, Dups) && - MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) && + if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction, Confidence) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, + TrueBBI.ExtraCost2, Prediction, Confidence) && FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups)); Enqueued = true; } - if (ValidSimple(TrueBBI, Dups) && - MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) && + if (ValidSimple(TrueBBI, Dups, Prediction, Confidence) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, + TrueBBI.ExtraCost2, Prediction, Confidence) && FeasibilityAnalysis(TrueBBI, BBI.BrCond)) { // Simple (split, no rejoin): // EBB @@ -824,22 +884,30 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, if (CanRevCond) { // Try the other path... 
- if (ValidTriangle(FalseBBI, TrueBBI, false, Dups) && - MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) && + if (ValidTriangle(FalseBBI, TrueBBI, false, Dups, + 1.0-Prediction, Confidence) && + MeetIfcvtSizeLimit(*FalseBBI.BB, + FalseBBI.NonPredSize + FalseBBI.ExtraCost, + FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) && FeasibilityAnalysis(FalseBBI, RevCond, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups)); Enqueued = true; } - if (ValidTriangle(FalseBBI, TrueBBI, true, Dups) && - MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) && + if (ValidTriangle(FalseBBI, TrueBBI, true, Dups, + 1.0-Prediction, Confidence) && + MeetIfcvtSizeLimit(*FalseBBI.BB, + FalseBBI.NonPredSize + FalseBBI.ExtraCost, + FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) && FeasibilityAnalysis(FalseBBI, RevCond, true, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups)); Enqueued = true; } - if (ValidSimple(FalseBBI, Dups) && - MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) && + if (ValidSimple(FalseBBI, Dups, 1.0-Prediction, Confidence) && + MeetIfcvtSizeLimit(*FalseBBI.BB, + FalseBBI.NonPredSize + FalseBBI.ExtraCost, + FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) && FeasibilityAnalysis(FalseBBI, RevCond)) { Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups)); Enqueued = true; @@ -1368,8 +1436,12 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, MachineInstr *MI = MF.CloneMachineInstr(I); ToBBI.BB->insert(ToBBI.BB->end(), MI); - unsigned NumOps = TII->getNumMicroOps(MI, InstrItins); - ToBBI.NonPredSize += NumOps; + ToBBI.NonPredSize++; + unsigned ExtraPredCost = 0; + unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, &ExtraPredCost); + if (NumCycles > 1) + ToBBI.ExtraCost += NumCycles-1; + ToBBI.ExtraCost2 += ExtraPredCost; if (!TII->isPredicated(I) && !MI->isDebugValue()) { if (!TII->PredicateInstruction(MI, Cond)) { @@ -1443,7 +1515,11 @@ void 
IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { FromBBI.Predicate.clear(); ToBBI.NonPredSize += FromBBI.NonPredSize; + ToBBI.ExtraCost += FromBBI.ExtraCost; + ToBBI.ExtraCost2 += FromBBI.ExtraCost2; FromBBI.NonPredSize = 0; + FromBBI.ExtraCost = 0; + FromBBI.ExtraCost2 = 0; ToBBI.ClobbersPred |= FromBBI.ClobbersPred; ToBBI.HasFallThrough = FromBBI.HasFallThrough;