#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Support/BranchProbability.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
cl::init(100), cl::Hidden);
+// Hidden, off-by-default switch for the block-frequency-based coldness check.
+// It is only honoured for functions that carry an entry count: in that case
+// block frequencies are computed for the function and a loop whose header
+// frequency falls below the cold threshold is rejected by the check guarded
+// by this flag.
+static cl::opt<bool>
+LoopUnswitchWithBlockFrequency("loop-unswitch-with-block-frequency",
+ cl::init(false), cl::Hidden,
+ cl::desc("Enable the use of the block frequency analysis to access PGO "
+ "heuristics to minimize code growth in cold regions."));
+
+// Hidden percentage knob (default 1) that defines what counts as a cold loop.
+// The function entry frequency is scaled by ColdnessThreshold/100 to obtain
+// the minimum header frequency; loops whose header frequency is below that
+// value are treated as cold.
+// NOTE(review): the value is used directly as the numerator of a
+// BranchProbability with denominator 100, so values above 100 are presumably
+// invalid -- confirm whether the option needs to be range-checked.
+static cl::opt<unsigned>
+ColdnessThreshold("loop-unswitch-coldness-threshold", cl::init(1), cl::Hidden,
+ cl::desc("Coldness threshold in percentage. The loop header frequency "
+ "(relative to the entry frequency) is compared with this "
+ "threshold to determine if non-trivial unswitching should be "
+ "enabled."));
+
namespace {
class LUAnalysisCache {
LUAnalysisCache BranchesInfo;
+ bool EnabledPGO;
+
+ // BFI and ColdEntryFreq are only used when PGO and
+ // LoopUnswitchWithBlockFrequency are enabled.
+ BlockFrequencyInfo BFI;
+ BlockFrequency ColdEntryFreq;
+
bool OptimizeForSize;
bool redoLoop;
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
currentLoop = L;
Function *F = currentLoop->getHeader()->getParent();
+
+ EnabledPGO = F->getEntryCount().hasValue();
+
+ if (LoopUnswitchWithBlockFrequency && EnabledPGO) {
+ BranchProbabilityInfo BPI(*F, *LI);
+ BFI.calculate(*L->getHeader()->getParent(), BPI, *LI);
+
+ // Use BranchProbability to compute a minimum frequency based on
+ // function entry baseline frequency. Loops with headers below this
+ // frequency are considered as cold.
+ const BranchProbability ColdProb(ColdnessThreshold, 100);
+ ColdEntryFreq = BlockFrequency(BFI.getEntryFreq()) * ColdProb;
+ }
+
bool Changed = false;
do {
assert(currentLoop->isLCSSAForm(*DT));
loopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize))
return false;
+ if (LoopUnswitchWithBlockFrequency && EnabledPGO) {
+ // Compute the weighted frequency of the hottest block in the
+ // loop (loopHeader in this case since inner loops should be
+ // processed before outer loop). If it is less than ColdEntryFreq,
+ // we should not unswitch.
+ BlockFrequency LoopEntryFreq = BFI.getBlockFreq(loopHeader);
+ if (LoopEntryFreq < ColdEntryFreq)
+ return false;
+ }
+
// Loop over all of the basic blocks in the loop. If we find an interior
// block that is branching on a loop-invariant condition, we can unswitch this
// loop.
--- /dev/null
+; RUN: opt < %s -loop-unswitch -loop-unswitch-with-block-frequency -S 2>&1 | FileCheck %s
+
+;; trivial condition should be unswitched regardless of coldness.
+define i32 @test1(i1 %cond1, i1 %cond2) !prof !1 {
+;; The function carries an entry count (!1) and the branch into the loop
+;; carries branch weights (!0) of 1 versus 100000000 -- presumably listed in
+;; successor order, which would make the loop very rarely entered.
+ br i1 %cond1, label %loop_begin, label %loop_exit, !prof !0
+
+loop_begin:
+;; %cond2 is not redefined inside the loop and the false edge leaves the
+;; loop directly. The expected output below has the condition replaced by the
+;; constant true and the exit edge retargeted to %loop_exit.loopexit.
+; CHECK: br i1 true, label %continue, label %loop_exit.loopexit
+ br i1 %cond2, label %continue, label %loop_exit ; trivial condition
+
+continue:
+ call void @some_func1() noreturn nounwind
+ br label %loop_begin
+
+loop_exit:
+ ret i32 0
+}
+
+;; cold non-trivial condition should not be unswitched.
+define i32 @test2(i32* %var, i1 %cond1, i1 %cond2) !prof !1 {
+;; Same profile setup as @test1: entry count !1 on the function and branch
+;; weights !0 (1 versus 100000000) on the branch that enters the loop.
+ br i1 %cond1, label %loop_begin, label %loop_exit, !prof !0
+
+loop_begin:
+ store i32 1, i32* %var
+;; Both successors of this branch stay inside the loop and rejoin at %joint.
+;; The expected output below still contains the original branch on %cond2
+;; with its original targets.
+; CHECK: br i1 %cond2, label %continue1, label %continue2
+ br i1 %cond2, label %continue1, label %continue2 ; non-trivial condition
+
+continue1:
+ call void @some_func1() noreturn nounwind
+ br label %joint
+
+continue2:
+ call void @some_func2() noreturn nounwind
+ br label %joint
+
+joint:
+;; unswitching will duplicate these calls.
+ call void @some_func3() noreturn nounwind
+ call void @some_func4() noreturn nounwind
+ br label %loop_begin
+
+loop_exit:
+ ret i32 0
+}
+
+declare void @some_func1() noreturn
+declare void @some_func2() noreturn
+declare void @some_func3() noreturn
+declare void @some_func4() noreturn
+
+!0 = !{!"branch_weights", i32 1, i32 100000000}
+!1 = !{!"function_entry_count", i64 100}