+void BasicTTI::getUnrollingPreferences(Loop *L,
+ UnrollingPreferences &UP) const {
+ // This unrolling functionality is target independent, but to provide some
+ // motivation for its intended use, for x86:
+
+ // According to the Intel 64 and IA-32 Architectures Optimization Reference
+ // Manual, Intel Core models and later have a loop stream detector
+ // (and associated uop queue) that can benefit from partial unrolling.
+ // The relevant requirements are:
+ // - The loop must have no more than 4 (8 for Nehalem and later) branches
+ // taken, and none of them may be calls.
+ // - The loop can have no more than 18 (28 for Nehalem and later) uops.
+
+ // According to the Software Optimization Guide for AMD Family 15h Processors,
+ // models 30h-4fh (Steamroller and later) have a loop predictor and loop
+ // buffer which can benefit from partial unrolling.
+ // The relevant requirements are:
+ // - The loop must have fewer than 16 branches
+ // - The loop must have less than 40 uops in all executed loop branches
+
+ // The number of taken branches in a loop is hard to estimate here, and
+ // benchmarking has revealed that it is better not to be conservative when
+ // estimating the branch count. As a result, we'll ignore the branch limits
+ // until someone finds a case where it matters in practice.
+
+ unsigned MaxOps;
+ const TargetSubtargetInfo *ST = &TM->getSubtarget<TargetSubtargetInfo>();
+ if (PartialUnrollingThreshold.getNumOccurrences() > 0)
+ MaxOps = PartialUnrollingThreshold;
+ else if (ST->getSchedModel()->LoopMicroOpBufferSize > 0)
+ MaxOps = ST->getSchedModel()->LoopMicroOpBufferSize;
+ else
+ return;
+
+ // Scan the loop: don't unroll loops with calls.
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I) {
+ BasicBlock *BB = *I;
+
+ for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
+ if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
+ ImmutableCallSite CS(J);
+ if (const Function *F = CS.getCalledFunction()) {
+ if (!TopTTI->isLoweredToCall(F))
+ continue;
+ }
+
+ return;
+ }
+ }
+
+ // Enable runtime and partial unrolling up to the specified size.
+ UP.Partial = UP.Runtime = true;
+ UP.PartialThreshold = UP.PartialOptSizeThreshold = MaxOps;
+}