sched/fair: Energy-aware wake-up task placement
author Morten Rasmussen <morten.rasmussen@arm.com>
Wed, 30 Mar 2016 13:29:48 +0000 (14:29 +0100)
committer Amit Pundir <amit.pundir@linaro.org>
Wed, 21 Jun 2017 11:07:28 +0000 (16:37 +0530)
When the system is not overutilized, place waking tasks on the most
energy-efficient cpu. Previous attempts reduced the search space by
matching task utilization to cpu capacity before consulting the energy
model, as consulting the model is an expensive operation. Those search
heuristics didn't work very well, and lacking any better alternative,
this patch takes the brute-force route and tries all potential targets.
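
For a sense of the capacity filter used below: a candidate cpu is only
handed to the energy model when spare * 1024 >= capacity_margin *
task_util(p). Assuming the default capacity_margin of 1280 in this
tree, a task with util 300 needs at least 300 * 1280 / 1024 = 375 units
of spare capacity before its cpu is evaluated; cpus that fail the check
can still end up as the max-spare fallback.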

This approach doesn't scale, but it might be sufficient for many
embedded applications while work continues on a heuristic that can
minimize the necessary computations. The heuristic must be derived from
the platform energy model rather than from additional assumptions, such
as that lower capacity implies better energy efficiency; a low-capacity
cpu running at a high OPP may well cost more energy per unit of work
than a higher-capacity cpu at a low OPP. PeterZ mentioned in the past
that we might be able to derive some simpler deciding functions using
mathematical (modal?) analysis.

Change-Id: I772bacb4c8fd599f8006fa422f842e66377a9c6c
Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com>
[rebase: on top of msm-google/android-msm-marlin-3.18]
Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
(cherry picked from commit a894422dbdb7b77ea2acfe7ff909ccb5ded23514)
Signed-off-by: Chris Redpath <chris.redpath@arm.com>
kernel/sched/fair.c

index 16f15549b5d03083bab31d4c2d5a3b8a614cd61f..c4d9d8bf8d1f3bec9a566bf1bb8ffc1c07fd6c47 100644
@@ -5889,6 +5889,60 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
        return min_cap * 1024 < task_util(p) * capacity_margin;
 }
 
+static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu)
+{
+       int i;
+       int min_diff = 0, energy_cpu = prev_cpu, spare_cpu = prev_cpu;
+       unsigned long max_spare = 0;
+       struct sched_domain *sd;
+
+       rcu_read_lock();
+
+       sd = rcu_dereference(per_cpu(sd_ea, prev_cpu));
+
+       if (!sd)
+               goto unlock;
+
+       for_each_cpu_and(i, tsk_cpus_allowed(p), sched_domain_span(sd)) {
+               int diff;
+               unsigned long spare;
+
+               struct energy_env eenv = {
+                       .util_delta     = task_util(p),
+                       .src_cpu        = prev_cpu,
+                       .dst_cpu        = i,
+               };
+
+               spare = capacity_spare_wake(i, p);
+
+               if (i == prev_cpu)
+                       continue;
+
+               if (spare > max_spare) {
+                       max_spare = spare;
+                       spare_cpu = i;
+               }
+
+               if (spare * 1024 < capacity_margin * task_util(p))
+                       continue;
+
+               diff = energy_diff(&eenv);
+
+               if (diff < min_diff) {
+                       min_diff = diff;
+                       energy_cpu = i;
+               }
+       }
+
+unlock:
+       rcu_read_unlock();
+
+       if (energy_cpu == prev_cpu && !cpu_overutilized(prev_cpu))
+               return prev_cpu;
+
+       return energy_cpu != prev_cpu ? energy_cpu : spare_cpu;
+}
+
 /*
  * select_task_rq_fair: Select target runqueue for the waking task in domains
  * that have the 'sd_flag' flag set. In practice, this is SD_BALANCE_WAKE,
@@ -5914,6 +5968,9 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
                want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu)
                              && cpumask_test_cpu(cpu, tsk_cpus_allowed(p));
 
+       if (energy_aware() && !(cpu_rq(prev_cpu)->rd->overutilized))
+               return select_energy_cpu_brute(p, prev_cpu);
+
        rcu_read_lock();
        for_each_domain(cpu, tmp) {
                if (!(tmp->flags & SD_LOAD_BALANCE))
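
For illustration, a rough user-space model of the selection loop above
(a sketch only: NR_CPUS, the spare_cap[] and energy_cost[] tables and
the overutilized flag are invented stand-ins, and the real kernel
helpers operate on struct energy_env and per-cpu state):

	#include <stdio.h>

	#define NR_CPUS		4
	#define CAPACITY_MARGIN	1280	/* mirrors capacity_margin in fair.c */

	/* invented per-cpu spare capacity left for the waking task (0..1024) */
	static unsigned long spare_cap[NR_CPUS] = { 100, 400, 600, 300 };

	/* invented energy model: relative cost of running the task on each
	 * cpu; negative means it saves energy versus staying on prev_cpu */
	static int energy_cost[NR_CPUS] = { 0, -20, 15, -5 };

	static int select_energy_cpu_brute(unsigned long task_util, int prev_cpu,
					   int prev_overutilized)
	{
		int i, min_diff = 0, energy_cpu = prev_cpu, spare_cpu = prev_cpu;
		unsigned long max_spare = 0;

		for (i = 0; i < NR_CPUS; i++) {
			if (i == prev_cpu)
				continue;

			/* remember the roomiest cpu as a fallback */
			if (spare_cap[i] > max_spare) {
				max_spare = spare_cap[i];
				spare_cpu = i;
			}

			/* skip cpus where the task would not fit with margin */
			if (spare_cap[i] * 1024 < CAPACITY_MARGIN * task_util)
				continue;

			/* brute-force: consult the energy model for every fit */
			if (energy_cost[i] < min_diff) {
				min_diff = energy_cost[i];
				energy_cpu = i;
			}
		}

		/* keep prev_cpu when nothing saves energy and it has headroom */
		if (energy_cpu == prev_cpu && !prev_overutilized)
			return prev_cpu;

		return energy_cpu != prev_cpu ? energy_cpu : spare_cpu;
	}

	int main(void)
	{
		/* task with util 300 waking up, previously on cpu 0 */
		printf("target cpu: %d\n", select_energy_cpu_brute(300, 0, 0));
		return 0;
	}

With these numbers cpu 1 wins: cpus 1 and 2 pass the 375-unit fit
check, and cpu 1 has the lowest modelled energy cost. Note also that
the hook added to select_task_rq_fair() returns early, so when
energy_aware() is set and the root domain is not overutilized the
usual want_affine/select_idle_sibling path is bypassed entirely.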