UPSTREAM: sched/fair: Consider spare capacity in find_idlest_group()
author	Morten Rasmussen <morten.rasmussen@arm.com>
Fri, 14 Oct 2016 13:41:08 +0000 (14:41 +0100)
committer	Amit Pundir <amit.pundir@linaro.org>
Wed, 21 Jun 2017 11:07:24 +0000 (16:37 +0530)
In low-utilization scenarios, comparing relative loads in
find_idlest_group() doesn't always lead to the optimal choice.
Systems with groups containing different numbers of cpus and/or cpus of
different compute capacity are significantly better off when considering
spare capacity rather than relative load in those scenarios.

In addition to the existing load-based search, an alternative
spare-capacity-based candidate sched_group is found and selected
instead if sufficient spare capacity exists. If not, the existing
behaviour is preserved.
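
For example, on a system with cpus of different compute capacity, a
candidate group containing a completely idle high-capacity cpu can
still report more load than a group whose lower-capacity cpus are all
lightly loaded; comparing spare capacity picks the idle high-capacity
cpu, which has far more head-room for the waking task.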

Change-Id: I6097af76c302a5a12e240ca24c70f707ad118242
Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dietmar.eggemann@arm.com
Cc: freedom.tan@mediatek.com
Cc: keita.kobayashi.ym@renesas.com
Cc: mgalbraith@suse.de
Cc: sgurrappadi@nvidia.com
Cc: vincent.guittot@linaro.org
Cc: yuyang.du@intel.com
Link: http://lkml.kernel.org/r/1476452472-24740-3-git-send-email-morten.rasmussen@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 6a0b19c0f39a7a7b7fb77d3867a733136ff059a3)
Signed-off-by: Chris Redpath <chris.redpath@arm.com>
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 615e1dcab36f052b6505bb65bd6bdbbe4a86de09..82eb26bd4361e2f4f20fca366a09717ede0842e3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5526,7 +5526,9 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
                  int this_cpu, int sd_flag)
 {
        struct sched_group *idlest = NULL, *group = sd->groups;
+       struct sched_group *most_spare_sg = NULL;
        unsigned long min_load = ULONG_MAX, this_load = 0;
+       unsigned long most_spare = 0, this_spare = 0;
        int load_idx = sd->forkexec_idx;
        int imbalance = 100 + (sd->imbalance_pct-100)/2;
 
@@ -5534,7 +5536,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
                load_idx = sd->wake_idx;
 
        do {
-               unsigned long load, avg_load;
+               unsigned long load, avg_load, spare_cap, max_spare_cap;
                int local_group;
                int i;
 
@@ -5546,8 +5548,12 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
                local_group = cpumask_test_cpu(this_cpu,
                                               sched_group_cpus(group));
 
-               /* Tally up the load of all CPUs in the group */
+               /*
+                * Tally up the load of all CPUs in the group and find
+                * the group containing the CPU with most spare capacity.
+                */
                avg_load = 0;
+               max_spare_cap = 0;
 
                for_each_cpu(i, sched_group_cpus(group)) {
                        /* Bias balancing toward cpus of our domain */
@@ -5557,6 +5563,11 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
                                load = target_load(i, load_idx);
 
                        avg_load += load;
+
+                       spare_cap = capacity_spare_wake(i, p);
+
+                       if (spare_cap > max_spare_cap)
+                               max_spare_cap = spare_cap;
                }
 
                /* Adjust by relative CPU capacity of the group */
@@ -5564,12 +5575,33 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 
                if (local_group) {
                        this_load = avg_load;
-               } else if (avg_load < min_load) {
-                       min_load = avg_load;
-                       idlest = group;
+                       this_spare = max_spare_cap;
+               } else {
+                       if (avg_load < min_load) {
+                               min_load = avg_load;
+                               idlest = group;
+                       }
+
+                       if (most_spare < max_spare_cap) {
+                               most_spare = max_spare_cap;
+                               most_spare_sg = group;
+                       }
                }
        } while (group = group->next, group != sd->groups);
 
+       /*
+        * The cross-over point between using spare capacity or least load
+        * is too conservative for high utilization tasks on partially
+        * utilized systems if we require spare_capacity > task_util(p),
+        * so we allow for some task stuffing by using
+        * spare_capacity > task_util(p)/2.
+        */
+       if (this_spare > task_util(p) / 2 &&
+           imbalance*this_spare > 100*most_spare)
+               return NULL;
+       else if (most_spare > task_util(p) / 2)
+               return most_spare_sg;
+
        if (!idlest || 100*this_load < imbalance*min_load)
                return NULL;
        return idlest;
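
For reference, below is a minimal user-space C sketch of the selection
policy the hunk above adds. It is not kernel code: the
sched_domain/sched_group statistics, capacity_spare_wake() and
task_util() are replaced by plain per-group numbers, and the group
names and values are hypothetical; only the crossover conditions
mirror the patch.

	#include <stdio.h>

	struct group_stats {
		const char *name;
		unsigned long avg_load;		/* capacity-adjusted group load */
		unsigned long max_spare_cap;	/* largest per-cpu spare capacity in the group */
		int local;			/* non-zero for the group containing this_cpu */
	};

	/* Mirrors the candidate selection above; returns NULL to stay local. */
	static const struct group_stats *
	pick_group(const struct group_stats *g, int n, unsigned long task_util,
		   unsigned long imbalance_pct)
	{
		const struct group_stats *idlest = NULL, *most_spare_sg = NULL;
		unsigned long min_load = ~0UL, this_load = 0;
		unsigned long most_spare = 0, this_spare = 0;
		unsigned long imbalance = 100 + (imbalance_pct - 100) / 2;
		int i;

		for (i = 0; i < n; i++) {
			if (g[i].local) {
				this_load = g[i].avg_load;
				this_spare = g[i].max_spare_cap;
			} else {
				if (g[i].avg_load < min_load) {
					min_load = g[i].avg_load;
					idlest = &g[i];
				}
				if (most_spare < g[i].max_spare_cap) {
					most_spare = g[i].max_spare_cap;
					most_spare_sg = &g[i];
				}
			}
		}

		/* Spare-capacity crossover: allow some task stuffing (util/2). */
		if (this_spare > task_util / 2 &&
		    imbalance * this_spare > 100 * most_spare)
			return NULL;
		else if (most_spare > task_util / 2)
			return most_spare_sg;

		/* Fall back to the existing load-based comparison. */
		if (!idlest || 100 * this_load < imbalance * min_load)
			return NULL;
		return idlest;
	}

	int main(void)
	{
		/* Hypothetical groups: local "little" group vs. remote "big" group. */
		struct group_stats groups[] = {
			{ "little", 300, 180, 1 },
			{ "big",    350, 800, 0 },
		};
		unsigned long task_util = 400;	/* more than twice the local spare capacity */
		const struct group_stats *g;

		g = pick_group(groups, 2, task_util, 125);
		printf("chosen group: %s\n", g ? g->name : "(stay in local group)");
		return 0;
	}

With these illustrative numbers the load-based comparison alone would
keep the task in the local group, while the spare-capacity path
returns the remote group that actually has room for it.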