From d7009d25723ff902d6aeddcd906689eec919c6e6 Mon Sep 17 00:00:00 2001
From: Morten Rasmussen
Date: Mon, 25 Jul 2016 14:34:26 +0100
Subject: [PATCH] UPSTREAM: sched/fair: Let asymmetric CPU configurations
 balance at wake-up

Currently, SD_WAKE_AFFINE always takes priority over wakeup balancing if
SD_BALANCE_WAKE is set on the sched_domains. For asymmetric
configurations SD_WAKE_AFFINE is only desirable if the waking task's
compute demand (utilization) is suitable for the waking CPU and the
previous CPU, and all CPUs within their respective
SD_SHARE_PKG_RESOURCES domains (sd_llc). If not, let wakeup balancing
take over (find_idlest_{group, cpu}()).

This patch makes affine wake-ups conditional on whether both the waker
CPU and the previous CPU have sufficient capacity for the waking task,
assuming that the CPU capacities within an SD_SHARE_PKG_RESOURCES
domain (sd_llc) are homogeneous.

Change-Id: I6d5d0426713da9ef6198f574ad9afbe58dacc1f0
Signed-off-by: Morten Rasmussen
Signed-off-by: Peter Zijlstra (Intel)
Acked-by: Vincent Guittot
Cc: Linus Torvalds
Cc: Mike Galbraith
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: dietmar.eggemann@arm.com
Cc: freedom.tan@mediatek.com
Cc: keita.kobayashi.ym@renesas.com
Cc: mgalbraith@suse.de
Cc: sgurrappadi@nvidia.com
Cc: yuyang.du@intel.com
Link: http://lkml.kernel.org/r/1469453670-2660-10-git-send-email-morten.rasmussen@arm.com
Signed-off-by: Ingo Molnar
(cherry picked from commit 3273163c6775c4c21823985304c2364b08ca6ea2)
[removed existing definition of capacity_margin]
Signed-off-by: Chris Redpath
---
 kernel/sched/fair.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 54 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5294c5f47939..19edbc48b1bc 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -128,6 +128,12 @@ unsigned int __read_mostly sysctl_sched_shares_window = 10000000UL;
 unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
 #endif
 
+/*
+ * The margin used when comparing utilization with CPU capacity:
+ * util * margin < capacity * 1024
+ */
+unsigned int capacity_margin = 1280; /* ~20% */
+
 static inline void update_load_add(struct load_weight *lw, unsigned long inc)
 {
 	lw->weight += inc;
@@ -5358,8 +5364,6 @@ static inline unsigned long task_util(struct task_struct *p)
 	return p->se.avg.util_avg;
 }
 
-unsigned int capacity_margin = 1280; /* ~20% margin */
-
 static inline unsigned long boosted_task_util(struct task_struct *task);
 
 static inline bool __task_fits(struct task_struct *p, int cpu, int util)
@@ -5496,6 +5500,13 @@ boosted_task_util(struct task_struct *task)
 	return util + margin;
 }
 
+static int cpu_util_wake(int cpu, struct task_struct *p);
+
+static unsigned long capacity_spare_wake(int cpu, struct task_struct *p)
+{
+	return capacity_orig_of(cpu) - cpu_util_wake(cpu, p);
+}
+
 /*
  * find_idlest_group finds and returns the least busy CPU group within the
  * domain.
@@ -5899,6 +5910,45 @@ static int energy_aware_wake_cpu(struct task_struct *p, int target, int sync)
 	return target_cpu;
 }
 
+/*
+ * cpu_util_wake: Compute cpu utilization with any contributions from
+ * the waking task p removed.
+ */
+static int cpu_util_wake(int cpu, struct task_struct *p)
+{
+	unsigned long util, capacity;
+
+	/* Task has no contribution or is new */
+	if (cpu != task_cpu(p) || !p->se.avg.last_update_time)
+		return cpu_util(cpu);
+
+	capacity = capacity_orig_of(cpu);
+	util = max_t(long, cpu_rq(cpu)->cfs.avg.util_avg - task_util(p), 0);
+
+	return (util >= capacity) ? capacity : util;
+}
+
+/*
+ * Disable WAKE_AFFINE in the case where task @p doesn't fit in the
+ * capacity of either the waking CPU @cpu or the previous CPU @prev_cpu.
+ *
+ * In that case WAKE_AFFINE doesn't make sense and we'll let
+ * BALANCE_WAKE sort things out.
+ */
+static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
+{
+	long min_cap, max_cap;
+
+	min_cap = min(capacity_orig_of(prev_cpu), capacity_orig_of(cpu));
+	max_cap = cpu_rq(cpu)->rd->max_cpu_capacity.val;
+
+	/* Minimum capacity is close to max, no need to abort wake_affine */
+	if (max_cap - min_cap < max_cap >> 3)
+		return 0;
+
+	return min_cap * 1024 < task_util(p) * capacity_margin;
+}
+
 /*
  * select_task_rq_fair: Select target runqueue for the waking task in domains
  * that have the 'sd_flag' flag set. In practice, this is SD_BALANCE_WAKE,
@@ -5921,7 +5971,8 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 	int sync = wake_flags & WF_SYNC;
 
 	if (sd_flag & SD_BALANCE_WAKE)
-		want_affine = (!wake_wide(p) && cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) ||
+		want_affine = (!wake_wide(p) && !wake_cap(p, cpu, prev_cpu)
+			       && cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) ||
 			      energy_aware();
 
 	rcu_read_lock();
-- 
2.34.1
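
Not part of the upstream commit: a minimal user-space sketch of the
arithmetic behind wake_cap(), where the helper name wake_cap_exceeded()
and the capacity/utilization numbers are made up for illustration. With
capacity_margin = 1280, the test min_cap * 1024 < task_util * capacity_margin
fires once the task's utilization exceeds 1024/1280 = ~80% of the smaller
CPU's original capacity, i.e. the ~20% margin named in the comment:

    #include <stdio.h>

    /* Same constant the patch adds: util * margin < capacity * 1024 */
    static const long capacity_margin = 1280; /* ~20% */

    /*
     * Mirrors the final test in wake_cap(): non-zero means the task is
     * too big for the smaller of {waker CPU, previous CPU}, so the
     * affine wake-up path should be skipped.
     */
    static int wake_cap_exceeded(long min_cap, long task_util)
    {
        return min_cap * 1024 < task_util * capacity_margin;
    }

    int main(void)
    {
        /* Hypothetical little CPU of capacity 430 (out of 1024):
         * the cutoff is 430 * 1024 / 1280 = 344. */
        printf("%d\n", wake_cap_exceeded(430, 300)); /* 0: 300 <= 344, fits */
        printf("%d\n", wake_cap_exceeded(430, 400)); /* 1: 400 >  344, too big */
        return 0;
    }

On a symmetric system the earlier max_cap - min_cap < max_cap >> 3 check
(all capacities within 12.5% of the maximum) returns early, so this
margin comparison only comes into play on asymmetric topologies.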