From f1bfd7f09d63fefa0859a20e91279eb66dbf7a6f Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Thu, 30 Jun 2016 15:09:24 +0100 Subject: [PATCH] FIXUP: sched: fix SchedFreq integration for both PELT and WALT The current kernel allows to use either PELT or WALT to track CPUs utilizations. One of the main differences between the two approaches is that PELT tracks only utilization of SCHED_OTHER classes while WALT tracks all tasks with a single signal. The current sched_freq_tick does not make this distinction and, when WALT is in use, we end up adding multiple time the contribution related to the RT and DL classes. This patch fixes this issue by: 1. providing two different code paths for PELT and WALT, thus granting that when we switch to PELT we get the original behaviour based on the assumption that class aggregations is done underneath by SchedFreq. 2. avoiding the double accounting of DL and RT workloads, when WALT is in use, by just adding a margin to the original WALT signal when we need to check if the CFS capacity has to be increased. Change-Id: I7326fd50e868e97fb5e12351917e9d2969bfdae7 Signed-off-by: Patrick Bellasi --- kernel/sched/core.c | 87 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 66 insertions(+), 21 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 19decf8c07d5..e001ee1e3175 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2926,28 +2926,31 @@ unsigned long long task_sched_runtime(struct task_struct *p) } #ifdef CONFIG_CPU_FREQ_GOV_SCHED -static unsigned long sum_capacity_reqs(unsigned long cfs_cap, - struct sched_capacity_reqs *scr) + +static inline +unsigned long add_capacity_margin(unsigned long cpu_capacity) { - unsigned long total = cfs_cap + scr->rt; + cpu_capacity = cpu_capacity * capacity_margin; + cpu_capacity /= SCHED_CAPACITY_SCALE; + return cpu_capacity; +} - total = total * capacity_margin; - total /= SCHED_CAPACITY_SCALE; - total += scr->dl; - return total; +static inline +unsigned long sum_capacity_reqs(unsigned long cfs_cap, + struct sched_capacity_reqs *scr) +{ + unsigned long total = add_capacity_margin(cfs_cap + scr->rt); + return total += scr->dl; } -static void sched_freq_tick(int cpu) +static void sched_freq_tick_pelt(int cpu) { + unsigned long cpu_utilization = capacity_max; + unsigned long capacity_curr = capacity_curr_of(cpu); struct sched_capacity_reqs *scr; - unsigned long capacity_orig, capacity_curr, capacity_sum; - if (!sched_freq()) - return; - - capacity_orig = capacity_orig_of(cpu); - capacity_curr = capacity_curr_of(cpu); - if (capacity_curr == capacity_orig) + scr = &per_cpu(cpu_sched_capacity_reqs, cpu); + if (sum_capacity_reqs(cpu_utilization, scr) < capacity_curr) return; /* @@ -2956,16 +2959,58 @@ static void sched_freq_tick(int cpu) * a jump to a higher OPP as soon as the margin of free capacity * is impacted (specified by capacity_margin). */ + set_cfs_cpu_capacity(cpu, true, cpu_utilization); +} + +#ifdef CONFIG_SCHED_WALT +static void sched_freq_tick_walt(int cpu) +{ + unsigned long cpu_utilization = cpu_util(cpu); + unsigned long capacity_curr = capacity_curr_of(cpu); + + if (walt_disabled || !sysctl_sched_use_walt_cpu_util) + return sched_freq_tick_pelt(cpu); + + /* + * Add a margin to the WALT utilization. + * NOTE: WALT tracks a single CPU signal for all the scheduling + * classes, thus this margin is going to be added to the DL class as + * well, which is something we do not do in sched_freq_tick_pelt case. + */ + cpu_utilization = add_capacity_margin(cpu_utilization); + if (cpu_utilization <= capacity_curr) + return; + + /* + * It is likely that the load is growing so we + * keep the added margin in our request as an + * extra boost. + */ + set_cfs_cpu_capacity(cpu, true, cpu_utilization); - scr = &per_cpu(cpu_sched_capacity_reqs, cpu); - capacity_sum = sum_capacity_reqs(cpu_util(cpu), scr); - if (capacity_curr < capacity_sum) { - set_cfs_cpu_capacity(cpu, true, capacity_sum); - } +} +#define _sched_freq_tick(cpu) sched_freq_tick_walt(cpu) +#else +#define _sched_freq_tick(cpu) sched_freq_tick_pelt(cpu) +#endif /* CONFIG_SCHED_WALT */ + +static void sched_freq_tick(int cpu) +{ + unsigned long capacity_orig, capacity_curr; + + if (!sched_freq()) + return; + + capacity_orig = capacity_orig_of(cpu); + capacity_curr = capacity_curr_of(cpu); + if (capacity_curr == capacity_orig) + return; + + _sched_freq_tick(cpu); } #else static inline void sched_freq_tick(int cpu) { } -#endif +#endif /* CONFIG_CPU_FREQ_GOV_SCHED */ /* * This function gets called by the timer code, with HZ frequency. -- 2.34.1