From: Juri Lelli
Date: Fri, 29 Jul 2016 13:04:11 +0000 (+0100)
Subject: sched/fair: add tunable to force selection at cpu granularity
X-Git-Tag: firefly_0821_release~176^2~222
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=d42fb8f959562bc34f7f2b17ca1e370f93a306a9;p=firefly-linux-kernel-4.4.55.git

sched/fair: add tunable to force selection at cpu granularity

EAS assumes that clusters with smaller capacity cores are more
energy-efficient. This may not be true on non-big-little devices,
so EAS can make incorrect cluster selections when finding a CPU to
wake. The "sched_is_big_little" hint can be used to cause a
cpu-based selection instead of a cluster-based selection.

This change also incorporates the sync hint enable patch:

EAS did not honour synchronous wakeup hints, so a new sysctl is
created to ask EAS to use this information when selecting a CPU.
The control is called "sched_sync_hint_enable".

Also contains:

EAS: sched/fair: for SMP bias toward idle core with capacity

For SMP devices, on wakeup bias towards idle cores that have
capacity, rather than busy cpus that need a higher OPP.

eas: favor idle cpus for boosted tasks

BUG: 29533997
BUG: 29512132
Change-Id: I0cc9a1b1b88fb52916f18bf2d25715bdc3634f9c
Signed-off-by: Juri Lelli
Signed-off-by: Srinath Sridharan

eas/sched/fair: Favoring busy cpus with low OPPs

BUG: 29533997
BUG: 29512132
Change-Id: I9305b3239698d64278db715a2e277ea0bb4ece79
Signed-off-by: Juri Lelli
---

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 7d021393b0da..4883dcf3e1a9 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -39,6 +39,8 @@ extern unsigned int sysctl_sched_latency;
 extern unsigned int sysctl_sched_min_granularity;
 extern unsigned int sysctl_sched_wakeup_granularity;
 extern unsigned int sysctl_sched_child_runs_first;
+extern unsigned int sysctl_sched_is_big_little;
+extern unsigned int sysctl_sched_sync_hint_enable;
 extern unsigned int sysctl_sched_cstate_aware;
 
 enum sched_tunable_scaling {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4742a17c7d53..e2b6174db07d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -51,7 +51,10 @@
 unsigned int sysctl_sched_latency = 6000000ULL;
 unsigned int normalized_sysctl_sched_latency = 6000000ULL;
 
+unsigned int sysctl_sched_is_big_little = 0;
+unsigned int sysctl_sched_sync_hint_enable = 1;
 unsigned int sysctl_sched_cstate_aware = 1;
+
 /*
  * The initial- and re-scaling of tunables is configurable
  * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
@@ -5555,7 +5558,97 @@ done:
 	return target;
 }
 
-static int energy_aware_wake_cpu(struct task_struct *p, int target)
+static inline int find_best_target(struct task_struct *p)
+{
+	int i, boosted;
+	int target_cpu = -1;
+	int target_capacity = 0;
+	int backup_capacity = 0;
+	int idle_cpu = -1;
+	int best_idle_cstate = INT_MAX;
+	int backup_cpu = -1;
+	unsigned long task_util_boosted, new_util;
+
+	/*
+	 * Favor 1) busy cpu with most capacity at current OPP
+	 *       2) idle_cpu with capacity at current OPP
+	 *       3) busy cpu with capacity at higher OPP
+	 */
+#ifdef CONFIG_CGROUP_SCHEDTUNE
+	boosted = schedtune_task_boost(p);
+#else
+	boosted = 0;
+#endif
+	task_util_boosted = boosted_task_util(p);
+	for_each_cpu(i, tsk_cpus_allowed(p)) {
+		int cur_capacity = capacity_curr_of(i);
+		struct rq *rq = cpu_rq(i);
+		int idle_idx = idle_get_state_idx(rq);
+
+		/*
+		 * p's blocked utilization is still accounted for on prev_cpu
+		 * so prev_cpu will receive a negative bias due to the double
+		 * accounting. However, the blocked utilization may be zero.
+		 */
+		new_util = cpu_util(i) + task_util_boosted;
+
+		/*
+		 * Ensure minimum capacity to grant the required boost.
+		 * The target CPU can be already at a capacity level higher
+		 * than the one required to boost the task.
+		 */
+
+		if (new_util > capacity_orig_of(i))
+			continue;
+
+		/*
+		 * For boosted tasks we favor idle cpus unconditionally to
+		 * improve latency.
+		 */
+		if (idle_idx >= 0 && boosted) {
+			if (idle_cpu < 0 ||
+			    (sysctl_sched_cstate_aware &&
+			     best_idle_cstate > idle_idx)) {
+				best_idle_cstate = idle_idx;
+				idle_cpu = i;
+			}
+			continue;
+		}
+
+		if (new_util < cur_capacity) {
+			if (cpu_rq(i)->nr_running) {
+				if (target_capacity == 0 ||
+				    target_capacity > cur_capacity) {
+					/* busy CPU with most capacity at current OPP */
+					target_cpu = i;
+					target_capacity = cur_capacity;
+				}
+			} else if (!boosted) {
+				if (idle_cpu < 0 ||
+				    (sysctl_sched_cstate_aware &&
+				     best_idle_cstate > idle_idx)) {
+					best_idle_cstate = idle_idx;
+					idle_cpu = i;
+				}
+			}
+		} else if (backup_capacity == 0 ||
+			   backup_capacity > cur_capacity) {
+			/* first busy CPU with capacity at higher OPP */
+			backup_capacity = cur_capacity;
+			backup_cpu = i;
+		}
+	}
+
+	if (!boosted && target_cpu < 0) {
+		target_cpu = idle_cpu >= 0 ? idle_cpu : backup_cpu;
+	}
+
+	if (boosted && idle_cpu >= 0)
+		target_cpu = idle_cpu;
+	return target_cpu;
+}
+
+static int energy_aware_wake_cpu(struct task_struct *p, int target, int sync)
 {
 	struct sched_domain *sd;
 	struct sched_group *sg, *sg_target;
@@ -5563,6 +5656,14 @@ static int energy_aware_wake_cpu(struct task_struct *p, int target)
 	int target_cpu = task_cpu(p);
 	int i;
 
+	if (sysctl_sched_sync_hint_enable && sync) {
+		int cpu = smp_processor_id();
+		cpumask_t search_cpus;
+		cpumask_and(&search_cpus, tsk_cpus_allowed(p), cpu_online_mask);
+		if (cpumask_test_cpu(cpu, &search_cpus))
+			return cpu;
+	}
+
 	sd = rcu_dereference(per_cpu(sd_ea, task_cpu(p)));
 
 	if (!sd)
@@ -5571,50 +5672,60 @@ static int energy_aware_wake_cpu(struct task_struct *p, int target)
 	sg = sd->groups;
 	sg_target = sg;
 
-	/*
-	 * Find group with sufficient capacity. We only get here if no cpu is
-	 * overutilized. We may end up overutilizing a cpu by adding the task,
-	 * but that should not be any worse than select_idle_sibling().
-	 * load_balance() should sort it out later as we get above the tipping
-	 * point.
-	 */
-	do {
-		/* Assuming all cpus are the same in group */
-		int max_cap_cpu = group_first_cpu(sg);
+	if (sysctl_sched_is_big_little) {
 
 		/*
-		 * Assume smaller max capacity means more energy-efficient.
-		 * Ideally we should query the energy model for the right
-		 * answer but it easily ends up in an exhaustive search.
+		 * Find group with sufficient capacity. We only get here if no cpu is
+		 * overutilized. We may end up overutilizing a cpu by adding the task,
+		 * but that should not be any worse than select_idle_sibling().
+		 * load_balance() should sort it out later as we get above the tipping
+		 * point.
 		 */
-		if (capacity_of(max_cap_cpu) < target_max_cap &&
-		    task_fits_max(p, max_cap_cpu)) {
-			sg_target = sg;
-			target_max_cap = capacity_of(max_cap_cpu);
-		}
-	} while (sg = sg->next, sg != sd->groups);
+		do {
+			/* Assuming all cpus are the same in group */
+			int max_cap_cpu = group_first_cpu(sg);
 
-	/* Find cpu with sufficient capacity */
-	for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg_target)) {
-		/*
-		 * p's blocked utilization is still accounted for on prev_cpu
-		 * so prev_cpu will receive a negative bias due to the double
-		 * accounting. However, the blocked utilization may be zero.
-		 */
-		int new_util = cpu_util(i) + boosted_task_util(p);
+			/*
+			 * Assume smaller max capacity means more energy-efficient.
+			 * Ideally we should query the energy model for the right
+			 * answer but it easily ends up in an exhaustive search.
+			 */
+			if (capacity_of(max_cap_cpu) < target_max_cap &&
+			    task_fits_max(p, max_cap_cpu)) {
+				sg_target = sg;
+				target_max_cap = capacity_of(max_cap_cpu);
+			}
+		} while (sg = sg->next, sg != sd->groups);
 
-		if (new_util > capacity_orig_of(i))
-			continue;
+		/* Find cpu with sufficient capacity */
+		for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg_target)) {
+			/*
+			 * p's blocked utilization is still accounted for on prev_cpu
+			 * so prev_cpu will receive a negative bias due to the double
+			 * accounting. However, the blocked utilization may be zero.
+			 */
+			int new_util = cpu_util(i) + boosted_task_util(p);
 
-		if (new_util < capacity_curr_of(i)) {
-			target_cpu = i;
-			if (cpu_rq(i)->nr_running)
-				break;
-		}
+			if (new_util > capacity_orig_of(i))
+				continue;
+
+			if (new_util < capacity_curr_of(i)) {
+				target_cpu = i;
+				if (cpu_rq(i)->nr_running)
+					break;
+			}
 
-		/* cpu has capacity at higher OPP, keep it as fallback */
-		if (target_cpu == task_cpu(p))
-			target_cpu = i;
+			/* cpu has capacity at higher OPP, keep it as fallback */
+			if (target_cpu == task_cpu(p))
+				target_cpu = i;
+		}
+	} else {
+		/*
+		 * Find a cpu with sufficient capacity
+		 */
+		int tmp_target = find_best_target(p);
+		if (tmp_target >= 0)
+			target_cpu = tmp_target;
 	}
 
 	if (target_cpu != task_cpu(p)) {
@@ -5691,7 +5802,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 
 	if (!sd) {
 		if (energy_aware() && !cpu_rq(cpu)->rd->overutilized)
-			new_cpu = energy_aware_wake_cpu(p, prev_cpu);
+			new_cpu = energy_aware_wake_cpu(p, prev_cpu, sync);
 		else if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */
 			new_cpu = select_idle_sibling(p, new_cpu);
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index fc204ae8487d..831d674a5566 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -304,6 +304,20 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &min_sched_granularity_ns,
 		.extra2		= &max_sched_granularity_ns,
 	},
+	{
+		.procname	= "sched_is_big_little",
+		.data		= &sysctl_sched_is_big_little,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "sched_sync_hint_enable",
+		.data		= &sysctl_sched_sync_hint_enable,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{
 		.procname	= "sched_cstate_aware",
 		.data		= &sysctl_sched_cstate_aware,
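
For reference, a minimal userspace sketch (not part of the patch) of how the two
new tunables could be toggled at runtime. The /proc/sys/kernel/ paths assume the
usual mapping for entries registered in kern_table, and the values written simply
mirror the defaults set in kernel/sched/fair.c above.

/*
 * Hypothetical helper, not from the patch: write a value to a sysctl file.
 * Paths assume kern_table entries appear under /proc/sys/kernel/.
 */
#include <stdio.h>

static int write_sysctl(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;	/* tunable not present or insufficient permission */
	fputs(val, f);
	return fclose(f);
}

int main(void)
{
	/* 0: per-cpu selection via find_best_target(); 1: cluster-based selection */
	write_sysctl("/proc/sys/kernel/sched_is_big_little", "0");

	/* 1: honour the sync hint and keep the waking task on the current cpu */
	write_sysctl("/proc/sys/kernel/sched_sync_hint_enable", "1");

	return 0;
}

Both controls are plain integer sysctls handled by proc_dointvec with mode 0644,
so they can equally be flipped with sysctl(8) or a shell redirect.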