From: Chris Redpath Date: Mon, 24 Mar 2014 13:47:29 +0000 (+0000) Subject: hmp: Use idle pull to perform forced up-migrations X-Git-Tag: firefly_0821_release~3680^2~16^2^2~15 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=aae7721f20f2520d24a149408a74f18e58f56472;p=firefly-linux-kernel-4.4.55.git hmp: Use idle pull to perform forced up-migrations When a normal forced up-migration takes place we stop the task to be migrated while the target CPU becomes available. This delay can range from 80us to 1500us on TC2 if the target CPU is in a deep idle state. Instead, interrupt the target CPU and ask it to pull a task. This lets the current eligible task continue executing on the original CPU while the target CPU wakes. Use a pinned timer to prevent the pulling CPU going back into power-down with pending up-migrations. If we trigger for a nohz kick, it doesn't matter about triggering for an idle pull since the idle_pull flag will be set when we execute the softirq and we'll still do the idle pull. If the target CPU is busy, we will not pull any tasks. 
Signed-off-by: Chris Redpath Signed-off-by: Jon Medhurst --- diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3e326f9208fe..5f242330ef85 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1407,7 +1407,11 @@ void scheduler_ipi(void) { if (llist_empty(&this_rq()->wake_list) && !tick_nohz_full_cpu(smp_processor_id()) - && !got_nohz_idle_kick()) + && !got_nohz_idle_kick() +#ifdef CONFIG_SCHED_HMP + && !this_rq()->wake_for_idle_pull +#endif + ) return; /* @@ -1434,6 +1438,11 @@ void scheduler_ipi(void) this_rq()->idle_balance = 1; raise_softirq_irqoff(SCHED_SOFTIRQ); } +#ifdef CONFIG_SCHED_HMP + else if (unlikely(this_rq()->wake_for_idle_pull)) + raise_softirq_irqoff(SCHED_SOFTIRQ); +#endif + irq_exit(); } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 1c003e9e1ef2..128d5723ae4d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -39,6 +39,9 @@ */ #include <linux/cpufreq.h> #endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */ +#ifdef CONFIG_SCHED_HMP +#include <linux/cpuidle.h> +#endif #include "sched.h" @@ -3514,6 +3517,110 @@ static const int hmp_max_tasks = 5; extern void __init arch_get_hmp_domains(struct list_head *hmp_domains_list); +#ifdef CONFIG_CPU_IDLE +/* + * hmp_idle_pull: + * + * In this version we have stopped using forced up migrations when we + * detect that a task running on a little CPU should be moved to a bigger + * CPU. In most cases, the bigger CPU is in a deep sleep state and a forced + * migration means we stop the task immediately but need to wait for the + * target CPU to wake up before we can restart the task which is being + * moved. Instead, we now wake a big CPU with an IPI and ask it to pull + * a task when ready. This allows the task to continue executing on its + * current CPU, reducing the amount of time that the task is stalled for. 
+ * + * keepalive timers: + * + * The keepalive timer is used as a way to keep a CPU engaged in an + * idle pull operation out of idle while waiting for the source + * CPU to stop and move the task. Ideally this would not be necessary + * and we could impose a temporary zero-latency requirement on the + * current CPU, but in the current QoS framework this will result in + * all CPUs in the system being unable to enter idle states which is + * not desirable. The timer does not perform any work when it expires. + */ +struct hmp_keepalive { + bool init; + ktime_t delay; /* if zero, no need for timer */ + struct hrtimer timer; +}; +DEFINE_PER_CPU(struct hmp_keepalive, hmp_cpu_keepalive); + +/* setup per-cpu keepalive timers */ +static enum hrtimer_restart hmp_cpu_keepalive_notify(struct hrtimer *hrtimer) +{ + return HRTIMER_NORESTART; +} + +/* + * Work out if any of the idle states have an exit latency too high for us. + * ns_delay is passed in containing the max we are willing to tolerate. + * If there are none, set ns_delay to zero. + * If there are any, set ns_delay to + * ('target_residency of state with shortest too-big latency' - 1) * 1000. + */ +static void hmp_keepalive_delay(unsigned int *ns_delay) +{ + struct cpuidle_driver *drv; + drv = cpuidle_driver_ref(); + if (drv) { + unsigned int us_delay = UINT_MAX; + unsigned int us_max_delay = *ns_delay / 1000; + int idx; + /* if cpuidle states are guaranteed to be sorted we + * could stop at the first match. 
+ */ + for (idx = 0; idx < drv->state_count; idx++) { + if (drv->states[idx].exit_latency > us_max_delay && + drv->states[idx].target_residency < us_delay) { + us_delay = drv->states[idx].target_residency; + } + } + if (us_delay == UINT_MAX) + *ns_delay = 0; /* no timer required */ + else + *ns_delay = 1000 * (us_delay - 1); + } + cpuidle_driver_unref(); +} + +static void hmp_cpu_keepalive_trigger(void) +{ + int cpu = smp_processor_id(); + struct hmp_keepalive *keepalive = &per_cpu(hmp_cpu_keepalive, cpu); + if (!keepalive->init) { + unsigned int ns_delay = 100000; /* tolerate 100usec delay */ + + hrtimer_init(&keepalive->timer, + CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); + keepalive->timer.function = hmp_cpu_keepalive_notify; + + hmp_keepalive_delay(&ns_delay); + keepalive->delay = ns_to_ktime(ns_delay); + keepalive->init = true; + } + if (ktime_to_ns(keepalive->delay)) + hrtimer_start(&keepalive->timer, + keepalive->delay, HRTIMER_MODE_REL_PINNED); +} + +static void hmp_cpu_keepalive_cancel(int cpu) +{ + struct hmp_keepalive *keepalive = &per_cpu(hmp_cpu_keepalive, cpu); + if (keepalive->init) + hrtimer_cancel(&keepalive->timer); +} +#else /* !CONFIG_CPU_IDLE */ +static void hmp_cpu_keepalive_trigger(void) +{ +} + +static void hmp_cpu_keepalive_cancel(int cpu) +{ +} +#endif + /* Setup hmp_domains */ static int __init hmp_cpu_mask_setup(void) { @@ -3574,6 +3681,8 @@ static void hmp_offline_cpu(int cpu) if(domain) cpumask_clear_cpu(cpu, &domain->cpus); + + hmp_cpu_keepalive_cancel(cpu); } /* * Needed to determine heaviest tasks etc. 
@@ -7003,7 +7112,7 @@ static void hmp_force_up_migration(int this_cpu) target = cpu_rq(cpu); raw_spin_lock_irqsave(&target->lock, flags); curr = target->cfs.curr; - if (!curr) { + if (!curr || target->active_balance) { raw_spin_unlock_irqrestore(&target->lock, flags); continue; } @@ -7020,16 +7129,13 @@ static void hmp_force_up_migration(int this_cpu) curr = hmp_get_heaviest_task(curr, 1); p = task_of(curr); if (hmp_up_migration(cpu, &target_cpu, curr)) { - if (!target->active_balance) { - get_task_struct(p); - target->push_cpu = target_cpu; - target->migrate_task = p; - got_target = 1; - trace_sched_hmp_migrate(p, target->push_cpu, HMP_MIGRATE_FORCE); - hmp_next_up_delay(&p->se, target->push_cpu); - } + cpu_rq(target_cpu)->wake_for_idle_pull = 1; + raw_spin_unlock_irqrestore(&target->lock, flags); + spin_unlock(&hmp_force_migration); + smp_send_reschedule(target_cpu); + return; } - if (!got_target && !target->active_balance) { + if (!got_target) { /* * For now we just check the currently running task. * Selecting the lightest task for offloading will @@ -7051,7 +7157,7 @@ static void hmp_force_up_migration(int this_cpu) * is not currently running move it, otherwise let the * CPU stopper take care of it. */ - if (got_target && !target->active_balance) { + if (got_target) { if (!task_running(target, p)) { trace_sched_hmp_migrate_force_running(p, 0); hmp_migrate_runnable_task(target); @@ -7157,6 +7263,8 @@ static unsigned int hmp_idle_pull(int this_cpu) raw_spin_unlock_irqrestore(&target->lock, flags); if (force) { + /* start timer to keep us awake */ + hmp_cpu_keepalive_trigger(); stop_one_cpu_nowait(cpu_of(target), hmp_active_task_migration_cpu_stop, target, &target->active_balance_work); @@ -7180,6 +7288,18 @@ static void run_rebalance_domains(struct softirq_action *h) enum cpu_idle_type idle = this_rq->idle_balance ? 
CPU_IDLE : CPU_NOT_IDLE; +#ifdef CONFIG_SCHED_HMP + /* shortcut for hmp idle pull wakeups */ + if (unlikely(this_rq->wake_for_idle_pull)) { + this_rq->wake_for_idle_pull = 0; + if (hmp_idle_pull(this_cpu)) { + /* break out unless running nohz idle as well */ + if (idle != CPU_IDLE) + return; + } + } +#endif + hmp_force_up_migration(this_cpu); rebalance_domains(this_cpu, idle); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 27f51ac86700..fbcda9339fc7 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -466,6 +466,7 @@ struct rq { struct cpu_stop_work active_balance_work; #ifdef CONFIG_SCHED_HMP struct task_struct *migrate_task; + int wake_for_idle_pull; #endif /* cpu of this runqueue: */ int cpu;