From 58088ad0152ba4b7997388c93d0ca208ec1ece75 Mon Sep 17 00:00:00 2001 From: Paul Turner Date: Thu, 21 Jul 2011 09:43:31 -0700 Subject: [PATCH] sched: Add a timer to handle CFS bandwidth refresh This patch adds a per-task_group timer which handles the refresh of the global CFS bandwidth pool. Since the RT pool is using a similar timer there's some small refactoring to share this support. Signed-off-by: Paul Turner Reviewed-by: Hidetoshi Seto Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110721184757.277271273@google.com Signed-off-by: Ingo Molnar --- kernel/sched.c | 107 +++++++++++++++++++++++++++++++++++--------- kernel/sched_fair.c | 40 +++++++++++++++-- 2 files changed, 123 insertions(+), 24 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 35561c63a490..34bf8e6db9af 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -196,10 +196,28 @@ static inline int rt_bandwidth_enabled(void) return sysctl_sched_rt_runtime >= 0; } -static void start_rt_bandwidth(struct rt_bandwidth *rt_b) +static void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period) { - ktime_t now; + unsigned long delta; + ktime_t soft, hard, now; + for (;;) { + if (hrtimer_active(period_timer)) + break; + + now = hrtimer_cb_get_time(period_timer); + hrtimer_forward(period_timer, now, period); + + soft = hrtimer_get_softexpires(period_timer); + hard = hrtimer_get_expires(period_timer); + delta = ktime_to_ns(ktime_sub(hard, soft)); + __hrtimer_start_range_ns(period_timer, soft, delta, + HRTIMER_MODE_ABS_PINNED, 0); + } +} + +static void start_rt_bandwidth(struct rt_bandwidth *rt_b) +{ if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) return; @@ -207,22 +225,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) return; raw_spin_lock(&rt_b->rt_runtime_lock); - for (;;) { - unsigned long delta; - ktime_t soft, hard; - - if (hrtimer_active(&rt_b->rt_period_timer)) - break; - - now = hrtimer_cb_get_time(&rt_b->rt_period_timer); - hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period); - - soft = hrtimer_get_softexpires(&rt_b->rt_period_timer); - hard = hrtimer_get_expires(&rt_b->rt_period_timer); - delta = ktime_to_ns(ktime_sub(hard, soft)); - __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta, - HRTIMER_MODE_ABS_PINNED, 0); - } + start_bandwidth_timer(&rt_b->rt_period_timer, rt_b->rt_period); raw_spin_unlock(&rt_b->rt_runtime_lock); } @@ -253,6 +256,9 @@ struct cfs_bandwidth { ktime_t period; u64 quota, runtime; s64 hierarchal_quota; + + int idle, timer_active; + struct hrtimer period_timer; #endif }; @@ -403,6 +409,28 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) } static inline u64 default_cfs_period(void); +static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun); + +static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) +{ + struct cfs_bandwidth *cfs_b = + container_of(timer, struct cfs_bandwidth, period_timer); + ktime_t now; + int overrun; + int idle = 0; + + for (;;) { + now = hrtimer_cb_get_time(timer); + overrun = hrtimer_forward(timer, now, cfs_b->period); + + if (!overrun) + break; + + idle = do_sched_cfs_period_timer(cfs_b, overrun); + } + + return idle ? HRTIMER_NORESTART : HRTIMER_RESTART; +} static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) { @@ -410,6 +438,9 @@ static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) cfs_b->runtime = 0; cfs_b->quota = RUNTIME_INF; cfs_b->period = ns_to_ktime(default_cfs_period()); + + hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + cfs_b->period_timer.function = sched_cfs_period_timer; } static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) @@ -417,8 +448,34 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) cfs_rq->runtime_enabled = 0; } +/* requires cfs_b->lock, may release to reprogram timer */ +static void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b) +{ + /* + * The timer may be active because we're trying to set a new bandwidth + * period or because we're racing with the tear-down path + * (timer_active==0 becomes visible before the hrtimer call-back + * terminates). In either case we ensure that it's re-programmed + */ + while (unlikely(hrtimer_active(&cfs_b->period_timer))) { + raw_spin_unlock(&cfs_b->lock); + /* ensure cfs_b->lock is available while we wait */ + hrtimer_cancel(&cfs_b->period_timer); + + raw_spin_lock(&cfs_b->lock); + /* if someone else restarted the timer then we're done */ + if (cfs_b->timer_active) + return; + } + + cfs_b->timer_active = 1; + start_bandwidth_timer(&cfs_b->period_timer, cfs_b->period); +} + static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) -{} +{ + hrtimer_cancel(&cfs_b->period_timer); +} #else static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {} @@ -9078,7 +9135,7 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime); static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota) { - int i, ret = 0; + int i, ret = 0, runtime_enabled; struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); if (tg == &root_task_group) @@ -9105,10 +9162,18 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota) if (ret) goto out_unlock; + runtime_enabled = quota != RUNTIME_INF; raw_spin_lock_irq(&cfs_b->lock); cfs_b->period = ns_to_ktime(period); cfs_b->quota = quota; cfs_b->runtime = quota; + + /* restart the period timer (if active) to handle new period expiry */ + if (runtime_enabled && cfs_b->timer_active) { + /* force a reprogram */ + cfs_b->timer_active = 0; + __start_cfs_bandwidth(cfs_b); + } raw_spin_unlock_irq(&cfs_b->lock); for_each_possible_cpu(i) { @@ -9116,7 +9181,7 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota) struct rq *rq = rq_of(cfs_rq); raw_spin_lock_irq(&rq->lock); - cfs_rq->runtime_enabled = quota != RUNTIME_INF; + cfs_rq->runtime_enabled = runtime_enabled; cfs_rq->runtime_remaining = 0; raw_spin_unlock_irq(&rq->lock); } diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 9502aa899f73..af73a8a85eef 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1284,9 +1284,16 @@ static void assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) raw_spin_lock(&cfs_b->lock); if (cfs_b->quota == RUNTIME_INF) amount = min_amount; - else if (cfs_b->runtime > 0) { - amount = min(cfs_b->runtime, min_amount); - cfs_b->runtime -= amount; + else { + /* ensure bandwidth timer remains active under consumption */ + if (!cfs_b->timer_active) + __start_cfs_bandwidth(cfs_b); + + if (cfs_b->runtime > 0) { + amount = min(cfs_b->runtime, min_amount); + cfs_b->runtime -= amount; + cfs_b->idle = 0; + } } raw_spin_unlock(&cfs_b->lock); @@ -1315,6 +1322,33 @@ static __always_inline void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, __account_cfs_rq_runtime(cfs_rq, delta_exec); } +/* + * Responsible for refilling a task_group's bandwidth and unthrottling its + * cfs_rqs as appropriate. If there has been no activity within the last + * period the timer is deactivated until scheduling resumes; cfs_b->idle is + * used to track this state. + */ +static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) +{ + int idle = 1; + + raw_spin_lock(&cfs_b->lock); + /* no need to continue the timer with no bandwidth constraint */ + if (cfs_b->quota == RUNTIME_INF) + goto out_unlock; + + idle = cfs_b->idle; + cfs_b->runtime = cfs_b->quota; + + /* mark as potentially idle for the upcoming period */ + cfs_b->idle = 1; +out_unlock: + if (idle) + cfs_b->timer_active = 0; + raw_spin_unlock(&cfs_b->lock); + + return idle; +} #else static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec) {} -- 2.34.1