From ad4b78bbcbab66998b05d422ac6106b645796e54 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Sep 2009 12:31:31 +0200 Subject: [PATCH] sched: Add new wakeup preemption mode: WAKEUP_RUNNING Create a new wakeup preemption mode, preempt towards tasks that run shorter on avg. It sets next buddy to be sure we actually run the task we preempted for. Test results: root@twins:~# while :; do :; done & [1] 6537 root@twins:~# while :; do :; done & [2] 6538 root@twins:~# while :; do :; done & [3] 6539 root@twins:~# while :; do :; done & [4] 6540 root@twins:/home/peter# ./latt -c4 sleep 4 Entries: 48 (clients=4) Averages: ------------------------------ Max 4750 usec Avg 497 usec Stdev 737 usec root@twins:/home/peter# echo WAKEUP_RUNNING > /debug/sched_features root@twins:/home/peter# ./latt -c4 sleep 4 Entries: 48 (clients=4) Averages: ------------------------------ Max 14 usec Avg 5 usec Stdev 3 usec Disabled by default - needs more testing. Signed-off-by: Peter Zijlstra Acked-by: Mike Galbraith Signed-off-by: Ingo Molnar LKML-Reference: --- include/linux/sched.h | 2 ++ kernel/sched.c | 17 ++++++++++------- kernel/sched_debug.c | 1 + kernel/sched_fair.c | 14 +++++++++++--- kernel/sched_features.h | 5 +++++ 5 files changed, 29 insertions(+), 10 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index b4a39bb2b4a4..8af3d249170e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1113,6 +1113,8 @@ struct sched_entity { u64 start_runtime; u64 avg_wakeup; + u64 avg_running; + #ifdef CONFIG_SCHEDSTATS u64 wait_start; u64 wait_max; diff --git a/kernel/sched.c b/kernel/sched.c index 969dfaef2465..3bb4ea2ee6f0 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2458,6 +2458,7 @@ static void __sched_fork(struct task_struct *p) p->se.avg_overlap = 0; p->se.start_runtime = 0; p->se.avg_wakeup = sysctl_sched_wakeup_granularity; + p->se.avg_running = 0; #ifdef CONFIG_SCHEDSTATS p->se.wait_start = 0; @@ -5310,14 +5311,13 @@ static inline void schedule_debug(struct task_struct *prev) #endif } -static void put_prev_task(struct rq *rq, struct task_struct *prev) +static void put_prev_task(struct rq *rq, struct task_struct *p) { - if (prev->state == TASK_RUNNING) { - u64 runtime = prev->se.sum_exec_runtime; + u64 runtime = p->se.sum_exec_runtime - p->se.prev_sum_exec_runtime; - runtime -= prev->se.prev_sum_exec_runtime; - runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost); + update_avg(&p->se.avg_running, runtime); + if (p->state == TASK_RUNNING) { /* * In order to avoid avg_overlap growing stale when we are * indeed overlapping and hence not getting put to sleep, grow @@ -5327,9 +5327,12 @@ static void put_prev_task(struct rq *rq, struct task_struct *prev) * correlates to the amount of cache footprint a task can * build up. */ - update_avg(&prev->se.avg_overlap, runtime); + runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost); + update_avg(&p->se.avg_overlap, runtime); + } else { + update_avg(&p->se.avg_running, 0); } - prev->sched_class->put_prev_task(rq, prev); + p->sched_class->put_prev_task(rq, p); } /* diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 5ddbd0891267..efb84409bc43 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -395,6 +395,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) PN(se.sum_exec_runtime); PN(se.avg_overlap); PN(se.avg_wakeup); + PN(se.avg_running); nr_switches = p->nvcsw + p->nivcsw; diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index c741cd9d38de..3e6f78c66876 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1605,9 +1605,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ return; } - if (!sched_feat(WAKEUP_PREEMPT)) - return; - if ((sched_feat(WAKEUP_SYNC) && sync) || (sched_feat(WAKEUP_OVERLAP) && (se->avg_overlap < sysctl_sched_migration_cost && @@ -1616,6 +1613,17 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ return; } + if (sched_feat(WAKEUP_RUNNING)) { + if (pse->avg_running < se->avg_running) { + set_next_buddy(pse); + resched_task(curr); + return; + } + } + + if (!sched_feat(WAKEUP_PREEMPT)) + return; + find_matching_se(&se, &pse); BUG_ON(!pse); diff --git a/kernel/sched_features.h b/kernel/sched_features.h index d5059fd761d9..0d94083582c7 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h @@ -53,6 +53,11 @@ SCHED_FEAT(WAKEUP_SYNC, 0) */ SCHED_FEAT(WAKEUP_OVERLAP, 0) +/* + * Wakeup preemption towards tasks that run short + */ +SCHED_FEAT(WAKEUP_RUNNING, 0) + /* * Use the SYNC wakeup hint, pipes and the likes use this to indicate * the remote end is likely to consume the data we just wrote, and -- 2.34.1