sched: Aggregate load contributed by task entities on parenting cfs_rq
author Paul Turner <pjt@google.com>
Thu, 4 Oct 2012 11:18:30 +0000 (13:18 +0200)
committer Ingo Molnar <mingo@kernel.org>
Wed, 24 Oct 2012 08:27:21 +0000 (10:27 +0200)
For a given task t, we can compute its contribution to load as:

  task_load(t) = runnable_avg(t) * weight(t)

On a parenting cfs_rq we can then aggregate:

  runnable_load(cfs_rq) = \Sum task_load(t), for all runnable children t

Maintain this bottom up, with task entities adding their contributed load to
the parenting cfs_rq sum.  When a task entity's load changes we add the same
delta to the maintained sum.

Signed-off-by: Paul Turner <pjt@google.com>
Reviewed-by: Ben Segall <bsegall@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120823141506.514678907@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
include/linux/sched.h
kernel/sched/debug.c
kernel/sched/fair.c
kernel/sched/sched.h

index 418fc6d8a4dac5e6a76119bc27c1100a081bd4ed..81d8b1ba4100c35a3f61177736065dd7e5181d34 100644 (file)
@@ -1103,6 +1103,7 @@ struct sched_avg {
         */
        u32 runnable_avg_sum, runnable_avg_period;
        u64 last_runnable_update;
+       unsigned long load_avg_contrib;
 };
 
 #ifdef CONFIG_SCHEDSTATS
index 4240abce41163f89420850956405c513fe064a20..c953a89f94aa603363192fedc533251f9c598372 100644 (file)
@@ -94,6 +94,7 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
 #ifdef CONFIG_SMP
        P(se->avg.runnable_avg_sum);
        P(se->avg.runnable_avg_period);
+       P(se->avg.load_avg_contrib);
 #endif
 #undef PN
 #undef P
@@ -224,6 +225,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
                        cfs_rq->load_contribution);
        SEQ_printf(m, "  .%-30s: %d\n", "load_tg",
                        atomic_read(&cfs_rq->tg->load_weight));
+       SEQ_printf(m, "  .%-30s: %lld\n", "runnable_load_avg",
+                       cfs_rq->runnable_load_avg);
 #endif
 
        print_cfs_group_stats(m, cpu, cfs_rq->tg);
index 8c5468fcf10dc61988ec2aa0062b8d608b06f2e5..77af759e56754115c8a411a386bb551e970897dc 100644 (file)
@@ -1081,20 +1081,63 @@ static __always_inline int __update_entity_runnable_avg(u64 now,
        return decayed;
 }
 
+/*
+ * Compute the current contribution to load_avg by se, return any delta.
+ *
+ * For a task entity the cached se->avg.load_avg_contrib is overwritten with
+ *   runnable_avg_sum * load.weight / (runnable_avg_period + 1)
+ * and the signed difference (new value minus old value) is returned so the
+ * caller can apply it to an aggregate.  The + 1 in the divisor means a
+ * runnable_avg_period of zero does not divide by zero.
+ *
+ * For an entity that is not a task nothing is written and 0 is returned.
+ *
+ * NOTE(review): the product is formed in the operands' own type before it
+ * is handed to div64_u64().  runnable_avg_sum is declared u32, so if
+ * load.weight is only 32 bits wide on some build the multiply is 32-bit;
+ * confirm it cannot overflow there.
+ */
+static long __update_entity_load_avg_contrib(struct sched_entity *se)
+{
+       long old_contrib = se->avg.load_avg_contrib;
+
+       if (!entity_is_task(se))
+               return 0;
+
+       se->avg.load_avg_contrib = div64_u64(se->avg.runnable_avg_sum *
+                                            se->load.weight,
+                                            se->avg.runnable_avg_period + 1);
+
+       return se->avg.load_avg_contrib - old_contrib;
+}
+
 /*
  * Update a sched_entity's runnable average and, when that average changed,
  * its load contribution.
  *
  * The runqueue's clock_task is passed to __update_entity_runnable_avg() as
  * the current time.  If that helper returns 0 (it returns its "decayed"
  * flag) the function returns early and the cached load_avg_contrib is left
  * alone.  Otherwise the contribution is recomputed, and the resulting delta
  * is added to cfs_rq_of(se)->runnable_load_avg only while se->on_rq is set.
  */
 static inline void update_entity_load_avg(struct sched_entity *se)
 {
-       __update_entity_runnable_avg(rq_of(cfs_rq_of(se))->clock_task, &se->avg,
-                                    se->on_rq);
+       struct cfs_rq *cfs_rq = cfs_rq_of(se);
+       long contrib_delta;
+
+       if (!__update_entity_runnable_avg(rq_of(cfs_rq)->clock_task, &se->avg,
+                                         se->on_rq))
+               return;
+
+       contrib_delta = __update_entity_load_avg_contrib(se);
+       if (se->on_rq)
+               cfs_rq->runnable_load_avg += contrib_delta;
 }
 
 /*
  * Update rq->avg using rq->clock_task as the current time; runnable is
  * forwarded unchanged and the helper's return value is ignored.
  */
 static inline void update_rq_runnable_avg(struct rq *rq, int runnable)
 {
        __update_entity_runnable_avg(rq->clock_task, &rq->avg, runnable);
 }
+
+/*
+ * Add the load generated by se into cfs_rq's child load-average.
+ *
+ * se is refreshed through update_entity_load_avg() first, then its whole
+ * load_avg_contrib is added to cfs_rq->runnable_load_avg.
+ *
+ * NOTE(review): update_entity_load_avg() already adds a delta to
+ * cfs_rq_of(se) when se->on_rq is set, so this presumably expects to run
+ * before se->on_rq is set and with cfs_rq == cfs_rq_of(se); confirm
+ * against the caller.
+ */
+static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq,
+                                                 struct sched_entity *se)
+{
+       update_entity_load_avg(se);
+       cfs_rq->runnable_load_avg += se->avg.load_avg_contrib;
+}
+
+/*
+ * Remove se's load from this cfs_rq child load-average.
+ *
+ * se is refreshed through update_entity_load_avg() first, then its whole
+ * load_avg_contrib is subtracted from cfs_rq->runnable_load_avg.
+ *
+ * NOTE(review): runnable_load_avg is an unsigned u64, so this presumably
+ * relies on the same contribution having been added by an earlier enqueue
+ * and kept current by update_entity_load_avg(); confirm against the caller.
+ */
+static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq,
+                                                 struct sched_entity *se)
+{
+       update_entity_load_avg(se);
+       cfs_rq->runnable_load_avg -= se->avg.load_avg_contrib;
+}
 #else
 /*
  * Empty stand-ins for the load-tracking helpers in the #else branch: same
  * names and parameters as the versions above, with empty bodies.
  * NOTE(review): the matching #if is outside this hunk, presumably
  * CONFIG_SMP; confirm.
  */
 static inline void update_entity_load_avg(struct sched_entity *se) {}
 static inline void update_rq_runnable_avg(struct rq *rq, int runnable) {}
+static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq,
+                                                 struct sched_entity *se) {}
+static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq,
+                                                 struct sched_entity *se) {}
 #endif
 
 static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -1223,7 +1266,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
         */
        update_curr(cfs_rq);
        update_cfs_load(cfs_rq, 0);
-       update_entity_load_avg(se);
+       enqueue_entity_load_avg(cfs_rq, se);
        account_entity_enqueue(cfs_rq, se);
        update_cfs_shares(cfs_rq);
 
@@ -1298,7 +1341,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
         * Update run-time statistics of the 'current'.
         */
        update_curr(cfs_rq);
-       update_entity_load_avg(se);
+       dequeue_entity_load_avg(cfs_rq, se);
 
        update_stats_dequeue(cfs_rq, se);
        if (flags & DEQUEUE_SLEEP) {
index 14b571968713c378ab3d37ca32ddef16d092534e..e6539736af5844aa2fdf7da156c3b68f51dd4bea 100644 (file)
@@ -222,6 +222,15 @@ struct cfs_rq {
        unsigned int nr_spread_over;
 #endif
 
+#ifdef CONFIG_SMP
+       /*
+        * CFS Load tracking
+        * Under CFS, load is tracked on a per-entity basis and aggregated up.
+        * This allows for the description of both thread and group usage (in
+        * the FAIR_GROUP_SCHED case).
+        */
+       u64 runnable_load_avg;
+#endif
 #ifdef CONFIG_FAIR_GROUP_SCHED
        struct rq *rq;  /* cpu runqueue to which this cfs_rq is attached */
 
@@ -1214,4 +1223,3 @@ static inline u64 irq_time_read(int cpu)
 }
 #endif /* CONFIG_64BIT */
 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
-