From: Chris Redpath Date: Mon, 19 Aug 2013 14:06:18 +0000 (+0100) Subject: HMP: Modify the runqueue stats to add a new child stat X-Git-Tag: firefly_0821_release~3680^2~5^2~3^2~18^2~5 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=b98cd6acc86f2c3bc10902476836746727b73ba9;p=firefly-linux-kernel-4.4.55.git HMP: Modify the runqueue stats to add a new child stat The original intent here was to track unweighted runqueue load with less resolution so we could use the least-recently-disturbed runqueue to choose between 'closely related' load levels. However, after experimenting with the resolution it turns out that the following algorithm is highly beneficial for mobile workloads. In hmp_domain_min_load: * If any CPU's load is zero, the overall load is zero * If no CPUs are idle, the domain is 'fully loaded' Additionally, the time since the last migration is used to discriminate between idle CPUs. Signed-off-by: Chris Redpath Signed-off-by: Liviu Dudau Signed-off-by: Jon Medhurst --- diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 62302a372796..c3ba73750d62 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1582,9 +1582,10 @@ static inline void __update_task_entity_contrib(struct sched_entity *se) } /* Compute the current contribution to load_avg by se, return any delta */ -static long __update_entity_load_avg_contrib(struct sched_entity *se) +static long __update_entity_load_avg_contrib(struct sched_entity *se, long *ratio) { long old_contrib = se->avg.load_avg_contrib; + long old_ratio = se->avg.load_avg_ratio; if (entity_is_task(se)) { __update_task_entity_contrib(se); @@ -1593,6 +1594,8 @@ static long __update_entity_load_avg_contrib(struct sched_entity *se) __update_group_entity_contrib(se); } + if (ratio) + *ratio = se->avg.load_avg_ratio - old_ratio; return se->avg.load_avg_contrib - old_contrib; } @@ -1612,7 +1615,7 @@ static inline void update_entity_load_avg(struct sched_entity *se, int update_cfs_rq) { struct cfs_rq *cfs_rq = 
cfs_rq_of(se); - long contrib_delta; + long contrib_delta, ratio_delta; u64 now; int cpu = -1; /* not used in normal case */ @@ -1632,15 +1635,17 @@ static inline void update_entity_load_avg(struct sched_entity *se, cfs_rq->curr == se, cpu)) return; - contrib_delta = __update_entity_load_avg_contrib(se); + contrib_delta = __update_entity_load_avg_contrib(se, &ratio_delta); if (!update_cfs_rq) return; - if (se->on_rq) + if (se->on_rq) { cfs_rq->runnable_load_avg += contrib_delta; - else + rq_of(cfs_rq)->avg.load_avg_ratio += ratio_delta; + } else { subtract_blocked_load_contrib(cfs_rq, -contrib_delta); + } } /* @@ -1673,7 +1678,6 @@ static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, int force_update) static inline void update_rq_runnable_avg(struct rq *rq, int runnable) { - u32 contrib; int cpu = -1; /* not used in normal case */ #ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE @@ -1682,9 +1686,7 @@ static inline void update_rq_runnable_avg(struct rq *rq, int runnable) __update_entity_runnable_avg(rq->clock_task, &rq->avg, runnable, runnable, cpu); __update_tg_runnable_avg(&rq->avg, &rq->cfs); - contrib = rq->avg.runnable_avg_sum * scale_load_down(1024); - contrib /= (rq->avg.runnable_avg_period + 1); - trace_sched_rq_runnable_ratio(cpu_of(rq), scale_load(contrib)); + trace_sched_rq_runnable_ratio(cpu_of(rq), rq->avg.load_avg_ratio); trace_sched_rq_runnable_load(cpu_of(rq), rq->cfs.runnable_load_avg); } @@ -1727,6 +1729,8 @@ static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq, } cfs_rq->runnable_load_avg += se->avg.load_avg_contrib; + rq_of(cfs_rq)->avg.load_avg_ratio += se->avg.load_avg_ratio; + /* we force update consideration on load-balancer moves */ update_cfs_rq_blocked_load(cfs_rq, !wakeup); } @@ -1745,6 +1749,8 @@ static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq, update_cfs_rq_blocked_load(cfs_rq, !sleep); cfs_rq->runnable_load_avg -= se->avg.load_avg_contrib; + rq_of(cfs_rq)->avg.load_avg_ratio -= se->avg.load_avg_ratio; + 
if (sleep) { cfs_rq->blocked_load_avg += se->avg.load_avg_contrib; se->avg.decay_count = atomic64_read(&cfs_rq->decay_counter); @@ -3964,8 +3970,8 @@ static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd, avg->hmp_last_up_migration : avg->hmp_last_down_migration; /* don't use the divisor in the loop, just at the end */ - contrib = avg->runnable_avg_sum * scale_load_down(1024); - scaled_contrib = contrib >> 22; + contrib = avg->load_avg_ratio * scale_load_down(1024); + scaled_contrib = contrib >> 13; if ((contrib < min_runnable_load) || (scaled_contrib == scaled_min_runnable_load && @@ -3988,7 +3994,9 @@ static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd, *min_cpu = min_cpu_runnable_temp; /* domain will often have at least one empty CPU */ - return min_runnable_load ? min_runnable_load / (LOAD_AVG_MAX + 1) : 0; + trace_printk("hmp_domain_min_load returning %lu\n", + min_runnable_load > 1023 ? 1023 : min_runnable_load); + return min_runnable_load > 1023 ? 1023 : min_runnable_load; } /*