sched/fair: Add eas (& cas) specific rq, sd and task stats
author Dietmar Eggemann <dietmar.eggemann@arm.com>
Wed, 22 Mar 2017 18:23:13 +0000 (18:23 +0000)
committer Amit Pundir <amit.pundir@linaro.org>
Wed, 21 Jun 2017 11:07:38 +0000 (16:37 +0530)
The statistics counters are placed in the eas (& cas) wakeup path. Each
of them has one representation for the runqueue (rq), the sched_domain
(sd) and the task.
A task counter is always incremented. An rq counter is always
incremented for the rq the scheduler is currently running on. An sd
counter is only incremented if a relation to an sd exists, as in the
sketch below.
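
A minimal sketch of the pattern, condensed from the fair.c hunk below
(entry and one exit of select_idle_sibling()):

        schedstat_inc(p, se.statistics.nr_wakeups_sis_attempts); /* task */
        schedstat_inc(this_rq(), eas_stats.sis_attempts);        /* rq */
        ...
        /* sd counter only where a sched_domain is at hand */
        schedstat_inc(sd, eas_stats.sis_suff_cap);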

The counters are exposed:

(1) In /proc/schedstat for rq's and sd's:

$ cat /proc/schedstat
...
cpu0 71422 0 2321254 ...
eas  44144 0 0 19446 0 24698 568435 51621 156932 133 222011 17459 120279 516814 83 0 156962 359235 176439 139981  <- runqueue for cpu0
...
domain0 3 42430 42331 ...
eas 0 0 0 14200 0 0 0 0 0 0 0 0 0 0 0 0 0 0 66355 0  <- MC sched domain for cpu0
...

The per-cpu eas vector has the following elements:

sis_attempts  sis_idle   sis_cache_affine sis_suff_cap    sis_idle_cpu    sis_count               ||
secb_attempts secb_sync  secb_idle_bt     secb_insuff_cap secb_no_nrg_sav secb_nrg_sav secb_count ||
fbt_attempts  fbt_no_cpu fbt_no_sd        fbt_pref_idle   fbt_count                               ||
cas_attempts  cas_count
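
The vector thus carries 20 counters (6 sis + 7 secb + 5 fbt + 2 cas),
printed in exactly this order by show_easstat() (see the stats.c hunk
below). A hypothetical user-space sketch, not part of this patch, for
pulling the counters out of such a line:

        #include <stdlib.h>
        #include <string.h>

        /* Parse the 20 counters following the "eas" tag; 0 on success. */
        static int parse_eas_line(const char *line, unsigned long long v[20])
        {
                int i;

                if (strncmp(line, "eas", 3))
                        return -1;
                line += 3;
                for (i = 0; i < 20; i++) {
                        char *end;

                        v[i] = strtoull(line, &end, 10); /* skips spaces */
                        if (end == line)
                                return -1;      /* short/garbled line */
                        line = end;
                }
                return 0;
        }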

The following relations exist between these counters (from cpu0 eas
vector above):

sis_attempts = sis_idle + sis_cache_affine + sis_suff_cap + sis_idle_cpu + sis_count

44144        = 0        + 0                + 19446        + 0            + 24698

secb_attempts = secb_sync + secb_idle_bt + secb_insuff_cap + secb_no_nrg_sav + secb_nrg_sav + secb_count

568435        = 51621     + 156932       + 133             + 222011          + 17459        + 120279

fbt_attempts = fbt_no_cpu + fbt_no_sd + fbt_pref_idle + fbt_count + (return -1)

516814       = 83         + 0         + 156962        + 359235    + (534)

cas_attempts = cas_count + (return -1 or smp_processor_id())

176439       = 139981    + (36458)
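
The implicit (return -1) terms are not exported; they have to be derived
from the exposed fields. A hypothetical helper, not part of this patch,
for the fbt case:

        /* wakeups in which find_best_target() returned -1 */
        static u64 fbt_no_target(const struct eas_stats *s)
        {
                return s->fbt_attempts - (s->fbt_no_cpu + s->fbt_no_sd +
                                          s->fbt_pref_idle + s->fbt_count);
        }

For the cpu0 vector above: 516814 - (83 + 0 + 156962 + 359235) = 534.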

(2) In /proc/$PROCESS_PID/task/$TASK_PID/sched for a task.

example: main thread of system_server

$ cat /proc/1083/task/1083/sched

...
se.statistics.nr_wakeups_sis_attempts        :                  945
se.statistics.nr_wakeups_sis_idle            :                    0
se.statistics.nr_wakeups_sis_cache_affine    :                    0
se.statistics.nr_wakeups_sis_suff_cap        :                  219
se.statistics.nr_wakeups_sis_idle_cpu        :                    0
se.statistics.nr_wakeups_sis_count           :                  726
se.statistics.nr_wakeups_secb_attempts       :                10376
se.statistics.nr_wakeups_secb_sync           :                 1462
se.statistics.nr_wakeups_secb_idle_bt        :                 6984
se.statistics.nr_wakeups_secb_insuff_cap     :                    3
se.statistics.nr_wakeups_secb_no_nrg_sav     :                  927
se.statistics.nr_wakeups_secb_nrg_sav        :                  206
se.statistics.nr_wakeups_secb_count          :                  794
se.statistics.nr_wakeups_fbt_attempts        :                 8914
se.statistics.nr_wakeups_fbt_no_cpu          :                    0
se.statistics.nr_wakeups_fbt_no_sd           :                    0
se.statistics.nr_wakeups_fbt_pref_idle       :                 6987
se.statistics.nr_wakeups_fbt_count           :                 1554
se.statistics.nr_wakeups_cas_attempts        :                 3107
se.statistics.nr_wakeups_cas_count           :                 1195
...

The same relations between the counters as in the per-cpu case apply.

Change-Id: Ie7d01267c78a3f41f60a3ef52917d5a5d463f195
Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Signed-off-by: Chris Redpath <chris.redpath@arm.com>
include/linux/sched.h
kernel/sched/debug.c
kernel/sched/fair.c
kernel/sched/sched.h
kernel/sched/stats.c

index 436f36f768c6b91f5ad683edaa062244fa8046a8..ad2c304b29b8f637bfa5e651f522f3bc6d2224be 100644 (file)
@@ -1065,6 +1065,37 @@ unsigned long capacity_curr_of(int cpu);
 
 struct sched_group;
 
+struct eas_stats {
+       /* select_idle_sibling() stats */
+       u64 sis_attempts;
+       u64 sis_idle;
+       u64 sis_cache_affine;
+       u64 sis_suff_cap;
+       u64 sis_idle_cpu;
+       u64 sis_count;
+
+       /* select_energy_cpu_brute() stats */
+       u64 secb_attempts;
+       u64 secb_sync;
+       u64 secb_idle_bt;
+       u64 secb_insuff_cap;
+       u64 secb_no_nrg_sav;
+       u64 secb_nrg_sav;
+       u64 secb_count;
+
+       /* find_best_target() stats */
+       u64 fbt_attempts;
+       u64 fbt_no_cpu;
+       u64 fbt_no_sd;
+       u64 fbt_pref_idle;
+       u64 fbt_count;
+
+       /* cas */
+       /* select_task_rq_fair() stats */
+       u64 cas_attempts;
+       u64 cas_count;
+};
+
 struct sched_domain {
        /* These fields must be setup */
        struct sched_domain *parent;    /* top domain must be null terminated */
@@ -1125,6 +1156,8 @@ struct sched_domain {
        unsigned int ttwu_wake_remote;
        unsigned int ttwu_move_affine;
        unsigned int ttwu_move_balance;
+
+       struct eas_stats eas_stats;
 #endif
 #ifdef CONFIG_SCHED_DEBUG
        char *name;
@@ -1283,6 +1316,35 @@ struct sched_statistics {
        u64                     nr_wakeups_affine_attempts;
        u64                     nr_wakeups_passive;
        u64                     nr_wakeups_idle;
+
+       /* select_idle_sibling() */
+       u64                     nr_wakeups_sis_attempts;
+       u64                     nr_wakeups_sis_idle;
+       u64                     nr_wakeups_sis_cache_affine;
+       u64                     nr_wakeups_sis_suff_cap;
+       u64                     nr_wakeups_sis_idle_cpu;
+       u64                     nr_wakeups_sis_count;
+
+       /* select_energy_cpu_brute() */
+       u64                     nr_wakeups_secb_attempts;
+       u64                     nr_wakeups_secb_sync;
+       u64                     nr_wakeups_secb_idle_bt;
+       u64                     nr_wakeups_secb_insuff_cap;
+       u64                     nr_wakeups_secb_no_nrg_sav;
+       u64                     nr_wakeups_secb_nrg_sav;
+       u64                     nr_wakeups_secb_count;
+
+       /* find_best_target() */
+       u64                     nr_wakeups_fbt_attempts;
+       u64                     nr_wakeups_fbt_no_cpu;
+       u64                     nr_wakeups_fbt_no_sd;
+       u64                     nr_wakeups_fbt_pref_idle;
+       u64                     nr_wakeups_fbt_count;
+
+       /* cas */
+       /* select_task_rq_fair() */
+       u64                     nr_wakeups_cas_attempts;
+       u64                     nr_wakeups_cas_count;
 };
 #endif
 
index 641511771ae6a696271f77532ac9e40e28175749..7f711662263191d72ebfe40af0389a7327048030 100644 (file)
@@ -597,6 +597,32 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
        P(se.statistics.nr_wakeups_affine_attempts);
        P(se.statistics.nr_wakeups_passive);
        P(se.statistics.nr_wakeups_idle);
+       /* eas */
+       /* select_idle_sibling() */
+       P(se.statistics.nr_wakeups_sis_attempts);
+       P(se.statistics.nr_wakeups_sis_idle);
+       P(se.statistics.nr_wakeups_sis_cache_affine);
+       P(se.statistics.nr_wakeups_sis_suff_cap);
+       P(se.statistics.nr_wakeups_sis_idle_cpu);
+       P(se.statistics.nr_wakeups_sis_count);
+       /* select_energy_cpu_brute() */
+       P(se.statistics.nr_wakeups_secb_attempts);
+       P(se.statistics.nr_wakeups_secb_sync);
+       P(se.statistics.nr_wakeups_secb_idle_bt);
+       P(se.statistics.nr_wakeups_secb_insuff_cap);
+       P(se.statistics.nr_wakeups_secb_no_nrg_sav);
+       P(se.statistics.nr_wakeups_secb_nrg_sav);
+       P(se.statistics.nr_wakeups_secb_count);
+       /* find_best_target() */
+       P(se.statistics.nr_wakeups_fbt_attempts);
+       P(se.statistics.nr_wakeups_fbt_no_cpu);
+       P(se.statistics.nr_wakeups_fbt_no_sd);
+       P(se.statistics.nr_wakeups_fbt_pref_idle);
+       P(se.statistics.nr_wakeups_fbt_count);
+       /* cas */
+       /* select_task_rq_fair() */
+       P(se.statistics.nr_wakeups_cas_attempts);
+       P(se.statistics.nr_wakeups_cas_count);
 
        {
                u64 avg_atom, avg_per_cpu;
index c84e15e3cd560dfc72a17c33786aa1291b475439..dfd150b02e633619c27903f754476e8dcd398c16 100644 (file)
@@ -6059,15 +6059,24 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
        int best_idle_cstate = INT_MAX;
        unsigned long best_idle_capacity = ULONG_MAX;
 
+       schedstat_inc(p, se.statistics.nr_wakeups_sis_attempts);
+       schedstat_inc(this_rq(), eas_stats.sis_attempts);
+
        if (!sysctl_sched_cstate_aware) {
-               if (idle_cpu(target))
+               if (idle_cpu(target)) {
+                       schedstat_inc(p, se.statistics.nr_wakeups_sis_idle);
+                       schedstat_inc(this_rq(), eas_stats.sis_idle);
                        return target;
+               }
 
                /*
                 * If the previous cpu is cache affine and idle, don't be stupid.
                 */
-               if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev))
+               if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev)) {
+                       schedstat_inc(p, se.statistics.nr_wakeups_sis_cache_affine);
+                       schedstat_inc(this_rq(), eas_stats.sis_cache_affine);
                        return prev;
+               }
        }
 
        /*
@@ -6091,8 +6100,12 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
                                        if (new_usage > capacity_orig || !idle_cpu(i))
                                                goto next;
 
-                                       if (i == target && new_usage <= capacity_curr_of(target))
+                                       if (i == target && new_usage <= capacity_curr_of(target)) {
+                                               schedstat_inc(p, se.statistics.nr_wakeups_sis_suff_cap);
+                                               schedstat_inc(this_rq(), eas_stats.sis_suff_cap);
+                                               schedstat_inc(sd, eas_stats.sis_suff_cap);
                                                return target;
+                                       }
 
                                        if (idle_idx < best_idle_cstate &&
                                            capacity_orig <= best_idle_capacity) {
@@ -6109,6 +6122,9 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 
                                target = cpumask_first_and(sched_group_cpus(sg),
                                        tsk_cpus_allowed(p));
+                               schedstat_inc(p, se.statistics.nr_wakeups_sis_idle_cpu);
+                               schedstat_inc(this_rq(), eas_stats.sis_idle_cpu);
+                               schedstat_inc(sd, eas_stats.sis_idle_cpu);
                                goto done;
                        }
 next:
@@ -6120,6 +6136,9 @@ next:
                target = best_idle_cpu;
 
 done:
+       schedstat_inc(p, se.statistics.nr_wakeups_sis_count);
+       schedstat_inc(this_rq(), eas_stats.sis_count);
+
        return target;
 }
 
@@ -6146,13 +6165,22 @@ static inline int find_best_target(struct task_struct *p, bool boosted, bool pre
        struct sched_group *sg;
        int cpu = start_cpu(boosted);
 
-       if (cpu < 0)
+       schedstat_inc(p, se.statistics.nr_wakeups_fbt_attempts);
+       schedstat_inc(this_rq(), eas_stats.fbt_attempts);
+
+       if (cpu < 0) {
+               schedstat_inc(p, se.statistics.nr_wakeups_fbt_no_cpu);
+               schedstat_inc(this_rq(), eas_stats.fbt_no_cpu);
                return target_cpu;
+       }
 
        sd = rcu_dereference(per_cpu(sd_ea, cpu));
 
-       if (!sd)
+       if (!sd) {
+               schedstat_inc(p, se.statistics.nr_wakeups_fbt_no_sd);
+               schedstat_inc(this_rq(), eas_stats.fbt_no_sd);
                return target_cpu;
+       }
 
        sg = sd->groups;
 
@@ -6191,8 +6219,11 @@ static inline int find_best_target(struct task_struct *p, bool boosted, bool pre
                         * Unconditionally favoring tasks that prefer idle cpus to
                         * improve latency.
                         */
-                       if (idle_cpu(i) && prefer_idle)
+                       if (idle_cpu(i) && prefer_idle) {
+                               schedstat_inc(p, se.statistics.nr_wakeups_fbt_pref_idle);
+                               schedstat_inc(this_rq(), eas_stats.fbt_pref_idle);
                                return i;
+                       }
 
                        cur_capacity = capacity_curr_of(i);
 
@@ -6228,6 +6259,11 @@ static inline int find_best_target(struct task_struct *p, bool boosted, bool pre
        if (target_cpu < 0)
                target_cpu = best_idle_cpu >= 0 ? best_idle_cpu : backup_cpu;
 
+       if (target_cpu >= 0) {
+               schedstat_inc(p, se.statistics.nr_wakeups_fbt_count);
+               schedstat_inc(this_rq(), eas_stats.fbt_count);
+       }
+
        return target_cpu;
 }
 
@@ -6279,11 +6315,17 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, int sync
        int target_cpu = prev_cpu, tmp_target;
        bool boosted, prefer_idle;
 
+       schedstat_inc(p, se.statistics.nr_wakeups_secb_attempts);
+       schedstat_inc(this_rq(), eas_stats.secb_attempts);
+
        if (sysctl_sched_sync_hint_enable && sync) {
                int cpu = smp_processor_id();
 
-               if (cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
+               if (cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) {
+                       schedstat_inc(p, se.statistics.nr_wakeups_secb_sync);
+                       schedstat_inc(this_rq(), eas_stats.secb_sync);
                        return cpu;
+               }
        }
 
        rcu_read_lock();
@@ -6303,8 +6345,11 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, int sync
                goto unlock;
        if (tmp_target >= 0) {
                target_cpu = tmp_target;
-               if ((boosted || prefer_idle) && idle_cpu(target_cpu))
+               if ((boosted || prefer_idle) && idle_cpu(target_cpu)) {
+                       schedstat_inc(p, se.statistics.nr_wakeups_secb_idle_bt);
+                       schedstat_inc(this_rq(), eas_stats.secb_idle_bt);
                        goto unlock;
+               }
        }
 
        if (target_cpu != prev_cpu) {
@@ -6316,15 +6361,30 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, int sync
                };
 
                /* Not enough spare capacity on previous cpu */
-               if (cpu_overutilized(prev_cpu))
+               if (cpu_overutilized(prev_cpu)) {
+                       schedstat_inc(p, se.statistics.nr_wakeups_secb_insuff_cap);
+                       schedstat_inc(this_rq(), eas_stats.secb_insuff_cap);
                        goto unlock;
+               }
 
-               if (energy_diff(&eenv) >= 0)
+               if (energy_diff(&eenv) >= 0) {
+                       schedstat_inc(p, se.statistics.nr_wakeups_secb_no_nrg_sav);
+                       schedstat_inc(this_rq(), eas_stats.secb_no_nrg_sav);
                        target_cpu = prev_cpu;
+                       goto unlock;
+               }
+
+               schedstat_inc(p, se.statistics.nr_wakeups_secb_nrg_sav);
+               schedstat_inc(this_rq(), eas_stats.secb_nrg_sav);
+               goto unlock;
        }
 
+       schedstat_inc(p, se.statistics.nr_wakeups_secb_count);
+       schedstat_inc(this_rq(), eas_stats.secb_count);
+
 unlock:
        rcu_read_unlock();
+
        return target_cpu;
 }
 
@@ -6387,39 +6447,57 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
                if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */
                        new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
 
-       } else while (sd) {
-               struct sched_group *group;
-               int weight;
+       } else {
+               int wu = sd_flag & SD_BALANCE_WAKE;
+               int cas_cpu = -1;
 
-               if (!(sd->flags & sd_flag)) {
-                       sd = sd->child;
-                       continue;
+               if (wu) {
+                       schedstat_inc(p, se.statistics.nr_wakeups_cas_attempts);
+                       schedstat_inc(this_rq(), eas_stats.cas_attempts);
                }
 
-               group = find_idlest_group(sd, p, cpu, sd_flag);
-               if (!group) {
-                       sd = sd->child;
-                       continue;
-               }
+               while (sd) {
+                       struct sched_group *group;
+                       int weight;
 
-               new_cpu = find_idlest_cpu(group, p, cpu);
-               if (new_cpu == -1 || new_cpu == cpu) {
-                       /* Now try balancing at a lower domain level of cpu */
-                       sd = sd->child;
-                       continue;
+                       if (wu)
+                               schedstat_inc(sd, eas_stats.cas_attempts);
+
+                       if (!(sd->flags & sd_flag)) {
+                               sd = sd->child;
+                               continue;
+                       }
+
+                       group = find_idlest_group(sd, p, cpu, sd_flag);
+                       if (!group) {
+                               sd = sd->child;
+                               continue;
+                       }
+
+                       new_cpu = find_idlest_cpu(group, p, cpu);
+                       if (new_cpu == -1 || new_cpu == cpu) {
+                               /* Now try balancing at a lower domain level of cpu */
+                               sd = sd->child;
+                               continue;
+                       }
+
+                       /* Now try balancing at a lower domain level of new_cpu */
+                       cpu = cas_cpu = new_cpu;
+                       weight = sd->span_weight;
+                       sd = NULL;
+                       for_each_domain(cpu, tmp) {
+                               if (weight <= tmp->span_weight)
+                                       break;
+                               if (tmp->flags & sd_flag)
+                                       sd = tmp;
+                       }
+                       /* while loop will break here if sd == NULL */
                }
 
-               /* Now try balancing at a lower domain level of new_cpu */
-               cpu = new_cpu;
-               weight = sd->span_weight;
-               sd = NULL;
-               for_each_domain(cpu, tmp) {
-                       if (weight <= tmp->span_weight)
-                               break;
-                       if (tmp->flags & sd_flag)
-                               sd = tmp;
+               if (wu && (cas_cpu >= 0)) {
+                       schedstat_inc(p, se.statistics.nr_wakeups_cas_count);
+                       schedstat_inc(this_rq(), eas_stats.cas_count);
                }
-               /* while loop will break here if sd == NULL */
        }
        rcu_read_unlock();
 
index d0971e5c9269ed7ca9133f8d8c8f3671e60e13de..6880fbc3976080c47144841591c1cf5c8fb5377b 100644 (file)
@@ -734,6 +734,8 @@ struct rq {
        /* try_to_wake_up() stats */
        unsigned int ttwu_count;
        unsigned int ttwu_local;
+
+       struct eas_stats eas_stats;
 #endif
 
 #ifdef CONFIG_SMP
index 87e2c9f0c33e7470252206c893fd5fb1e250790c..b63879918cd6080857048eb285de081c112e6d0c 100644 (file)
  */
 #define SCHEDSTAT_VERSION 15
 
+static inline void show_easstat(struct seq_file *seq, struct eas_stats *stats)
+{
+       /* eas-specific runqueue stats */
+       seq_printf(seq, "eas %llu %llu %llu %llu %llu %llu ",
+           stats->sis_attempts, stats->sis_idle, stats->sis_cache_affine,
+           stats->sis_suff_cap, stats->sis_idle_cpu, stats->sis_count);
+
+       seq_printf(seq, "%llu %llu %llu %llu %llu %llu %llu ",
+           stats->secb_attempts, stats->secb_sync, stats->secb_idle_bt,
+           stats->secb_insuff_cap, stats->secb_no_nrg_sav,
+           stats->secb_nrg_sav, stats->secb_count);
+
+       seq_printf(seq, "%llu %llu %llu %llu %llu ",
+           stats->fbt_attempts, stats->fbt_no_cpu, stats->fbt_no_sd,
+           stats->fbt_pref_idle, stats->fbt_count);
+
+       seq_printf(seq, "%llu %llu\n",
+           stats->cas_attempts, stats->cas_count);
+}
+
 static int show_schedstat(struct seq_file *seq, void *v)
 {
        int cpu;
@@ -39,6 +59,7 @@ static int show_schedstat(struct seq_file *seq, void *v)
 
                seq_printf(seq, "\n");
 
+               show_easstat(seq, &rq->eas_stats);
 #ifdef CONFIG_SMP
                /* domain-specific stats */
                rcu_read_lock();
@@ -66,6 +87,8 @@ static int show_schedstat(struct seq_file *seq, void *v)
                            sd->sbf_count, sd->sbf_balanced, sd->sbf_pushed,
                            sd->ttwu_wake_remote, sd->ttwu_move_affine,
                            sd->ttwu_move_balance);
+
+                       show_easstat(seq, &sd->eas_stats);
                }
                rcu_read_unlock();
 #endif