DEBUG: sched: add tracepoint for RD overutilized
[firefly-linux-kernel-4.4.55.git]
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 9c717c3be75df324761fd473b7fbe920113e5c40..9139e153671a499495983331ba929d4e052534f5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
 #include <linux/mempolicy.h>
 #include <linux/migrate.h>
 #include <linux/task_work.h>
+#include <linux/module.h>
 
 #include <trace/events/sched.h>
 
 #include "sched.h"
 #include "tune.h"
+#include "walt.h"
 
 /*
  * Targeted preemption latency for CPU-bound tasks:
@@ -56,6 +58,12 @@ unsigned int sysctl_sched_sync_hint_enable = 1;
 unsigned int sysctl_sched_initial_task_util = 0;
 unsigned int sysctl_sched_cstate_aware = 1;
 
+#ifdef CONFIG_SCHED_WALT
+unsigned int sysctl_sched_use_walt_cpu_util = 1;
+unsigned int sysctl_sched_use_walt_task_util = 1;
+__read_mostly unsigned int sysctl_sched_walt_cpu_high_irqload =
+    (10 * NSEC_PER_MSEC);
+#endif
 /*
  * The initial- and re-scaling of tunables is configurable
  * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
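
A note on the WALT knobs introduced above: sysctl_sched_use_walt_cpu_util and sysctl_sched_use_walt_task_util switch the scheduler's CPU and task utilization estimates over to WALT signals, while sysctl_sched_walt_cpu_high_irqload (10 ms) is the IRQ-load level beyond which find_best_target() skips a CPU via walt_cpu_high_irqload() later in this diff. A minimal user-space sketch of that threshold check, assuming WALT tracks recent per-CPU IRQ time in nanoseconds (the helper and field names below are hypothetical, not taken from walt.h):

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_MSEC 1000000ULL

/* Mirrors the new sysctl default: 10 ms of IRQ time marks a CPU as "high irqload". */
static const uint64_t walt_cpu_high_irqload_ns = 10 * NSEC_PER_MSEC;

/* Hypothetical model: WALT is assumed to track recent IRQ time per CPU in ns. */
static bool cpu_high_irqload(uint64_t recent_irq_time_ns)
{
	return recent_irq_time_ns >= walt_cpu_high_irqload_ns;
}

int main(void)
{
	/* A CPU that spent 12 ms in IRQ context recently would be skipped by
	 * find_best_target(); one with 3 ms would remain a candidate. */
	printf("12ms irq -> high irqload: %d\n", cpu_high_irqload(12 * NSEC_PER_MSEC));
	printf(" 3ms irq -> high irqload: %d\n", cpu_high_irqload(3 * NSEC_PER_MSEC));
	return 0;
}
```
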
@@ -4225,6 +4233,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
                if (cfs_rq_throttled(cfs_rq))
                        break;
                cfs_rq->h_nr_running++;
+               walt_inc_cfs_cumulative_runnable_avg(cfs_rq, p);
 
                flags = ENQUEUE_WAKEUP;
        }
@@ -4232,6 +4241,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
        for_each_sched_entity(se) {
                cfs_rq = cfs_rq_of(se);
                cfs_rq->h_nr_running++;
+               walt_inc_cfs_cumulative_runnable_avg(cfs_rq, p);
 
                if (cfs_rq_throttled(cfs_rq))
                        break;
@@ -4246,11 +4256,12 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 #ifdef CONFIG_SMP
 
        if (!se) {
+               walt_inc_cumulative_runnable_avg(rq, p);
                if (!task_new && !rq->rd->overutilized &&
-                   cpu_overutilized(rq->cpu))
+                   cpu_overutilized(rq->cpu)) {
                        rq->rd->overutilized = true;
-
-               schedtune_enqueue_task(p, cpu_of(rq));
+                       trace_sched_overutilized(true);
+               }
 
                /*
                 * We want to potentially trigger a freq switch
@@ -4262,8 +4273,11 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
                if (task_new || task_wakeup)
                        update_capacity_of(cpu_of(rq));
        }
-#endif /* CONFIG_SMP */
 
+       /* Update SchedTune accounting */
+       schedtune_enqueue_task(p, cpu_of(rq));
+
+#endif /* CONFIG_SMP */
        hrtick_update(rq);
 }
 
@@ -4293,6 +4307,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
                if (cfs_rq_throttled(cfs_rq))
                        break;
                cfs_rq->h_nr_running--;
+               walt_dec_cfs_cumulative_runnable_avg(cfs_rq, p);
 
                /* Don't dequeue parent if it has other entities besides us */
                if (cfs_rq->load.weight) {
@@ -4313,6 +4328,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
        for_each_sched_entity(se) {
                cfs_rq = cfs_rq_of(se);
                cfs_rq->h_nr_running--;
+               walt_dec_cfs_cumulative_runnable_avg(cfs_rq, p);
 
                if (cfs_rq_throttled(cfs_rq))
                        break;
@@ -4327,7 +4343,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 #ifdef CONFIG_SMP
 
        if (!se) {
-               schedtune_dequeue_task(p, cpu_of(rq));
+               walt_dec_cumulative_runnable_avg(rq, p);
 
                /*
                 * We want to potentially trigger a freq switch
@@ -4345,6 +4361,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
                }
        }
 
+       /* Update SchedTune accounting */
+       schedtune_dequeue_task(p, cpu_of(rq));
+
 #endif /* CONFIG_SMP */
 
        hrtick_update(rq);
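
The walt_inc/dec_*cumulative_runnable_avg() calls threaded through enqueue_task_fair() and dequeue_task_fair() above are assumed here to maintain a per-rq (and per-cfs_rq) running sum of the WALT demand of all runnable tasks. The sketch below models only that bookkeeping, with illustrative names that are not taken from walt.h:

```c
#include <stdint.h>
#include <stdio.h>

/* User-space model of the assumed bookkeeping: a running sum of the WALT
 * demand of runnable tasks, bumped on enqueue and dropped on dequeue. */
struct model_rq {
	uint64_t cumulative_runnable_avg;	/* sum of demand, in ns */
};

static void inc_cumulative_runnable_avg(struct model_rq *rq, uint64_t demand)
{
	rq->cumulative_runnable_avg += demand;
}

static void dec_cumulative_runnable_avg(struct model_rq *rq, uint64_t demand)
{
	rq->cumulative_runnable_avg -= demand;
}

int main(void)
{
	struct model_rq rq = { 0 };

	inc_cumulative_runnable_avg(&rq, 5000000);	/* enqueue: 5 ms demand */
	inc_cumulative_runnable_avg(&rq, 2000000);	/* enqueue: 2 ms demand */
	dec_cumulative_runnable_avg(&rq, 5000000);	/* dequeue the first task */
	printf("runnable demand now %llu ns\n",
	       (unsigned long long)rq.cumulative_runnable_avg);
	return 0;
}
```
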
@@ -4984,6 +5003,7 @@ static int sched_group_energy(struct energy_env *eenv)
                        } while (sg = sg->next, sg != sd->groups);
                }
 next_cpu:
+               cpumask_clear_cpu(cpu, &visit_cpus);
                continue;
        }
 
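
The one-line fix above clears each CPU from visit_cpus once its accounting is done on the next_cpu: path. Assuming the enclosing loop in sched_group_energy() repeatedly takes the first CPU still set in visit_cpus (which the next_cpu:/continue structure suggests), skipping the clear would make the loop spin on the same CPU forever. A user-space model of that walk:

```c
#include <stdint.h>
#include <stdio.h>

/* Model of the visit_cpus walk: the loop keeps taking the first set bit, so
 * each pass must clear the bit it just visited or it never terminates --
 * which is what the added cpumask_clear_cpu() on the next_cpu: path ensures. */
int main(void)
{
	uint32_t visit_cpus = 0x2d;	/* CPUs 0, 2, 3, 5 still to visit */

	while (visit_cpus) {
		int cpu = __builtin_ctz(visit_cpus);	/* cpumask_first() */

		printf("visiting cpu %d\n", cpu);
		/* ... per-CPU energy accounting would happen here ... */

		visit_cpus &= ~(1u << cpu);	/* cpumask_clear_cpu() */
	}
	return 0;
}
```
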
@@ -5224,6 +5244,12 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 
 static inline unsigned long task_util(struct task_struct *p)
 {
+#ifdef CONFIG_SCHED_WALT
+       if (!walt_disabled && sysctl_sched_use_walt_task_util) {
+               unsigned long demand = p->ravg.demand;
+               return (demand << 10) / walt_ravg_window;
+       }
+#endif
        return p->se.avg.util_avg;
 }
 
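
The WALT branch added to task_util() above rescales the task's demand (nanoseconds of estimated runtime per WALT window) onto the same 0..1024 capacity scale that PELT's util_avg uses: util = (demand << 10) / walt_ravg_window. A standalone sketch with a worked example; the 20 ms window length is an assumption for illustration only:

```c
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_MSEC 1000000ULL
#define SCHED_CAPACITY_SHIFT 10	/* capacity scale: 1024 == one fully busy CPU */

/* Illustrative window length; the real walt_ravg_window is configured elsewhere. */
static const uint64_t walt_ravg_window = 20 * NSEC_PER_MSEC;

/* Same arithmetic as the WALT branch of task_util(). */
static uint64_t walt_task_util(uint64_t demand_ns)
{
	return (demand_ns << SCHED_CAPACITY_SHIFT) / walt_ravg_window;
}

int main(void)
{
	/* A task demanding 5 ms of CPU per 20 ms window is ~25% busy,
	 * so its utilization comes out at 1024 / 4 = 256. */
	printf("demand 5ms  -> util %llu\n",
	       (unsigned long long)walt_task_util(5 * NSEC_PER_MSEC));
	printf("demand 20ms -> util %llu\n",
	       (unsigned long long)walt_task_util(20 * NSEC_PER_MSEC));
	return 0;
}
```
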
@@ -5586,32 +5612,30 @@ done:
        return target;
 }
 
-static inline int find_best_target(struct task_struct *p)
+static inline int find_best_target(struct task_struct *p, bool prefer_idle)
 {
-       int i, boosted;
+       int iter_cpu;
        int target_cpu = -1;
-       int target_capacity = 0;
+       int target_util = 0;
        int backup_capacity = 0;
-       int idle_cpu = -1;
+       int best_idle_cpu = -1;
        int best_idle_cstate = INT_MAX;
        int backup_cpu = -1;
        unsigned long task_util_boosted, new_util;
 
-       /*
-        * Favor 1) busy cpu with most capacity at current OPP
-        *       2) idle_cpu with capacity at current OPP
-        *       3) busy cpu with capacity at higher OPP
-        */
-#ifdef CONFIG_CGROUP_SCHEDTUNE
-       boosted = schedtune_task_boost(p);
-#else
-       boosted = 0;
-#endif
        task_util_boosted = boosted_task_util(p);
-       for_each_cpu(i, tsk_cpus_allowed(p)) {
-               int cur_capacity = capacity_curr_of(i);
-               struct rq *rq = cpu_rq(i);
-               int idle_idx = idle_get_state_idx(rq);
+       for (iter_cpu = 0; iter_cpu < NR_CPUS; iter_cpu++) {
+               int cur_capacity;
+               struct rq *rq;
+               int idle_idx;
+
+               /*
+                * favor higher cpus for tasks that prefer idle cores
+                */
+               int i = prefer_idle ? NR_CPUS-iter_cpu-1 : iter_cpu;
+
+               if (!cpu_online(i) || !cpumask_test_cpu(i, tsk_cpus_allowed(p)))
+                       continue;
 
                /*
                 * p's blocked utilization is still accounted for on prev_cpu
@@ -5625,54 +5649,54 @@ static inline int find_best_target(struct task_struct *p)
                 * The target CPU can be already at a capacity level higher
                 * than the one required to boost the task.
                 */
-
                if (new_util > capacity_orig_of(i))
                        continue;
 
+#ifdef CONFIG_SCHED_WALT
+               if (walt_cpu_high_irqload(i))
+                       continue;
+#endif
                /*
-                * For boosted tasks we favor idle cpus unconditionally to
+                * Unconditionally favor idle cpus for tasks that prefer idle, to
                 * improve latency.
                 */
-               if (idle_idx >= 0 && boosted) {
-                       if (idle_cpu < 0 ||
-                               (sysctl_sched_cstate_aware &&
-                                best_idle_cstate > idle_idx)) {
-                               best_idle_cstate = idle_idx;
-                               idle_cpu = i;
-                       }
+               if (idle_cpu(i) && prefer_idle) {
+                       if (best_idle_cpu < 0)
+                               best_idle_cpu = i;
                        continue;
                }
 
+               cur_capacity = capacity_curr_of(i);
+               rq = cpu_rq(i);
+               idle_idx = idle_get_state_idx(rq);
+
                if (new_util < cur_capacity) {
                        if (cpu_rq(i)->nr_running) {
-                               if (target_capacity == 0 ||
-                                       target_capacity > cur_capacity) {
-                                       /* busy CPU with most capacity at current OPP */
+                               if (target_util == 0 ||
+                                       target_util > new_util) {
                                        target_cpu = i;
-                                       target_capacity = cur_capacity;
+                                       target_util = new_util;
                                }
-                       } else if (!boosted) {
-                               if (idle_cpu < 0 ||
+                       } else if (!prefer_idle) {
+                               if (best_idle_cpu < 0 ||
                                        (sysctl_sched_cstate_aware &&
                                                best_idle_cstate > idle_idx)) {
                                        best_idle_cstate = idle_idx;
-                                       idle_cpu = i;
+                                       best_idle_cpu = i;
                                }
                        }
                } else if (backup_capacity == 0 ||
                                backup_capacity > cur_capacity) {
-                       /* first busy CPU with capacity at higher OPP */
                        backup_capacity = cur_capacity;
                        backup_cpu = i;
                }
        }
 
-       if (!boosted && target_cpu < 0) {
-               target_cpu = idle_cpu >= 0 ? idle_cpu : backup_cpu;
-       }
+       if (prefer_idle && best_idle_cpu >= 0)
+               target_cpu = best_idle_cpu;
+       else if (target_cpu < 0)
+               target_cpu = best_idle_cpu >= 0 ? best_idle_cpu : backup_cpu;
 
-       if (boosted && idle_cpu >= 0)
-               target_cpu = idle_cpu;
        return target_cpu;
 }
 
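
The reworked find_best_target() walks CPU indices directly instead of using for_each_cpu(), and for prefer_idle tasks reverses the walk (i = NR_CPUS - iter_cpu - 1) so that higher-numbered CPUs, often the higher-capacity cluster on big.LITTLE systems, are examined first; the first online, allowed idle CPU seen in that order becomes best_idle_cpu. A small sketch of just the ordering (the CPU count is made up for the example):

```c
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 8

static void show_scan_order(bool prefer_idle)
{
	printf("prefer_idle=%d scan order:", prefer_idle);
	for (int iter_cpu = 0; iter_cpu < NR_CPUS; iter_cpu++) {
		/* Same index trick as find_best_target(): reverse the walk for
		 * prefer_idle tasks so higher-numbered CPUs are tried first. */
		int i = prefer_idle ? NR_CPUS - iter_cpu - 1 : iter_cpu;

		printf(" %d", i);
	}
	printf("\n");
}

int main(void)
{
	show_scan_order(false);	/* 0 1 2 3 4 5 6 7 */
	show_scan_order(true);	/* 7 6 5 4 3 2 1 0 */
	return 0;
}
```
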
@@ -5758,9 +5782,19 @@ static int energy_aware_wake_cpu(struct task_struct *p, int target, int sync)
                /*
                 * Find a cpu with sufficient capacity
                 */
-               int tmp_target = find_best_target(p);
-               if (tmp_target >= 0)
+#ifdef CONFIG_CGROUP_SCHEDTUNE
+               bool boosted = schedtune_task_boost(p) > 0;
+               bool prefer_idle = schedtune_prefer_idle(p) > 0;
+#else
+               bool boosted = 0;
+               bool prefer_idle = 0;
+#endif
+               int tmp_target = find_best_target(p, boosted || prefer_idle);
+               if (tmp_target >= 0) {
                        target_cpu = tmp_target;
+                       if ((boosted || prefer_idle) && idle_cpu(target_cpu))
+                               return target_cpu;
+               }
        }
 
        if (target_cpu != task_cpu(p)) {
@@ -6615,7 +6649,9 @@ static void detach_task(struct task_struct *p, struct lb_env *env)
 
        deactivate_task(env->src_rq, p, 0);
        p->on_rq = TASK_ON_RQ_MIGRATING;
+       double_lock_balance(env->src_rq, env->dst_rq);
        set_task_cpu(p, env->dst_cpu);
+       double_unlock_balance(env->src_rq, env->dst_rq);
 }
 
 /*
@@ -7490,12 +7526,17 @@ next_group:
                        env->dst_rq->rd->overload = overload;
 
                /* Update over-utilization (tipping point, U >= 0) indicator */
-               if (env->dst_rq->rd->overutilized != overutilized)
+               if (env->dst_rq->rd->overutilized != overutilized) {
                        env->dst_rq->rd->overutilized = overutilized;
+                       trace_sched_overutilized(overutilized);
+               }
        } else {
-               if (!env->dst_rq->rd->overutilized && overutilized)
+               if (!env->dst_rq->rd->overutilized && overutilized) {
                        env->dst_rq->rd->overutilized = true;
+                       trace_sched_overutilized(true);
+               }
        }
+
 }
 
 /**
@@ -8935,8 +8976,10 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
                task_tick_numa(rq, curr);
 
 #ifdef CONFIG_SMP
-       if (!rq->rd->overutilized && cpu_overutilized(task_cpu(curr)))
+       if (!rq->rd->overutilized && cpu_overutilized(task_cpu(curr))) {
                rq->rd->overutilized = true;
+               trace_sched_overutilized(true);
+       }
 
        rq->misfit_task = !task_fits_max(curr, rq->cpu);
 #endif
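
The trace_sched_overutilized() calls added throughout this patch (the commit's stated purpose) rely on a matching tracepoint declared outside this file, presumably in include/trace/events/sched.h. That declaration is not part of this diff; a plausible minimal sketch, assuming it records just the new boolean state, would be:

```c
/* include/trace/events/sched.h (sketch, not taken from this diff) */
TRACE_EVENT(sched_overutilized,

	TP_PROTO(bool overutilized),

	TP_ARGS(overutilized),

	TP_STRUCT__entry(
		__field(bool, overutilized)
	),

	TP_fast_assign(
		__entry->overutilized = overutilized;
	),

	TP_printk("overutilized=%d", __entry->overutilized ? 1 : 0)
);
```
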