DEBUG: sched: add tracepoint for RD overutilized
[firefly-linux-kernel-4.4.55.git]
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 9c717c3be75df324761fd473b7fbe920113e5c40..9139e153671a499495983331ba929d4e052534f5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
 #include <linux/mempolicy.h>
 #include <linux/migrate.h>
 #include <linux/task_work.h>
+#include <linux/module.h>
 
 #include <trace/events/sched.h>
 
 #include "sched.h"
 #include "tune.h"
+#include "walt.h"
 
 /*
  * Targeted preemption latency for CPU-bound tasks:
@@ -56,6 +58,12 @@ unsigned int sysctl_sched_sync_hint_enable = 1;
 unsigned int sysctl_sched_initial_task_util = 0;
 unsigned int sysctl_sched_cstate_aware = 1;
 
+#ifdef CONFIG_SCHED_WALT
+unsigned int sysctl_sched_use_walt_cpu_util = 1;
+unsigned int sysctl_sched_use_walt_task_util = 1;
+__read_mostly unsigned int sysctl_sched_walt_cpu_high_irqload =
+    (10 * NSEC_PER_MSEC);
+#endif
 /*
  * The initial- and re-scaling of tunables is configurable
  * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
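
A note on the WALT knobs introduced above: sysctl_sched_use_walt_cpu_util and sysctl_sched_use_walt_task_util switch the scheduler's CPU and task utilization estimates over to WALT signals, while sysctl_sched_walt_cpu_high_irqload (10 ms) is the IRQ-load level beyond which find_best_target() skips a CPU via walt_cpu_high_irqload() later in this diff. A minimal user-space sketch of that threshold check, assuming WALT tracks recent per-CPU IRQ time in nanoseconds (the helper and field names below are hypothetical, not taken from walt.h):

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_MSEC 1000000ULL

/* Mirrors the new sysctl default: 10 ms of IRQ time marks a CPU as "high irqload". */
static const uint64_t walt_cpu_high_irqload_ns = 10 * NSEC_PER_MSEC;

/* Hypothetical model: WALT is assumed to track recent IRQ time per CPU in ns. */
static bool cpu_high_irqload(uint64_t recent_irq_time_ns)
{
	return recent_irq_time_ns >= walt_cpu_high_irqload_ns;
}

int main(void)
{
	/* A CPU that spent 12 ms in IRQ context recently would be skipped by
	 * find_best_target(); one with 3 ms would remain a candidate. */
	printf("12ms irq -> high irqload: %d\n", cpu_high_irqload(12 * NSEC_PER_MSEC));
	printf(" 3ms irq -> high irqload: %d\n", cpu_high_irqload(3 * NSEC_PER_MSEC));
	return 0;
}
```
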
@@ -4225,6 +4233,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
                if (cfs_rq_throttled(cfs_rq))
                        break;
                cfs_rq->h_nr_running++;
+               walt_inc_cfs_cumulative_runnable_avg(cfs_rq, p);
 
                flags = ENQUEUE_WAKEUP;
        }
@@ -4232,6 +4241,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
        for_each_sched_entity(se) {
                cfs_rq = cfs_rq_of(se);
                cfs_rq->h_nr_running++;
+               walt_inc_cfs_cumulative_runnable_avg(cfs_rq, p);
 
                if (cfs_rq_throttled(cfs_rq))
                        break;
@@ -4246,11 +4256,12 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 #ifdef CONFIG_SMP
 
        if (!se) {
+               walt_inc_cumulative_runnable_avg(rq, p);
                if (!task_new && !rq->rd->overutilized &&
-                   cpu_overutilized(rq->cpu))
+                   cpu_overutilized(rq->cpu)) {
                        rq->rd->overutilized = true;
-
-               schedtune_enqueue_task(p, cpu_of(rq));
+                       trace_sched_overutilized(true);
+               }
 
                /*
                 * We want to potentially trigger a freq switch
@@ -4262,8 +4273,11 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
                if (task_new || task_wakeup)
                        update_capacity_of(cpu_of(rq));
        }
-#endif /* CONFIG_SMP */
 
+       /* Update SchedTune accounting */
+       schedtune_enqueue_task(p, cpu_of(rq));
+
+#endif /* CONFIG_SMP */
        hrtick_update(rq);
 }
 
@@ -4293,6 +4307,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
                if (cfs_rq_throttled(cfs_rq))
                        break;
                cfs_rq->h_nr_running--;
+               walt_dec_cfs_cumulative_runnable_avg(cfs_rq, p);
 
                /* Don't dequeue parent if it has other entities besides us */
                if (cfs_rq->load.weight) {
@@ -4313,6 +4328,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
        for_each_sched_entity(se) {
                cfs_rq = cfs_rq_of(se);
                cfs_rq->h_nr_running--;
+               walt_dec_cfs_cumulative_runnable_avg(cfs_rq, p);
 
                if (cfs_rq_throttled(cfs_rq))
                        break;
@@ -4327,7 +4343,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 #ifdef CONFIG_SMP
 
        if (!se) {
-               schedtune_dequeue_task(p, cpu_of(rq));
+               walt_dec_cumulative_runnable_avg(rq, p);
 
                /*
                 * We want to potentially trigger a freq switch
@@ -4345,6 +4361,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
                }
        }
 
+       /* Update SchedTune accounting */
+       schedtune_dequeue_task(p, cpu_of(rq));
+
 #endif /* CONFIG_SMP */
 
        hrtick_update(rq);
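
The walt_inc/dec_*cumulative_runnable_avg() calls threaded through enqueue_task_fair() and dequeue_task_fair() above are assumed here to maintain a per-rq (and per-cfs_rq) running sum of the WALT demand of all runnable tasks. The sketch below models only that bookkeeping, with illustrative names that are not taken from walt.h:

```c
#include <stdint.h>
#include <stdio.h>

/* User-space model of the assumed bookkeeping: a running sum of the WALT
 * demand of runnable tasks, bumped on enqueue and dropped on dequeue. */
struct model_rq {
	uint64_t cumulative_runnable_avg;	/* sum of demand, in ns */
};

static void inc_cumulative_runnable_avg(struct model_rq *rq, uint64_t demand)
{
	rq->cumulative_runnable_avg += demand;
}

static void dec_cumulative_runnable_avg(struct model_rq *rq, uint64_t demand)
{
	rq->cumulative_runnable_avg -= demand;
}

int main(void)
{
	struct model_rq rq = { 0 };

	inc_cumulative_runnable_avg(&rq, 5000000);	/* enqueue: 5 ms demand */
	inc_cumulative_runnable_avg(&rq, 2000000);	/* enqueue: 2 ms demand */
	dec_cumulative_runnable_avg(&rq, 5000000);	/* dequeue the first task */
	printf("runnable demand now %llu ns\n",
	       (unsigned long long)rq.cumulative_runnable_avg);
	return 0;
}
```
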
@@ -4984,6 +5003,7 @@ static int sched_group_energy(struct energy_env *eenv)
                        } while (sg = sg->next, sg != sd->groups);
                }
 next_cpu:
+               cpumask_clear_cpu(cpu, &visit_cpus);
                continue;
        }
 
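
The one-line fix above clears each CPU from visit_cpus once its accounting is done on the next_cpu: path. Assuming the enclosing loop in sched_group_energy() repeatedly takes the first CPU still set in visit_cpus (which the next_cpu:/continue structure suggests), skipping the clear would make the loop spin on the same CPU forever. A user-space model of that walk:

```c
#include <stdint.h>
#include <stdio.h>

/* Model of the visit_cpus walk: the loop keeps taking the first set bit, so
 * each pass must clear the bit it just visited or it never terminates --
 * which is what the added cpumask_clear_cpu() on the next_cpu: path ensures. */
int main(void)
{
	uint32_t visit_cpus = 0x2d;	/* CPUs 0, 2, 3, 5 still to visit */

	while (visit_cpus) {
		int cpu = __builtin_ctz(visit_cpus);	/* cpumask_first() */

		printf("visiting cpu %d\n", cpu);
		/* ... per-CPU energy accounting would happen here ... */

		visit_cpus &= ~(1u << cpu);	/* cpumask_clear_cpu() */
	}
	return 0;
}
```
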
@@ -5224,6 +5244,12 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 
 static inline unsigned long task_util(struct task_struct *p)
 {
+#ifdef CONFIG_SCHED_WALT
+       if (!walt_disabled && sysctl_sched_use_walt_task_util) {
+               unsigned long demand = p->ravg.demand;
+               return (demand << 10) / walt_ravg_window;
+       }
+#endif
        return p->se.avg.util_avg;
 }
 
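
The WALT branch added to task_util() above rescales the task's demand (nanoseconds of estimated runtime per WALT window) onto the same 0..1024 capacity scale that PELT's util_avg uses: util = (demand << 10) / walt_ravg_window. A standalone sketch with a worked example; the 20 ms window length is an assumption for illustration only:

```c
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_MSEC 1000000ULL
#define SCHED_CAPACITY_SHIFT 10	/* capacity scale: 1024 == one fully busy CPU */

/* Illustrative window length; the real walt_ravg_window is configured elsewhere. */
static const uint64_t walt_ravg_window = 20 * NSEC_PER_MSEC;

/* Same arithmetic as the WALT branch of task_util(). */
static uint64_t walt_task_util(uint64_t demand_ns)
{
	return (demand_ns << SCHED_CAPACITY_SHIFT) / walt_ravg_window;
}

int main(void)
{
	/* A task demanding 5 ms of CPU per 20 ms window is ~25% busy,
	 * so its utilization comes out at 1024 / 4 = 256. */
	printf("demand 5ms  -> util %llu\n",
	       (unsigned long long)walt_task_util(5 * NSEC_PER_MSEC));
	printf("demand 20ms -> util %llu\n",
	       (unsigned long long)walt_task_util(20 * NSEC_PER_MSEC));
	return 0;
}
```
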
@@ -5586,32 +5612,30 @@ done:
        return target;
 }
 
-static inline int find_best_target(struct task_struct *p)
+static inline int find_best_target(struct task_struct *p, bool prefer_idle)
 {
-       int i, boosted;
+       int iter_cpu;
        int target_cpu = -1;
-       int target_capacity = 0;
+       int target_util = 0;
        int backup_capacity = 0;
-       int idle_cpu = -1;
+       int best_idle_cpu = -1;
        int best_idle_cstate = INT_MAX;
        int backup_cpu = -1;
        unsigned long task_util_boosted, new_util;
 
-       /*
-        * Favor 1) busy cpu with most capacity at current OPP
-        *       2) idle_cpu with capacity at current OPP
-        *       3) busy cpu with capacity at higher OPP
-        */
-#ifdef CONFIG_CGROUP_SCHEDTUNE
-       boosted = schedtune_task_boost(p);
-#else
-       boosted = 0;
-#endif
        task_util_boosted = boosted_task_util(p);
-       for_each_cpu(i, tsk_cpus_allowed(p)) {
-               int cur_capacity = capacity_curr_of(i);
-               struct rq *rq = cpu_rq(i);
-               int idle_idx = idle_get_state_idx(rq);
+       for (iter_cpu = 0; iter_cpu < NR_CPUS; iter_cpu++) {
+               int cur_capacity;
+               struct rq *rq;
+               int idle_idx;
+
+               /*
+                * favor higher cpus for tasks that prefer idle cores
+                */
+               int i = prefer_idle ? NR_CPUS-iter_cpu-1 : iter_cpu;
+
+               if (!cpu_online(i) || !cpumask_test_cpu(i, tsk_cpus_allowed(p)))
+                       continue;
 
                /*
                 * p's blocked utilization is still accounted for on prev_cpu
@@ -5625,54 +5649,54 @@ static inline int find_best_target(struct task_struct *p)
                 * The target CPU can be already at a capacity level higher
                 * than the one required to boost the task.
                 */
-
                if (new_util > capacity_orig_of(i))
                        continue;
 
+#ifdef CONFIG_SCHED_WALT
+               if (walt_cpu_high_irqload(i))
+                       continue;
+#endif
                /*
-                * For boosted tasks we favor idle cpus unconditionally to
+                * Unconditionally favor idle cpus for tasks that prefer idle, to
                 * improve latency.
                 */
-               if (idle_idx >= 0 && boosted) {
-                       if (idle_cpu < 0 ||
-                               (sysctl_sched_cstate_aware &&
-                                best_idle_cstate > idle_idx)) {
-                               best_idle_cstate = idle_idx;
-                               idle_cpu = i;
-                       }
+               if (idle_cpu(i) && prefer_idle) {
+                       if (best_idle_cpu < 0)
+                               best_idle_cpu = i;
                        continue;
                }
 
+               cur_capacity = capacity_curr_of(i);
+               rq = cpu_rq(i);
+               idle_idx = idle_get_state_idx(rq);
+
                if (new_util < cur_capacity) {
                        if (cpu_rq(i)->nr_running) {
-                               if (target_capacity == 0 ||
-                                       target_capacity > cur_capacity) {
-                                       /* busy CPU with most capacity at current OPP */
+                               if (target_util == 0 ||
+                                       target_util > new_util) {
                                        target_cpu = i;
-                                       target_capacity = cur_capacity;
+                                       target_util = new_util;
                                }
-                       } else if (!boosted) {
-                               if (idle_cpu < 0 ||
+                       } else if (!prefer_idle) {
+                               if (best_idle_cpu < 0 ||
                                        (sysctl_sched_cstate_aware &&
                                                best_idle_cstate > idle_idx)) {
                                        best_idle_cstate = idle_idx;
-                                       idle_cpu = i;
+                                       best_idle_cpu = i;
                                }
                        }
                } else if (backup_capacity == 0 ||
                                backup_capacity > cur_capacity) {
-                       /* first busy CPU with capacity at higher OPP */
                        backup_capacity = cur_capacity;
                        backup_cpu = i;
                }
        }
 
-       if (!boosted && target_cpu < 0) {
-               target_cpu = idle_cpu >= 0 ? idle_cpu : backup_cpu;
-       }
+       if (prefer_idle && best_idle_cpu >= 0)
+               target_cpu = best_idle_cpu;
+       else if (target_cpu < 0)
+               target_cpu = best_idle_cpu >= 0 ? best_idle_cpu : backup_cpu;
 
-       if (boosted && idle_cpu >= 0)
-               target_cpu = idle_cpu;
        return target_cpu;
 }
 
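
The reworked find_best_target() walks CPU indices directly instead of using for_each_cpu(), and for prefer_idle tasks reverses the walk (i = NR_CPUS - iter_cpu - 1) so that higher-numbered CPUs, often the higher-capacity cluster on big.LITTLE systems, are examined first; the first online, allowed idle CPU seen in that order becomes best_idle_cpu. A small sketch of just the ordering (the CPU count is made up for the example):

```c
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 8

static void show_scan_order(bool prefer_idle)
{
	printf("prefer_idle=%d scan order:", prefer_idle);
	for (int iter_cpu = 0; iter_cpu < NR_CPUS; iter_cpu++) {
		/* Same index trick as find_best_target(): reverse the walk for
		 * prefer_idle tasks so higher-numbered CPUs are tried first. */
		int i = prefer_idle ? NR_CPUS - iter_cpu - 1 : iter_cpu;

		printf(" %d", i);
	}
	printf("\n");
}

int main(void)
{
	show_scan_order(false);	/* 0 1 2 3 4 5 6 7 */
	show_scan_order(true);	/* 7 6 5 4 3 2 1 0 */
	return 0;
}
```
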
@@ -5758,9 +5782,19 @@ static int energy_aware_wake_cpu(struct task_struct *p, int target, int sync)
                /*
                 * Find a cpu with sufficient capacity
                 */
-               int tmp_target = find_best_target(p);
-               if (tmp_target >= 0)
+#ifdef CONFIG_CGROUP_SCHEDTUNE
+               bool boosted = schedtune_task_boost(p) > 0;
+               bool prefer_idle = schedtune_prefer_idle(p) > 0;
+#else
+               bool boosted = 0;
+               bool prefer_idle = 0;
+#endif
+               int tmp_target = find_best_target(p, boosted || prefer_idle);
+               if (tmp_target >= 0) {
                        target_cpu = tmp_target;
+                       if ((boosted || prefer_idle) && idle_cpu(target_cpu))
+                               return target_cpu;
+               }
        }
 
        if (target_cpu != task_cpu(p)) {
@@ -6615,7 +6649,9 @@ static void detach_task(struct task_struct *p, struct lb_env *env)
 
        deactivate_task(env->src_rq, p, 0);
        p->on_rq = TASK_ON_RQ_MIGRATING;
+       double_lock_balance(env->src_rq, env->dst_rq);
        set_task_cpu(p, env->dst_cpu);
+       double_unlock_balance(env->src_rq, env->dst_rq);
 }
 
 /*
@@ -7490,12 +7526,17 @@ next_group:
                        env->dst_rq->rd->overload = overload;
 
                /* Update over-utilization (tipping point, U >= 0) indicator */
-               if (env->dst_rq->rd->overutilized != overutilized)
+               if (env->dst_rq->rd->overutilized != overutilized) {
                        env->dst_rq->rd->overutilized = overutilized;
+                       trace_sched_overutilized(overutilized);
+               }
        } else {
-               if (!env->dst_rq->rd->overutilized && overutilized)
+               if (!env->dst_rq->rd->overutilized && overutilized) {
                        env->dst_rq->rd->overutilized = true;
+                       trace_sched_overutilized(true);
+               }
        }
+
 }
 
 /**
@@ -8935,8 +8976,10 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
                task_tick_numa(rq, curr);
 
 #ifdef CONFIG_SMP
-       if (!rq->rd->overutilized && cpu_overutilized(task_cpu(curr)))
+       if (!rq->rd->overutilized && cpu_overutilized(task_cpu(curr))) {
                rq->rd->overutilized = true;
+               trace_sched_overutilized(true);
+       }
 
        rq->misfit_task = !task_fits_max(curr, rq->cpu);
 #endif
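
The trace_sched_overutilized() calls added throughout this patch (the commit's stated purpose) rely on a matching tracepoint declared outside this file, presumably in include/trace/events/sched.h. That declaration is not part of this diff; a plausible minimal sketch, assuming it records just the new boolean state, would be:

```c
/* include/trace/events/sched.h (sketch, not taken from this diff) */
TRACE_EVENT(sched_overutilized,

	TP_PROTO(bool overutilized),

	TP_ARGS(overutilized),

	TP_STRUCT__entry(
		__field(bool, overutilized)
	),

	TP_fast_assign(
		__entry->overutilized = overutilized;
	),

	TP_printk("overutilized=%d", __entry->overutilized ? 1 : 0)
);
```
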