Revert "sched: Improve scalability via 'CPU buddies', which withstand random perturbations"

[firefly-linux-kernel-4.4.55.git] / kernel / sched / core.c
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index d325c4b2dcbb0c5995a903d76e3035129d1590c3..649c9f876cb164b0e16683479b59bf4d4f3b794b 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3142,6 +3142,20 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
  # define nsecs_to_cputime(__nsecs)     nsecs_to_jiffies(__nsecs)
  #endif
  
+static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
+{
+       u64 temp = (__force u64) rtime;
+       /* temp becomes rtime * utime as a u64; callers in this diff guard total != 0 */
+       temp *= (__force u64) utime;
+       /* cputime_t width selects the helper: divisor cast to u32 vs. to u64 */
+       if (sizeof(cputime_t) == 4)
+               temp = div_u64(temp, (__force u32) total);
+       else
+               temp = div64_u64(temp, (__force u64) total);
+
+       return (__force cputime_t) temp;
+}
+
  void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
  {
         cputime_t rtime, utime = p->utime, total = utime + p->stime;
@@ -3151,13 +3165,9 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
          */
         rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
  
-       if (total) {
-               u64 temp = (__force u64) rtime;
-
-               temp *= (__force u64) utime;
-               do_div(temp, (__force u32) total);
-               utime = (__force cputime_t) temp;
-       } else
+       if (total)
+               utime = scale_utime(utime, rtime, total);
+       else
                 utime = rtime;
  
         /*
@@ -3184,13 +3194,9 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
         total = cputime.utime + cputime.stime;
         rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
  
-       if (total) {
-               u64 temp = (__force u64) rtime;
-
-               temp *= (__force u64) cputime.utime;
-               do_div(temp, (__force u32) total);
-               utime = (__force cputime_t) temp;
-       } else
+       if (total)
+               utime = scale_utime(cputime.utime, rtime, total);
+       else
                 utime = rtime;
  
         sig->prev_utime = max(sig->prev_utime, utime);
@@ -4340,9 +4346,7 @@ recheck:
          */
         if (unlikely(policy == p->policy && (!rt_policy(policy) ||
                         param->sched_priority == p->rt_priority))) {
-
-               __task_rq_unlock(rq);
-               raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+               task_rq_unlock(rq, p, &flags);
                 return 0;
         }
  
@@ -5300,27 +5304,17 @@ void idle_task_exit(void)
  }
  
  /*
- * While a dead CPU has no uninterruptible tasks queued at this point,
- * it might still have a nonzero ->nr_uninterruptible counter, because
- * for performance reasons the counter is not stricly tracking tasks to
- * their home CPUs. So we just add the counter to another CPU's counter,
- * to keep the global sum constant after CPU-down:
- */
-static void migrate_nr_uninterruptible(struct rq *rq_src)
-{
-       struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask));
-
-       rq_dest->nr_uninterruptible += rq_src->nr_uninterruptible;
-       rq_src->nr_uninterruptible = 0;
-}
-
-/*
- * remove the tasks which were accounted by rq from calc_load_tasks.
+ * Since this CPU is going 'away' for a while, fold any nr_active delta
+ * we might have. Assumes we're called after migrate_tasks() so that the
+ * nr_active count is stable.
+ *
+ * Also see the comment "Global load-average calculations".
   */
-static void calc_global_load_remove(struct rq *rq)
+static void calc_load_migrate(struct rq *rq)
  {
-       atomic_long_sub(rq->calc_load_active, &calc_load_tasks);
-       rq->calc_load_active = 0;
+       long delta = calc_load_fold_active(rq);
+       if (delta)
+               atomic_long_add(delta, &calc_load_tasks);
  }
  
  /*
@@ -5348,9 +5342,6 @@ static void migrate_tasks(unsigned int dead_cpu)
          */
         rq->stop = NULL;
  
-       /* Ensure any throttled groups are reachable by pick_next_task */
-       unthrottle_offline_cfs_rqs(rq);
-
         for ( ; ; ) {
                 /*
                  * There's this thread running, bail when that's the only
@@ -5614,8 +5605,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
                 BUG_ON(rq->nr_running != 1); /* the migration thread */
                 raw_spin_unlock_irqrestore(&rq->lock, flags);
  
-               migrate_nr_uninterruptible(rq);
-               calc_global_load_remove(rq);
+               calc_load_migrate(rq);
                 break;
  #endif
         }
@@ -6024,11 +6014,6 @@ static void destroy_sched_domains(struct sched_domain *sd, int cpu)
   * SD_SHARE_PKG_RESOURCE set (Last Level Cache Domain) for this
   * allows us to avoid some pointer chasing select_idle_sibling().
   *
- * Iterate domains and sched_groups downward, assigning CPUs to be
- * select_idle_sibling() hw buddy.  Cross-wiring hw makes bouncing
- * due to random perturbation self canceling, ie sw buddies pull
- * their counterpart to their CPU's hw counterpart.
- *
   * Also keep a unique ID per domain (we use the first cpu number in
   * the cpumask of the domain), this allows us to quickly tell if
   * two cpus are in the same cache domain, see cpus_share_cache().
@@ -6042,40 +6027,8 @@ static void update_top_cache_domain(int cpu)
         int id = cpu;
  
         sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
-       if (sd) {
-               struct sched_domain *tmp = sd;
-               struct sched_group *sg, *prev;
-               bool right;
-
-               /*
-                * Traverse to first CPU in group, and count hops
-                * to cpu from there, switching direction on each
-                * hop, never ever pointing the last CPU rightward.
-                */
-               do {
-                       id = cpumask_first(sched_domain_span(tmp));
-                       prev = sg = tmp->groups;
-                       right = 1;
-
-                       while (cpumask_first(sched_group_cpus(sg)) != id)
-                               sg = sg->next;
-
-                       while (!cpumask_test_cpu(cpu, sched_group_cpus(sg))) {
-                               prev = sg;
-                               sg = sg->next;
-                               right = !right;
-                       }
-
-                       /* A CPU went down, never point back to domain start. */
-                       if (right && cpumask_first(sched_group_cpus(sg->next)) == id)
-                               right = false;
-
-                       sg = right ? sg->next : prev;
-                       tmp->idle_buddy = cpumask_first(sched_group_cpus(sg));
-               } while ((tmp = tmp->child));
-
+       if (sd)
                 id = cpumask_first(sched_domain_span(sd));
-       }
  
         rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
         per_cpu(sd_llc_id, cpu) = id;
@@ -7248,6 +7201,7 @@ int in_sched_functions(unsigned long addr)
  
  #ifdef CONFIG_CGROUP_SCHED
  struct task_group root_task_group;
+LIST_HEAD(task_groups);
  #endif
  
  DECLARE_PER_CPU(cpumask_var_t, load_balance_tmpmask);