Merge commit 'v2.6.37-rc6' into sched/core

[firefly-linux-kernel-4.4.55.git] / kernel / sched.c
diff --git a/kernel/sched.c b/kernel/sched.c

index 22436dd2e19f2b3c29324bf182aa287af5df65b4..3925a1bbf5ddfac43271aec828bb260a1892111b 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -75,9 +75,11 @@
  
  #include <asm/tlb.h>
  #include <asm/irq_regs.h>
+#include <asm/mutex.h>
  
  #include "sched_cpupri.h"
  #include "workqueue_sched.h"
+#include "sched_autogroup.h"
  
  #define CREATE_TRACE_POINTS
  #include <trace/events/sched.h>
@@ -270,6 +272,10 @@ struct task_group {
         struct task_group *parent;
         struct list_head siblings;
         struct list_head children;
+
+#ifdef CONFIG_SCHED_AUTOGROUP
+       struct autogroup *autogroup;
+#endif
  };
  
  #define root_task_group init_task_group
@@ -279,13 +285,6 @@ static DEFINE_SPINLOCK(task_group_lock);
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
  
-#ifdef CONFIG_SMP
-static int root_task_group_empty(void)
-{
-       return list_empty(&root_task_group.children);
-}
-#endif
-
  # define INIT_TASK_GROUP_LOAD  NICE_0_LOAD
  
  /*
@@ -360,9 +359,16 @@ struct cfs_rq {
          */
         unsigned long h_load;
  
+       /*
+        * Maintaining per-cpu shares distribution for group scheduling
+        *
+        * load_stamp is the last time we updated the load average
+        * load_last is the last time we updated the load average and saw load
+        * load_unacc_exec_time is currently unaccounted execution time
+        */
         u64 load_avg;
         u64 load_period;
-       u64 load_stamp;
+       u64 load_stamp, load_last, load_unacc_exec_time;
  
         unsigned long load_contribution;
  #endif
@@ -557,18 +563,8 @@ struct rq {
  
  static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
  
-static inline
-void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
-{
-       rq->curr->sched_class->check_preempt_curr(rq, p, flags);
  
-       /*
-        * A queue event has occurred, and we're going to schedule.  In
-        * this case, we can save a useless back to back clock update.
-        */
-       if (test_tsk_need_resched(p))
-               rq->skip_clock_update = 1;
-}
+static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
  
  static inline int cpu_of(struct rq *rq)
  {
@@ -612,11 +608,14 @@ static inline int cpu_of(struct rq *rq)
   */
  static inline struct task_group *task_group(struct task_struct *p)
  {
+       struct task_group *tg;
         struct cgroup_subsys_state *css;
  
         css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
                         lockdep_is_held(&task_rq(p)->lock));
-       return container_of(css, struct task_group, css);
+       tg = container_of(css, struct task_group, css);
+
+       return autogroup_task_group(p, tg);
  }
  
  /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
@@ -1546,48 +1545,6 @@ static unsigned long cpu_avg_load_per_task(int cpu)
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
  
-static void update_cfs_load(struct cfs_rq *cfs_rq, int lb);
-static void update_cfs_shares(struct cfs_rq *cfs_rq);
-
-/*
- * update tg->load_weight by folding this cpu's load_avg
- */
-static int tg_shares_up(struct task_group *tg, void *data)
-{
-       long load_avg;
-       struct cfs_rq *cfs_rq;
-       unsigned long flags;
-       int cpu = (long)data;
-       struct rq *rq;
-
-       if (!tg->se[cpu])
-               return 0;
-
-       rq = cpu_rq(cpu);
-       cfs_rq = tg->cfs_rq[cpu];
-
-       raw_spin_lock_irqsave(&rq->lock, flags);
-
-       update_rq_clock(rq);
-       update_cfs_load(cfs_rq, 1);
-
-       load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1);
-       load_avg -= cfs_rq->load_contribution;
-
-       atomic_add(load_avg, &tg->load_weight);
-       cfs_rq->load_contribution += load_avg;
-
-       /*
-        * We need to update shares after updating tg->load_weight in
-        * order to adjust the weight of groups with long running tasks.
-        */
-       update_cfs_shares(cfs_rq);
-
-       raw_spin_unlock_irqrestore(&rq->lock, flags);
-
-       return 0;
-}
-
  /*
   * Compute the cpu's hierarchical load factor for each task group.
   * This needs to be done in a top-down fashion because the load of a child
@@ -1611,29 +1568,11 @@ static int tg_load_down(struct task_group *tg, void *data)
         return 0;
  }
  
-static void update_shares(long cpu)
-{
-       if (root_task_group_empty())
-               return;
-
-       /*
-        * XXX: replace with an on-demand list
-        */
-
-       walk_tg_tree(tg_nop, tg_shares_up, (void *)cpu);
-}
-
  static void update_h_load(long cpu)
  {
         walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
  }
  
-#else
-
-static inline void update_shares(int cpu)
-{
-}
-
  #endif
  
  #ifdef CONFIG_PREEMPT
@@ -1938,6 +1877,7 @@ static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) { }
  #include "sched_idletask.c"
  #include "sched_fair.c"
  #include "sched_rt.c"
+#include "sched_autogroup.c"
  #include "sched_stoptask.c"
  #ifdef CONFIG_SCHED_DEBUG
  # include "sched_debug.c"
@@ -2040,6 +1980,31 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
                 p->sched_class->prio_changed(rq, p, oldprio, running);
  }
  
+static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
+{
+       const struct sched_class *class;
+
+       if (p->sched_class == rq->curr->sched_class) {
+               rq->curr->sched_class->check_preempt_curr(rq, p, flags);
+       } else {
+               for_each_class(class) {
+                       if (class == rq->curr->sched_class)
+                               break;
+                       if (class == p->sched_class) {
+                               resched_task(rq->curr);
+                               break;
+                       }
+               }
+       }
+
+       /*
+        * A queue event has occurred, and we're going to schedule.  In
+        * this case, we can save a useless back to back clock update.
+        */
+       if (test_tsk_need_resched(rq->curr))
+               rq->skip_clock_update = 1;
+}
+
  #ifdef CONFIG_SMP
  /*
   * Is this task likely cache-hot:
@@ -2105,10 +2070,8 @@ static int migration_cpu_stop(void *data);
   * The task's runqueue lock must be held.
   * Returns true if you have to wait for migration thread.
   */
-static bool migrate_task(struct task_struct *p, int dest_cpu)
+static bool migrate_task(struct task_struct *p, struct rq *rq)
  {
-       struct rq *rq = task_rq(p);
-
         /*
          * If the task is not on a runqueue (and not running), then
          * the next wake-up will properly place the task.
@@ -2632,7 +2595,9 @@ void sched_fork(struct task_struct *p, int clone_flags)
         /* Want to start with kernel preemption disabled. */
         task_thread_info(p)->preempt_count = 1;
  #endif
+#ifdef CONFIG_SMP
         plist_node_init(&p->pushable_tasks, MAX_PRIO);
+#endif
  
         put_cpu();
  }
@@ -3268,7 +3233,7 @@ void sched_exec(void)
          * select_task_rq() can race against ->cpus_allowed
          */
         if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) &&
-           likely(cpu_active(dest_cpu)) && migrate_task(p, dest_cpu)) {
+           likely(cpu_active(dest_cpu)) && migrate_task(p, rq)) {
                 struct migration_arg arg = { p, dest_cpu };
  
                 task_rq_unlock(rq, &flags);
@@ -3933,7 +3898,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
                 if (task_thread_info(rq->curr) != owner || need_resched())
                         return 0;
  
-               cpu_relax();
+               arch_mutex_cpu_relax();
         }
  
         return 1;
@@ -5309,7 +5274,7 @@ void sched_show_task(struct task_struct *p)
         unsigned state;
  
         state = p->state ? __ffs(p->state) + 1 : 0;
-       printk(KERN_INFO "%-13.13s %c", p->comm,
+       printk(KERN_INFO "%-15.15s %c", p->comm,
                 state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');
  #if BITS_PER_LONG == 32
         if (state == TASK_RUNNING)
@@ -5548,7 +5513,7 @@ again:
                 goto out;
  
         dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
-       if (migrate_task(p, dest_cpu)) {
+       if (migrate_task(p, rq)) {
                 struct migration_arg arg = { p, dest_cpu };
                 /* Need help from migration thread: drop lock and wait. */
                 task_rq_unlock(rq, &flags);
@@ -6797,6 +6762,8 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
         if (cpu != group_first_cpu(sd->groups))
                 return;
  
+       sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups));
+
         child = sd->child;
  
         sd->groups->cpu_power = 0;
@@ -7706,7 +7673,7 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
                 se->cfs_rq = parent->my_q;
  
         se->my_q = cfs_rq;
-       update_load_set(&se->load, tg->shares);
+       update_load_set(&se->load, 0);
         se->parent = parent;
  }
  #endif
@@ -7794,7 +7761,7 @@ void __init sched_init(void)
  #ifdef CONFIG_CGROUP_SCHED
         list_add(&init_task_group.list, &task_groups);
         INIT_LIST_HEAD(&init_task_group.children);
-
+       autogroup_init(&init_task);
  #endif /* CONFIG_CGROUP_SCHED */
  
         for_each_possible_cpu(i) {
@@ -8129,7 +8096,6 @@ static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
  {
         struct rq *rq = cpu_rq(cpu);
         unsigned long flags;
-       int i;
  
         /*
         * Only empty task groups can be destroyed; so we can speculatively
@@ -8139,7 +8105,7 @@ static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
                 return;
  
         raw_spin_lock_irqsave(&rq->lock, flags);
-       list_del_leaf_cfs_rq(tg->cfs_rq[i]);
+       list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
         raw_spin_unlock_irqrestore(&rq->lock, flags);
  }
  #else /* !CONFG_FAIR_GROUP_SCHED */
@@ -8334,37 +8300,12 @@ void sched_move_task(struct task_struct *tsk)
  #endif /* CONFIG_CGROUP_SCHED */
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
-static void __set_se_shares(struct sched_entity *se, unsigned long shares)
-{
-       struct cfs_rq *cfs_rq = se->cfs_rq;
-       int on_rq;
-
-       on_rq = se->on_rq;
-       if (on_rq)
-               dequeue_entity(cfs_rq, se, 0);
-
-       update_load_set(&se->load, shares);
-
-       if (on_rq)
-               enqueue_entity(cfs_rq, se, 0);
-}
-
-static void set_se_shares(struct sched_entity *se, unsigned long shares)
-{
-       struct cfs_rq *cfs_rq = se->cfs_rq;
-       struct rq *rq = cfs_rq->rq;
-       unsigned long flags;
-
-       raw_spin_lock_irqsave(&rq->lock, flags);
-       __set_se_shares(se, shares);
-       raw_spin_unlock_irqrestore(&rq->lock, flags);
-}
-
  static DEFINE_MUTEX(shares_mutex);
  
  int sched_group_set_shares(struct task_group *tg, unsigned long shares)
  {
         int i;
+       unsigned long flags;
  
         /*
          * We can't change the weight of the root cgroup.
@@ -8383,10 +8324,15 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
  
         tg->shares = shares;
         for_each_possible_cpu(i) {
-               /*
-                * force a rebalance
-                */
-               set_se_shares(tg->se[i], shares);
+               struct rq *rq = cpu_rq(i);
+               struct sched_entity *se;
+
+               se = tg->se[i];
+               /* Propagate contribution to hierarchy */
+               raw_spin_lock_irqsave(&rq->lock, flags);
+               for_each_sched_entity(se)
+                       update_cfs_shares(group_cfs_rq(se), 0);
+               raw_spin_unlock_irqrestore(&rq->lock, flags);
         }
  
  done: