sched: Create special class for stop/migrate work

[firefly-linux-kernel-4.4.55.git] / kernel / sched.c
diff --git a/kernel/sched.c b/kernel/sched.c

index ed09d4f2a69c5b4c1412d350c7c834a655732f2a..5f64fed56a44d9528c8d2abce0913682dcdda4ba 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -426,9 +426,7 @@ struct root_domain {
          */
         cpumask_var_t rto_mask;
         atomic_t rto_count;
-#ifdef CONFIG_SMP
         struct cpupri cpupri;
-#endif
  };
  
  /*
@@ -437,7 +435,7 @@ struct root_domain {
   */
  static struct root_domain def_root_domain;
  
-#endif
+#endif /* CONFIG_SMP */
  
  /*
   * This is the main, per-CPU runqueue data structure.
@@ -488,7 +486,7 @@ struct rq {
          */
         unsigned long nr_uninterruptible;
  
-       struct task_struct *curr, *idle;
+       struct task_struct *curr, *idle, *stop;
         unsigned long next_balance;
         struct mm_struct *prev_mm;
  
@@ -723,7 +721,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
                 size_t cnt, loff_t *ppos)
  {
         char buf[64];
-       char *cmp = buf;
+       char *cmp;
         int neg = 0;
         int i;
  
@@ -734,6 +732,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
                 return -EFAULT;
  
         buf[cnt] = 0;
+       cmp = strstrip(buf);
  
         if (strncmp(buf, "NO_", 3) == 0) {
                 neg = 1;
@@ -741,9 +740,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
         }
  
         for (i = 0; sched_feat_names[i]; i++) {
-               int len = strlen(sched_feat_names[i]);
-
-               if (strncmp(cmp, sched_feat_names[i], len) == 0) {
+               if (strcmp(cmp, sched_feat_names[i]) == 0) {
                         if (neg)
                                 sysctl_sched_features &= ~(1UL << i);
                         else
@@ -1840,7 +1837,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
  
  static const struct sched_class rt_sched_class;
  
-#define sched_class_highest (&rt_sched_class)
+#define sched_class_highest (&stop_sched_class)
  #define for_each_class(class) \
     for (class = sched_class_highest; class; class = class->next)
  
@@ -1920,10 +1917,41 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
  #include "sched_idletask.c"
  #include "sched_fair.c"
  #include "sched_rt.c"
+#include "sched_stoptask.c"
  #ifdef CONFIG_SCHED_DEBUG
  # include "sched_debug.c"
  #endif
  
+void sched_set_stop_task(int cpu, struct task_struct *stop)
+{
+       struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
+       struct task_struct *old_stop = cpu_rq(cpu)->stop;
+
+       if (stop) {
+               /*
+                * Make it appear like a SCHED_FIFO task, it's something
+                * userspace knows about and won't get confused about.
+                *
+                * Also, it will make PI more or less work without too
+                * much confusion -- but then, stop work should not
+                * rely on PI working anyway.
+                */
+               sched_setscheduler_nocheck(stop, SCHED_FIFO, &param);
+
+               stop->sched_class = &stop_sched_class;
+       }
+
+       cpu_rq(cpu)->stop = stop;
+
+       if (old_stop) {
+               /*
+                * Reset it back to a normal scheduling class so that
+                * it can die in pieces.
+                */
+               old_stop->sched_class = &rt_sched_class;
+       }
+}
+
  /*
   * __normal_prio - return the priority that is based on the static prio
   */
@@ -2852,14 +2880,14 @@ context_switch(struct rq *rq, struct task_struct *prev,
          */
         arch_start_context_switch(prev);
  
-       if (likely(!mm)) {
+       if (!mm) {
                 next->active_mm = oldmm;
                 atomic_inc(&oldmm->mm_count);
                 enter_lazy_tlb(oldmm, next);
         } else
                 switch_mm(oldmm, mm, next);
  
-       if (likely(!prev->mm)) {
+       if (!prev->mm) {
                 prev->active_mm = NULL;
                 rq->prev_mm = oldmm;
         }
@@ -3513,9 +3541,9 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
         rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
  
         if (total) {
-               u64 temp;
+               u64 temp = rtime;
  
-               temp = (u64)(rtime * utime);
+               temp *= utime;
                 do_div(temp, total);
                 utime = (cputime_t)temp;
         } else
@@ -3546,9 +3574,9 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
         rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
  
         if (total) {
-               u64 temp;
+               u64 temp = rtime;
  
-               temp = (u64)(rtime * cputime.utime);
+               temp *= cputime.utime;
                 do_div(temp, total);
                 utime = (cputime_t)temp;
         } else
@@ -3723,17 +3751,13 @@ pick_next_task(struct rq *rq)
                         return p;
         }
  
-       class = sched_class_highest;
-       for ( ; ; ) {
+       for_each_class(class) {
                 p = class->pick_next_task(rq);
                 if (p)
                         return p;
-               /*
-                * Will never be NULL as the idle class always
-                * returns a non-NULL p:
-                */
-               class = class->next;
         }
+
+       BUG(); /* the idle class will always have a runnable task */
  }
  
  /*
@@ -4358,6 +4382,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
  
         rq = task_rq_lock(p, &flags);
  
+       trace_sched_pi_setprio(p, prio);
         oldprio = p->prio;
         prev_class = p->sched_class;
         on_rq = p->se.on_rq;
@@ -4661,6 +4686,15 @@ recheck:
          */
         rq = __task_rq_lock(p);
  
+       /*
+        * Changing the policy of the stop threads is a very bad idea
+        */
+       if (p == rq->stop) {
+               __task_rq_unlock(rq);
+               raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+               return -EINVAL;
+       }
+
  #ifdef CONFIG_RT_GROUP_SCHED
         if (user) {
                 /*
@@ -4893,7 +4927,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
  
         cpuset_cpus_allowed(p, cpus_allowed);
         cpumask_and(new_mask, in_mask, cpus_allowed);
- again:
+again:
         retval = set_cpus_allowed_ptr(p, new_mask);
  
         if (!retval) {
@@ -6514,6 +6548,7 @@ struct s_data {
         cpumask_var_t           nodemask;
         cpumask_var_t           this_sibling_map;
         cpumask_var_t           this_core_map;
+       cpumask_var_t           this_book_map;
         cpumask_var_t           send_covered;
         cpumask_var_t           tmpmask;
         struct sched_group      **sched_group_nodes;
@@ -6525,6 +6560,7 @@ enum s_alloc {
         sa_rootdomain,
         sa_tmpmask,
         sa_send_covered,
+       sa_this_book_map,
         sa_this_core_map,
         sa_this_sibling_map,
         sa_nodemask,
@@ -6560,31 +6596,48 @@ cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map,
  #ifdef CONFIG_SCHED_MC
  static DEFINE_PER_CPU(struct static_sched_domain, core_domains);
  static DEFINE_PER_CPU(struct static_sched_group, sched_group_core);
-#endif /* CONFIG_SCHED_MC */
  
-#if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT)
  static int
  cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
                   struct sched_group **sg, struct cpumask *mask)
  {
         int group;
-
+#ifdef CONFIG_SCHED_SMT
         cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
         group = cpumask_first(mask);
+#else
+       group = cpu;
+#endif
         if (sg)
                 *sg = &per_cpu(sched_group_core, group).sg;
         return group;
  }
-#elif defined(CONFIG_SCHED_MC)
+#endif /* CONFIG_SCHED_MC */
+
+/*
+ * book sched-domains:
+ */
+#ifdef CONFIG_SCHED_BOOK
+static DEFINE_PER_CPU(struct static_sched_domain, book_domains);
+static DEFINE_PER_CPU(struct static_sched_group, sched_group_book);
+
  static int
-cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
-                 struct sched_group **sg, struct cpumask *unused)
+cpu_to_book_group(int cpu, const struct cpumask *cpu_map,
+                 struct sched_group **sg, struct cpumask *mask)
  {
+       int group = cpu;
+#ifdef CONFIG_SCHED_MC
+       cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
+       group = cpumask_first(mask);
+#elif defined(CONFIG_SCHED_SMT)
+       cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
+       group = cpumask_first(mask);
+#endif
         if (sg)
-               *sg = &per_cpu(sched_group_core, cpu).sg;
-       return cpu;
+               *sg = &per_cpu(sched_group_book, group).sg;
+       return group;
  }
-#endif
+#endif /* CONFIG_SCHED_BOOK */
  
  static DEFINE_PER_CPU(struct static_sched_domain, phys_domains);
  static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys);
@@ -6594,7 +6647,10 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
                   struct sched_group **sg, struct cpumask *mask)
  {
         int group;
-#ifdef CONFIG_SCHED_MC
+#ifdef CONFIG_SCHED_BOOK
+       cpumask_and(mask, cpu_book_mask(cpu), cpu_map);
+       group = cpumask_first(mask);
+#elif defined(CONFIG_SCHED_MC)
         cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
         group = cpumask_first(mask);
  #elif defined(CONFIG_SCHED_SMT)
@@ -6855,6 +6911,9 @@ SD_INIT_FUNC(CPU)
  #ifdef CONFIG_SCHED_MC
   SD_INIT_FUNC(MC)
  #endif
+#ifdef CONFIG_SCHED_BOOK
+ SD_INIT_FUNC(BOOK)
+#endif
  
  static int default_relax_domain_level = -1;
  
@@ -6904,6 +6963,8 @@ static void __free_domain_allocs(struct s_data *d, enum s_alloc what,
                 free_cpumask_var(d->tmpmask); /* fall through */
         case sa_send_covered:
                 free_cpumask_var(d->send_covered); /* fall through */
+       case sa_this_book_map:
+               free_cpumask_var(d->this_book_map); /* fall through */
         case sa_this_core_map:
                 free_cpumask_var(d->this_core_map); /* fall through */
         case sa_this_sibling_map:
@@ -6950,8 +7011,10 @@ static enum s_alloc __visit_domain_allocation_hell(struct s_data *d,
                 return sa_nodemask;
         if (!alloc_cpumask_var(&d->this_core_map, GFP_KERNEL))
                 return sa_this_sibling_map;
-       if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL))
+       if (!alloc_cpumask_var(&d->this_book_map, GFP_KERNEL))
                 return sa_this_core_map;
+       if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL))
+               return sa_this_book_map;
         if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL))
                 return sa_send_covered;
         d->rd = alloc_rootdomain();
@@ -7009,6 +7072,23 @@ static struct sched_domain *__build_cpu_sched_domain(struct s_data *d,
         return sd;
  }
  
+static struct sched_domain *__build_book_sched_domain(struct s_data *d,
+       const struct cpumask *cpu_map, struct sched_domain_attr *attr,
+       struct sched_domain *parent, int i)
+{
+       struct sched_domain *sd = parent;
+#ifdef CONFIG_SCHED_BOOK
+       sd = &per_cpu(book_domains, i).sd;
+       SD_INIT(sd, BOOK);
+       set_domain_attribute(sd, attr);
+       cpumask_and(sched_domain_span(sd), cpu_map, cpu_book_mask(i));
+       sd->parent = parent;
+       parent->child = sd;
+       cpu_to_book_group(i, cpu_map, &sd->groups, d->tmpmask);
+#endif
+       return sd;
+}
+
  static struct sched_domain *__build_mc_sched_domain(struct s_data *d,
         const struct cpumask *cpu_map, struct sched_domain_attr *attr,
         struct sched_domain *parent, int i)
@@ -7065,6 +7145,15 @@ static void build_sched_groups(struct s_data *d, enum sched_domain_level l,
                                                 &cpu_to_core_group,
                                                 d->send_covered, d->tmpmask);
                 break;
+#endif
+#ifdef CONFIG_SCHED_BOOK
+       case SD_LV_BOOK: /* set up book groups */
+               cpumask_and(d->this_book_map, cpu_map, cpu_book_mask(cpu));
+               if (cpu == cpumask_first(d->this_book_map))
+                       init_sched_build_groups(d->this_book_map, cpu_map,
+                                               &cpu_to_book_group,
+                                               d->send_covered, d->tmpmask);
+               break;
  #endif
         case SD_LV_CPU: /* set up physical groups */
                 cpumask_and(d->nodemask, cpumask_of_node(cpu), cpu_map);
@@ -7113,12 +7202,14 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
  
                 sd = __build_numa_sched_domains(&d, cpu_map, attr, i);
                 sd = __build_cpu_sched_domain(&d, cpu_map, attr, sd, i);
+               sd = __build_book_sched_domain(&d, cpu_map, attr, sd, i);
                 sd = __build_mc_sched_domain(&d, cpu_map, attr, sd, i);
                 sd = __build_smt_sched_domain(&d, cpu_map, attr, sd, i);
         }
  
         for_each_cpu(i, cpu_map) {
                 build_sched_groups(&d, SD_LV_SIBLING, cpu_map, i);
+               build_sched_groups(&d, SD_LV_BOOK, cpu_map, i);
                 build_sched_groups(&d, SD_LV_MC, cpu_map, i);
         }
  
@@ -7149,6 +7240,12 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
                 init_sched_groups_power(i, sd);
         }
  #endif
+#ifdef CONFIG_SCHED_BOOK
+       for_each_cpu(i, cpu_map) {
+               sd = &per_cpu(book_domains, i).sd;
+               init_sched_groups_power(i, sd);
+       }
+#endif
  
         for_each_cpu(i, cpu_map) {
                 sd = &per_cpu(phys_domains, i).sd;
@@ -7174,6 +7271,8 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
                 sd = &per_cpu(cpu_domains, i).sd;
  #elif defined(CONFIG_SCHED_MC)
                 sd = &per_cpu(core_domains, i).sd;
+#elif defined(CONFIG_SCHED_BOOK)
+               sd = &per_cpu(book_domains, i).sd;
  #else
                 sd = &per_cpu(phys_domains, i).sd;
  #endif
@@ -8078,9 +8177,9 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
  
         return 1;
  
- err_free_rq:
+err_free_rq:
         kfree(cfs_rq);
- err:
+err:
         return 0;
  }
  
@@ -8168,9 +8267,9 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
  
         return 1;
  
- err_free_rq:
+err_free_rq:
         kfree(rt_rq);
- err:
+err:
         return 0;
  }
  
@@ -8528,7 +8627,7 @@ static int tg_set_bandwidth(struct task_group *tg,
                 raw_spin_unlock(&rt_rq->rt_runtime_lock);
         }
         raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock);
- unlock:
+unlock:
         read_unlock(&tasklist_lock);
         mutex_unlock(&rt_constraints_mutex);