sched: Task placement for heterogeneous systems based on task load-tracking

author Morten Rasmussen <Morten.Rasmussen@arm.com>

Fri, 14 Sep 2012 13:38:09 +0000 (14:38 +0100)

committer Jon Medhurst <tixy@linaro.org>

Wed, 17 Jul 2013 10:12:24 +0000 (11:12 +0100)
author Morten Rasmussen <Morten.Rasmussen@arm.com>
Fri, 14 Sep 2012 13:38:09 +0000 (14:38 +0100)
committer Jon Medhurst <tixy@linaro.org>
Wed, 17 Jul 2013 10:12:24 +0000 (11:12 +0100)
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig

index 136f263ed47b79d010cf4ca06e7b1e2a07e4fd73..5fb586f4fcfe448c80cdce8154eb48735dbecdc3 100644 (file)
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1494,6 +1494,23 @@ config SCHED_SMT
           MultiThreading at a cost of slightly increased overhead in some
           places. If unsure say N here.
  
+config DISABLE_CPU_SCHED_DOMAIN_BALANCE
+       bool "(EXPERIMENTAL) Disable CPU level scheduler load-balancing"
+       help
+         Disables scheduler load-balancing at CPU sched domain level.
+
+config SCHED_HMP
+       bool "(EXPERIMENTAL) Heterogeneous multiprocessor scheduling"
+       depends on DISABLE_CPU_SCHED_DOMAIN_BALANCE && SCHED_MC && FAIR_GROUP_SCHED && !SCHED_AUTOGROUP
+       help
+         Experimental scheduler optimizations for heterogeneous platforms.
+         Attempts to introspectively select task affinity to optimize power
+         and performance. Basic support for multiple (>2) cpu types is in place,
+         but it has only been tested with two types of cpus.
+         There is currently no support for migration of task groups, hence
+         !SCHED_AUTOGROUP. Furthermore, normal load-balancing must be disabled
+         between cpus of different type (DISABLE_CPU_SCHED_DOMAIN_BALANCE).
+
  config HAVE_ARM_SCU
         bool
         help
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 207054a9867aede70f65ccc42b02ab3aa5e0b7cd..cfb9a2efc213d683599d38be27d94df3833fa6f2 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -885,6 +885,12 @@ void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms);
  
  bool cpus_share_cache(int this_cpu, int that_cpu);
  
+#ifdef CONFIG_SCHED_HMP
+struct hmp_domain {
+       struct cpumask cpus;            /* cpus of this cpu type */
+       struct list_head hmp_domains;   /* node in the list of hmp_domains */
+};
+#endif /* CONFIG_SCHED_HMP */
  #else /* CONFIG_SMP */
  
  struct sched_domain_attr;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index e6db20a013b019b76baaedffe233573117964f34..7d192bd8242882816d3e3534c01b6eb1fb87f08a 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3340,6 +3340,125 @@ done:
         return target;
  }
  
+#ifdef CONFIG_SCHED_HMP
+/*
+ * Heterogeneous multiprocessor (HMP) optimizations
+ *
+ * The cpu types are distinguished using a list of hmp_domains
+ * which each represent one cpu type using a cpumask.
+ * The list is assumed ordered by compute capacity with the
+ * fastest domain first.
+ */
+DEFINE_PER_CPU(struct hmp_domain *, hmp_cpu_domain);
+
+extern void __init arch_get_hmp_domains(struct list_head *hmp_domains_list);
+
+/*
+ * Setup hmp_domains: fetch the domain list from platform code and record,
+ * for every cpu of each domain, that domain in the per-cpu hmp_cpu_domain.
+ * Returns 1 once the domains are set up, 0 if the platform list is empty.
+ */
+static int __init hmp_cpu_mask_setup(void)
+{
+       char buf[64];
+       struct hmp_domain *domain;
+       int dc = 0, cpu;
+
+       pr_debug("Initializing HMP scheduler:\n");
+
+       /* Initialize hmp_domains using platform code */
+       arch_get_hmp_domains(&hmp_domains);
+       if (list_empty(&hmp_domains)) {
+               pr_debug("HMP domain list is empty!\n");
+               return 0;
+       }
+
+       /* Print each domain and map its cpus to it */
+       list_for_each_entry(domain, &hmp_domains, hmp_domains) {
+               cpulist_scnprintf(buf, sizeof(buf), &domain->cpus);
+               pr_debug("  HMP domain %d: %s\n", dc, buf);
+
+               for_each_cpu_mask(cpu, domain->cpus)
+                       per_cpu(hmp_cpu_domain, cpu) = domain;
+               dc++;
+       }
+
+       return 1;
+}
+
+/*
+ * Migration thresholds should be in the range [0..1023]
+ * hmp_up_threshold: min. load required for migrating tasks to a faster cpu
+ * hmp_down_threshold: max. load allowed for tasks migrating to a slower cpu
+ * The default values (512, 256) offer good responsiveness, but may need
+ * tweaking to suit particular needs.
+ */
+unsigned int hmp_up_threshold = 512;
+unsigned int hmp_down_threshold = 256;
+
+static unsigned int hmp_up_migration(int cpu, struct sched_entity *se);
+static unsigned int hmp_down_migration(int cpu, struct sched_entity *se);
+
+/* Returns non-zero when cpu belongs to the first (fastest) hmp_domain */
+static inline unsigned int hmp_cpu_is_fastest(int cpu)
+{
+       struct hmp_domain *domain = hmp_cpu_domain(cpu);
+
+       /* hmp_domains is assumed to be ordered with the fastest first */
+       return hmp_domains.next == &domain->hmp_domains;
+}
+
+/* Returns non-zero when cpu belongs to the last (slowest) hmp_domain */
+static inline unsigned int hmp_cpu_is_slowest(int cpu)
+{
+       struct hmp_domain *domain = hmp_cpu_domain(cpu);
+
+       /* The slowest domain is the final entry of hmp_domains */
+       return list_is_last(&domain->hmp_domains, &hmp_domains);
+}
+
+/* Returns the entry after cpu's hmp_domain, i.e. the next (slower) one */
+static inline struct hmp_domain *hmp_slower_domain(int cpu)
+{
+       struct hmp_domain *domain = hmp_cpu_domain(cpu);
+
+       return list_entry(domain->hmp_domains.next,
+                               struct hmp_domain, hmp_domains);
+}
+
+/* Returns the entry before cpu's hmp_domain, i.e. the previous (faster) one */
+static inline struct hmp_domain *hmp_faster_domain(int cpu)
+{
+       struct hmp_domain *domain = hmp_cpu_domain(cpu);
+
+       return list_entry(domain->hmp_domains.prev,
+                               struct hmp_domain, hmp_domains);
+}
+
+/*
+ * First cpu (via cpumask_any_and()) tsk may use in the faster hmp_domain
+ */
+static inline unsigned int
+hmp_select_faster_cpu(struct task_struct *tsk, int cpu)
+{
+       const struct cpumask *faster = &hmp_faster_domain(cpu)->cpus;
+
+       return cpumask_any_and(faster, tsk_cpus_allowed(tsk));
+}
+
+/*
+ * First cpu (via cpumask_any_and()) tsk may use in the slower hmp_domain
+ */
+static inline unsigned int
+hmp_select_slower_cpu(struct task_struct *tsk, int cpu)
+{
+       const struct cpumask *slower = &hmp_slower_domain(cpu)->cpus;
+
+       return cpumask_any_and(slower, tsk_cpus_allowed(tsk));
+}
+
+#endif /* CONFIG_SCHED_HMP */
+
  /*
   * sched_balance_self: balance the current task (running on cpu) in domains
   * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and
@@ -3438,6 +3557,16 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
  unlock:
         rcu_read_unlock();
  
+#ifdef CONFIG_SCHED_HMP
+       if (hmp_up_migration(prev_cpu, &p->se))
+               return hmp_select_faster_cpu(p, prev_cpu);
+       if (hmp_down_migration(prev_cpu, &p->se))
+               return hmp_select_slower_cpu(p, prev_cpu);
+       /* Make sure that the task stays in its previous hmp domain */
+       if (!cpumask_test_cpu(new_cpu, &hmp_cpu_domain(prev_cpu)->cpus))
+               return prev_cpu;
+#endif
+
         return new_cpu;
  }
  
@@ -5708,6 +5837,41 @@ need_kick:
  static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) { }
  #endif
  
+#ifdef CONFIG_SCHED_HMP
+/* Returns 1 if se's task should move to a faster cpu than cpu, else 0 */
+static unsigned int hmp_up_migration(int cpu, struct sched_entity *se)
+{
+       struct task_struct *task = task_of(se);
+
+       if (hmp_cpu_is_fastest(cpu))
+               return 0;
+
+       /* The task must be allowed on at least one cpu of the faster domain */
+       if (!cpumask_intersects(&hmp_faster_domain(cpu)->cpus,
+                               tsk_cpus_allowed(task)))
+               return 0;
+
+       return se->avg.load_avg_ratio > hmp_up_threshold;
+}
+
+/* Returns 1 if se's task should move to a slower cpu than cpu, else 0 */
+static unsigned int hmp_down_migration(int cpu, struct sched_entity *se)
+{
+       struct task_struct *task = task_of(se);
+
+       if (hmp_cpu_is_slowest(cpu))
+               return 0;
+
+       /* The task must be allowed on at least one cpu of the slower domain */
+       if (!cpumask_intersects(&hmp_slower_domain(cpu)->cpus,
+                               tsk_cpus_allowed(task)))
+               return 0;
+
+       return se->avg.load_avg_ratio < hmp_down_threshold;
+}
+
+#endif /* CONFIG_SCHED_HMP */
+
  /*
   * run_rebalance_domains is triggered when needed from the scheduler tick.
   * Also triggered for nohz idle balancing (with nohz_balancing_kick set).
@@ -6218,6 +6382,10 @@ __init void init_sched_fair_class(void)
         zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
         cpu_notifier(sched_ilb_notifier, 0);
  #endif
+
+#ifdef CONFIG_SCHED_HMP
+       hmp_cpu_mask_setup();
+#endif
  #endif /* SMP */
  
  }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h

index e1d155c68dfc4e8ef61f95141e672e284059897d..2e90c98900defa18db5c8ddc4ee118f44fa10fe3 100644 (file)
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -642,6 +642,12 @@ static inline unsigned int group_first_cpu(struct sched_group *group)
  
  extern int group_balance_cpu(struct sched_group *sg);
  
+#ifdef CONFIG_SCHED_HMP
+static LIST_HEAD(hmp_domains); /* NOTE(review): each includer gets its own copy */
+DECLARE_PER_CPU(struct hmp_domain *, hmp_cpu_domain); /* defined in fair.c */
+#define hmp_cpu_domain(cpu)    (per_cpu(hmp_cpu_domain, (cpu))) /* cpu's domain */
+#endif /* CONFIG_SCHED_HMP */
+
  #endif /* CONFIG_SMP */
  
  #include "stats.h"
author	Morten Rasmussen <Morten.Rasmussen@arm.com>
	Fri, 14 Sep 2012 13:38:09 +0000 (14:38 +0100)
committer	Jon Medhurst <tixy@linaro.org>
	Wed, 17 Jul 2013 10:12:24 +0000 (11:12 +0100)
arch/arm/Kconfig		patch \| blob \| history
include/linux/sched.h		patch \| blob \| history
kernel/sched/fair.c		patch \| blob \| history
kernel/sched/sched.h		patch \| blob \| history