HMP: Implement task packing for small tasks in HMP systems
authorChris Redpath <chris.redpath@arm.com>
Fri, 11 Oct 2013 10:45:04 +0000 (11:45 +0100)
committerJon Medhurst <tixy@linaro.org>
Fri, 11 Oct 2013 14:07:18 +0000 (15:07 +0100)
When waking up a task on a little CPU, fill CPUs rather than
spreading tasks across the domain. Adds two new files to
/sys/kernel/hmp to control packing behaviour.

packing_enable: task packing enabled (1) or disabled (0)
packing_limit: Runqueues will be filled up to this load ratio.

This functionality is disabled by default on TC2 as it lacks per-CPU
power gating, so packing small tasks there doesn't make sense.
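
As an illustration of the runtime controls, here is a minimal user-space
sketch; it assumes the attribute group appears at /sys/kernel/hmp (the
group name registered in the patch) and uses example values only, so it
is not part of the patch itself.

/* Minimal sketch: toggle packing and set the limit from user space.
 * Assumes the sysfs group is exposed at /sys/kernel/hmp as described
 * above; the values written are only examples. */
#include <stdio.h>

static int hmp_write(const char *attr, int value)
{
	char path[128];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/kernel/hmp/%s", attr);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%d\n", value);
	return fclose(f);
}

int main(void)
{
	hmp_write("packing_enable", 1);		/* 1 = pack, 0 = spread */
	hmp_write("packing_limit", 1152);	/* NICE_0_LOAD * 9/8 */
	return 0;
}

Writing 0 to packing_enable restores the spreading behaviour; negative
values written to packing_limit are rejected by hmp_packing_from_sysfs().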

Signed-off-by: Chris Redpath <chris.redpath@arm.com>
Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
Signed-off-by: Jon Medhurst <tixy@linaro.org>
arch/arm/Kconfig
kernel/sched/fair.c

index 2a5f5b8c385c3f4ae21dea3f3587a3da73147a37..e79dfda6644a947415cce6f8301122190a2dcfe9 100644 (file)
@@ -1578,6 +1578,18 @@ config HMP_FREQUENCY_INVARIANT_SCALE
          migration strategy to interact more predictably with CPUFreq's
          asynchronous compute capacity changes.
 
+config SCHED_HMP_LITTLE_PACKING
+       bool "Small task packing for HMP"
+       depends on SCHED_HMP
+       default n
+       help
+         Allows the HMP Scheduler to pack small tasks into CPUs in the
+         smallest HMP domain.
+         Controlled by two sysfs files in /sys/kernel/hmp.
+         packing_enable: 1 to enable, 0 to disable packing. Default 1.
+         packing_limit: runqueue load ratio where a RQ is considered
+           to be full. Default is NICE_0_LOAD * 9/8.
+
 config HAVE_ARM_SCU
        bool
        help
index 003de405acdcd8259699b9fd019e90ab34cbeaea..66b5b30159f6197c7f89ff06a9efd6aa2e7af564 100644 (file)
@@ -1226,11 +1226,7 @@ struct hmp_global_attr {
        int (*from_sysfs)(int);
 };
 
-#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
-#define HMP_DATA_SYSFS_MAX 4
-#else
-#define HMP_DATA_SYSFS_MAX 3
-#endif
+#define HMP_DATA_SYSFS_MAX 8
 
 struct hmp_data_struct {
 #ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
@@ -3668,6 +3664,13 @@ static struct sched_entity *hmp_get_lightest_task(
  * hmp_up_prio: Only up migrate task with high priority (<hmp_up_prio)
  * hmp_next_up_threshold: Delay before next up migration (1024 ~= 1 ms)
  * hmp_next_down_threshold: Delay before next down migration (1024 ~= 1 ms)
+ *
+ * Small Task Packing:
+ * We can choose to fill the littlest CPUs in an HMP system rather than
+ * the typical spreading mechanism. This behaviour is controllable using
+ * two variables.
+ * hmp_packing_enabled: runtime control over pack/spread
+ * hmp_full_threshold: Consider a CPU with this much unweighted load full
  */
 unsigned int hmp_up_threshold = 700;
 unsigned int hmp_down_threshold = 512;
@@ -3677,11 +3680,27 @@ unsigned int hmp_up_prio = NICE_TO_PRIO(CONFIG_SCHED_HMP_PRIO_FILTER_VAL);
 unsigned int hmp_next_up_threshold = 4096;
 unsigned int hmp_next_down_threshold = 4096;
 
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+unsigned int hmp_packing_enabled = 1;
+#ifndef CONFIG_ARCH_VEXPRESS_TC2
+unsigned int hmp_full_threshold = (NICE_0_LOAD * 9) / 8;
+#else
+/* TC2 has a sharp consumption curve at around 800MHz, so
+ * we aim to spread the load around that frequency. */
+unsigned int hmp_full_threshold = 650;  /* 80% of the 800MHz freq * NICE_0_LOAD */
+#endif
+#endif
+
 static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_entity *se);
 static unsigned int hmp_down_migration(int cpu, struct sched_entity *se);
 static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd,
                                                int *min_cpu, struct cpumask *affinity);
 
+static inline struct hmp_domain *hmp_smallest_domain(void)
+{
+       return list_entry(hmp_domains.prev, struct hmp_domain, hmp_domains);
+}
+
 /* Check if cpu is in fastest hmp_domain */
 static inline unsigned int hmp_cpu_is_fastest(int cpu)
 {
@@ -3760,7 +3779,49 @@ static inline unsigned int hmp_select_slower_cpu(struct task_struct *tsk,
 
        return lowest_cpu;
 }
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+/*
+ * Select the 'best' candidate little CPU to wake up on.
+ * Implements a packing strategy which examines CPU in
+ * logical CPU order, and selects the first which will
+ * have at least 10% capacity available, according to
+ * both tracked load of the runqueue and the task.
+ */
+static inline unsigned int hmp_best_little_cpu(struct task_struct *tsk,
+               int cpu)
+{
+       int tmp_cpu;
+       unsigned long estimated_load;
+       struct hmp_domain *hmp;
+       struct sched_avg *avg;
+       struct cpumask allowed_hmp_cpus;
 
+       if (!hmp_packing_enabled ||
+                       tsk->se.avg.load_avg_ratio > ((NICE_0_LOAD * 90)/100))
+               return hmp_select_slower_cpu(tsk, cpu);
+
+       if (hmp_cpu_is_slowest(cpu))
+               hmp = hmp_cpu_domain(cpu);
+       else
+               hmp = hmp_slower_domain(cpu);
+
+       /* respect affinity */
+       cpumask_and(&allowed_hmp_cpus, &hmp->cpus,
+                       tsk_cpus_allowed(tsk));
+
+       for_each_cpu_mask(tmp_cpu, allowed_hmp_cpus) {
+               avg = &cpu_rq(tmp_cpu)->avg;
+               /* estimate new rq load if we add this task */
+               estimated_load = avg->load_avg_ratio +
+                               tsk->se.avg.load_avg_ratio;
+               if (estimated_load <= hmp_full_threshold) {
+                       cpu = tmp_cpu;
+                       break;
+               }
+       }
+       /* if no match was found, the task uses the initial value */
+       return cpu;
+}
+#endif
 static inline void hmp_next_up_delay(struct sched_entity *se, int cpu)
 {
        /* hack - always use clock from first online CPU */
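
As a worked illustration of the first-fit walk in hmp_best_little_cpu()
above, the sketch below uses made-up per-CPU loads; it is standalone
example code, not the kernel implementation.

/* Illustrative re-implementation of the first-fit packing walk in
 * hmp_best_little_cpu(), using made-up per-CPU loads. */
#include <stdio.h>

#define HMP_FULL_THRESHOLD 1152		/* (NICE_0_LOAD * 9) / 8 */

int main(void)
{
	unsigned long rq_load[] = { 1100, 600, 200 };	/* little CPUs 0..2 */
	unsigned long task_load = 150;		/* waking task's load_avg_ratio */
	int cpu, chosen = -1;

	for (cpu = 0; cpu < 3; cpu++) {
		unsigned long estimated = rq_load[cpu] + task_load;
		if (estimated <= HMP_FULL_THRESHOLD) {
			chosen = cpu;	/* first CPU with enough headroom */
			break;
		}
	}
	/* CPU0 would reach 1250 (> 1152), so CPU1 (750) is chosen. */
	printf("chosen little CPU: %d\n", chosen);
	return 0;
}

If no candidate passes the check, the kernel code keeps the CPU it
started with, matching the fallback comment at the end of the loop above.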
@@ -3884,6 +3945,15 @@ static int hmp_freqinvar_from_sysfs(int value)
        return value;
 }
 #endif
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+/* packing value must be non-negative */
+static int hmp_packing_from_sysfs(int value)
+{
+       if (value < 0)
+               return -1;
+       return value;
+}
+#endif
 static void hmp_attr_add(
        const char *name,
        int *value,
@@ -3935,6 +4005,16 @@ static int hmp_attr_init(void)
                &hmp_data.freqinvar_load_scale_enabled,
                NULL,
                hmp_freqinvar_from_sysfs);
+#endif
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+       hmp_attr_add("packing_enable",
+               &hmp_packing_enabled,
+               NULL,
+               hmp_freqinvar_from_sysfs);
+       hmp_attr_add("packing_limit",
+               &hmp_full_threshold,
+               NULL,
+               hmp_packing_from_sysfs);
 #endif
        hmp_data.attr_group.name = "hmp";
        hmp_data.attr_group.attrs = hmp_data.attributes;
@@ -4194,10 +4274,16 @@ unlock:
                return new_cpu;
        }
        if (hmp_down_migration(prev_cpu, &p->se)) {
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+               new_cpu = hmp_best_little_cpu(p, prev_cpu);
+#else
                new_cpu = hmp_select_slower_cpu(p, prev_cpu);
-               hmp_next_down_delay(&p->se, new_cpu);
-               trace_sched_hmp_migrate(p, new_cpu, HMP_MIGRATE_WAKEUP);
-               return new_cpu;
+#endif
+               if (new_cpu != prev_cpu) {
+                       hmp_next_down_delay(&p->se, new_cpu);
+                       trace_sched_hmp_migrate(p, new_cpu, HMP_MIGRATE_WAKEUP);
+                       return new_cpu;
+               }
        }
        /* Make sure that the task stays in its previous hmp domain */
        if (!cpumask_test_cpu(new_cpu, &hmp_cpu_domain(prev_cpu)->cpus))
@@ -6164,16 +6250,49 @@ static struct {
        unsigned long next_balance;     /* in jiffy units */
 } nohz ____cacheline_aligned;
 
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+/*
+ * Decide if the tasks on the busy CPUs in the
+ * littlest domain would benefit from an idle balance
+ */
+static int hmp_packing_ilb_needed(int cpu)
+{
+       struct hmp_domain *hmp;
+       /* always allow ilb on non-slowest domain */
+       if (!hmp_cpu_is_slowest(cpu))
+               return 1;
+
+       hmp = hmp_cpu_domain(cpu);
+       for_each_cpu_and(cpu, &hmp->cpus, nohz.idle_cpus_mask) {
+               /* only idle balance if a CPU is loaded over threshold */
+               if (cpu_rq(cpu)->avg.load_avg_ratio > hmp_full_threshold)
+                       return 1;
+       }
+       return 0;
+}
+#endif
+
 static inline int find_new_ilb(int call_cpu)
 {
        int ilb = cpumask_first(nohz.idle_cpus_mask);
 #ifdef CONFIG_SCHED_HMP
+       int ilb_needed = 1;
+
        /* restrict nohz balancing to occur in the same hmp domain */
        ilb = cpumask_first_and(nohz.idle_cpus_mask,
                        &((struct hmp_domain *)hmp_cpu_domain(call_cpu))->cpus);
+
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+       if (ilb < nr_cpu_ids)
+               ilb_needed = hmp_packing_ilb_needed(ilb);
 #endif
+
+       if (ilb_needed && ilb < nr_cpu_ids && idle_cpu(ilb))
+               return ilb;
+#else
        if (ilb < nr_cpu_ids && idle_cpu(ilb))
                return ilb;
+#endif
 
        return nr_cpu_ids;
 }
@@ -6539,8 +6658,14 @@ static unsigned int hmp_down_migration(int cpu, struct sched_entity *se)
        struct task_struct *p = task_of(se);
        u64 now;
 
-       if (hmp_cpu_is_slowest(cpu))
+       if (hmp_cpu_is_slowest(cpu)) {
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+               if (hmp_packing_enabled)
+                       return 1;
+               else
+#endif
                return 0;
+       }
 
 #ifdef CONFIG_SCHED_HMP_PRIO_FILTER
        /* Filter by task priority */