From 0b877c2baac65994016c6812804d1b30e89c18ed Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Mon, 11 Nov 2013 16:29:29 +0000 Subject: [PATCH] sched: hmp: add read-only hmp domain sysfs file In order to allow userspace to restrict known low-load tasks to little CPUs, we must export this knowledge from the kernel or expect userspace to make their own attempts at figuring it out. Since we now have a userspace requirement for an HMP implementation to always have at least some sysfs files, change the integration so that it only depends upon CONFIG_SCHED_HMP rather than CONFIG_HMP_VARIABLE_SCALE. Fix Kconfig text to match. Signed-off-by: Chris Redpath Signed-off-by: Jon Medhurst --- arch/arm/Kconfig | 31 ++++++++----- kernel/sched/fair.c | 111 +++++++++++++++++++++++++++++++------------- 2 files changed, 98 insertions(+), 44 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index e79dfda6644a..6cbe972ead2a 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1510,6 +1510,17 @@ config SCHED_HMP There is currently no support for migration of task groups, hence !SCHED_AUTOGROUP. Furthermore, normal load-balancing must be disabled between cpus of different type (DISABLE_CPU_SCHED_DOMAIN_BALANCE). + When turned on, this option adds sys/kernel/hmp directory which + contains the following files: + up_threshold - the load average threshold used for up migration + (0 - 1023) + down_threshold - the load average threshold used for down migration + (0 - 1023) + hmp_domains - a list of cpumasks for the present HMP domains, + starting with the 'biggest' and ending with the + 'smallest'. + Note that both the threshold files can be written at runtime to + control scheduler behaviour. config SCHED_HMP_PRIO_FILTER bool "(EXPERIMENTAL) Filter HMP migrations by task priority" @@ -1544,28 +1555,24 @@ config HMP_VARIABLE_SCALE bool "Allows changing the load tracking scale through sysfs" depends on SCHED_HMP help - When turned on, this option exports the thresholds and load average - period value for the load tracking patches through sysfs. + When turned on, this option exports the load average period value + for the load tracking patches through sysfs. The values can be modified to change the rate of load accumulation - and the thresholds used for HMP migration. - The load_avg_period_ms is the time in ms to reach a load average of - 0.5 for an idle task of 0 load average ratio that start a busy loop. - The up_threshold and down_threshold is the value to go to a faster - CPU or to go back to a slower cpu. - The {up,down}_threshold are devided by 1024 before being compared - to the load average. - For examples, with load_avg_period_ms = 128 and up_threshold = 512, + used for HMP migration. 'load_avg_period_ms' is the time in ms to + reach a load average of 0.5 for an idle task of 0 load average + ratio which becomes 100% busy. + For example, with load_avg_period_ms = 128 and up_threshold = 512, a running task with a load of 0 will be migrated to a bigger CPU after 128ms, because after 128ms its load_avg_ratio is 0.5 and the real up_threshold is 0.5. This patch has the same behavior as changing the Y of the load average computation to (1002/1024)^(LOAD_AVG_PERIOD/load_avg_period_ms) - but it remove intermadiate overflows in computation. + but removes intermediate overflows in computation. config HMP_FREQUENCY_INVARIANT_SCALE bool "(EXPERIMENTAL) Frequency-Invariant Tracked Load for HMP" - depends on HMP_VARIABLE_SCALE && CPU_FREQ + depends on SCHED_HMP && CPU_FREQ help Scales the current load contribution in line with the frequency of the CPU that the task was executed on. diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index cab90d6f32d5..c7d808ee0a36 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -31,7 +31,6 @@ #include #include -#ifdef CONFIG_HMP_VARIABLE_SCALE #include #include #ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE @@ -40,7 +39,6 @@ */ #include #endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */ -#endif /* CONFIG_HMP_VARIABLE_SCALE */ #include "sched.h" @@ -1212,8 +1210,6 @@ static u32 __compute_runnable_contrib(u64 n) return contrib + runnable_avg_yN_sum[n]; } -#ifdef CONFIG_HMP_VARIABLE_SCALE - #define HMP_VARIABLE_SCALE_SHIFT 16ULL struct hmp_global_attr { struct attribute attr; @@ -1224,6 +1220,7 @@ struct hmp_global_attr { int *value; int (*to_sysfs)(int); int (*from_sysfs)(int); + ssize_t (*to_sysfs_text)(char *buf, int buf_size); }; #define HMP_DATA_SYSFS_MAX 8 @@ -1294,7 +1291,6 @@ struct cpufreq_extents { static struct cpufreq_extents freq_scale[CONFIG_NR_CPUS]; #endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */ -#endif /* CONFIG_HMP_VARIABLE_SCALE */ /* We can represent the historical contribution to runnable average as the * coefficients of a geometric series. To do this we sub-divide our runnable @@ -1340,9 +1336,8 @@ static __always_inline int __update_entity_runnable_avg(u64 now, #endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */ delta = now - sa->last_runnable_update; -#ifdef CONFIG_HMP_VARIABLE_SCALE + delta = hmp_variable_scale_convert(delta); -#endif /* * This should only happen when time goes backwards, which it * unfortunately does during sched clock init when we swap over to TSC. @@ -3842,7 +3837,6 @@ static inline void hmp_next_down_delay(struct sched_entity *se, int cpu) cpu_rq(cpu)->avg.hmp_last_up_migration = 0; } -#ifdef CONFIG_HMP_VARIABLE_SCALE /* * Heterogenous multiprocessor (HMP) optimizations * @@ -3875,27 +3869,35 @@ static inline void hmp_next_down_delay(struct sched_entity *se, int cpu) * The scale factor hmp_data.multiplier is a fixed point * number: (32-HMP_VARIABLE_SCALE_SHIFT).HMP_VARIABLE_SCALE_SHIFT */ -static u64 hmp_variable_scale_convert(u64 delta) +static inline u64 hmp_variable_scale_convert(u64 delta) { +#ifdef CONFIG_HMP_VARIABLE_SCALE u64 high = delta >> 32ULL; u64 low = delta & 0xffffffffULL; low *= hmp_data.multiplier; high *= hmp_data.multiplier; return (low >> HMP_VARIABLE_SCALE_SHIFT) + (high << (32ULL - HMP_VARIABLE_SCALE_SHIFT)); +#else + return delta; +#endif } static ssize_t hmp_show(struct kobject *kobj, struct attribute *attr, char *buf) { - ssize_t ret = 0; struct hmp_global_attr *hmp_attr = container_of(attr, struct hmp_global_attr, attr); - int temp = *(hmp_attr->value); + int temp; + + if (hmp_attr->to_sysfs_text != NULL) + return hmp_attr->to_sysfs_text(buf, PAGE_SIZE); + + temp = *(hmp_attr->value); if (hmp_attr->to_sysfs != NULL) temp = hmp_attr->to_sysfs(temp); - ret = sprintf(buf, "%d\n", temp); - return ret; + + return (ssize_t)sprintf(buf, "%d\n", temp); } static ssize_t hmp_store(struct kobject *a, struct attribute *attr, @@ -3924,11 +3926,31 @@ static ssize_t hmp_store(struct kobject *a, struct attribute *attr, return ret; } +static ssize_t hmp_print_domains(char *outbuf, int outbufsize) +{ + char buf[64]; + const char nospace[] = "%s", space[] = " %s"; + const char *fmt = nospace; + struct hmp_domain *domain; + struct list_head *pos; + int outpos = 0; + list_for_each(pos, &hmp_domains) { + domain = list_entry(pos, struct hmp_domain, hmp_domains); + if (cpumask_scnprintf(buf, 64, &domain->possible_cpus)) { + outpos += sprintf(outbuf+outpos, fmt, buf); + fmt = space; + } + } + strcat(outbuf, "\n"); + return outpos+1; +} + +#ifdef CONFIG_HMP_VARIABLE_SCALE static int hmp_period_tofrom_sysfs(int value) { return (LOAD_AVG_PERIOD << HMP_VARIABLE_SCALE_SHIFT) / value; } - +#endif /* max value for threshold is 1024 */ static int hmp_theshold_from_sysfs(int value) { @@ -3936,9 +3958,10 @@ static int hmp_theshold_from_sysfs(int value) return -1; return value; } -#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE -/* freqinvar control is only 0,1 off/on */ -static int hmp_freqinvar_from_sysfs(int value) +#if defined(CONFIG_SCHED_HMP_LITTLE_PACKING) || \ + defined(CONFIG_HMP_FREQUENCY_INVARIANT_SCALE) +/* toggle control is only 0,1 off/on */ +static int hmp_toggle_from_sysfs(int value) { if (value < 0 || value > 1) return -1; @@ -3958,7 +3981,9 @@ static void hmp_attr_add( const char *name, int *value, int (*to_sysfs)(int), - int (*from_sysfs)(int)) + int (*from_sysfs)(int), + ssize_t (*to_sysfs_text)(char *, int), + umode_t mode) { int i = 0; while (hmp_data.attributes[i] != NULL) { @@ -3966,13 +3991,17 @@ static void hmp_attr_add( if (i >= HMP_DATA_SYSFS_MAX) return; } - hmp_data.attr[i].attr.mode = 0644; + if (mode) + hmp_data.attr[i].attr.mode = mode; + else + hmp_data.attr[i].attr.mode = 0644; hmp_data.attr[i].show = hmp_show; hmp_data.attr[i].store = hmp_store; hmp_data.attr[i].attr.name = name; hmp_data.attr[i].value = value; hmp_data.attr[i].to_sysfs = to_sysfs; hmp_data.attr[i].from_sysfs = from_sysfs; + hmp_data.attr[i].to_sysfs_text = to_sysfs_text; hmp_data.attributes[i] = &hmp_data.attr[i].attr; hmp_data.attributes[i + 1] = NULL; } @@ -3981,40 +4010,59 @@ static int hmp_attr_init(void) { int ret; memset(&hmp_data, sizeof(hmp_data), 0); + hmp_attr_add("hmp_domains", + NULL, + NULL, + NULL, + hmp_print_domains, + 0444); + hmp_attr_add("up_threshold", + &hmp_up_threshold, + NULL, + hmp_theshold_from_sysfs, + NULL, + 0); + hmp_attr_add("down_threshold", + &hmp_down_threshold, + NULL, + hmp_theshold_from_sysfs, + NULL, + 0); +#ifdef CONFIG_HMP_VARIABLE_SCALE /* by default load_avg_period_ms == LOAD_AVG_PERIOD * meaning no change */ hmp_data.multiplier = hmp_period_tofrom_sysfs(LOAD_AVG_PERIOD); - hmp_attr_add("load_avg_period_ms", &hmp_data.multiplier, hmp_period_tofrom_sysfs, - hmp_period_tofrom_sysfs); - hmp_attr_add("up_threshold", - &hmp_up_threshold, - NULL, - hmp_theshold_from_sysfs); - hmp_attr_add("down_threshold", - &hmp_down_threshold, + hmp_period_tofrom_sysfs, NULL, - hmp_theshold_from_sysfs); + 0); +#endif #ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE /* default frequency-invariant scaling ON */ hmp_data.freqinvar_load_scale_enabled = 1; hmp_attr_add("frequency_invariant_load_scale", &hmp_data.freqinvar_load_scale_enabled, NULL, - hmp_freqinvar_from_sysfs); + hmp_toggle_from_sysfs, + NULL, + 0); #endif #ifdef CONFIG_SCHED_HMP_LITTLE_PACKING hmp_attr_add("packing_enable", &hmp_packing_enabled, NULL, - hmp_freqinvar_from_sysfs); + hmp_toggle_from_sysfs, + NULL, + 0); hmp_attr_add("packing_limit", &hmp_full_threshold, NULL, - hmp_packing_from_sysfs); + hmp_packing_from_sysfs, + NULL, + 0); #endif hmp_data.attr_group.name = "hmp"; hmp_data.attr_group.attrs = hmp_data.attributes; @@ -4023,7 +4071,6 @@ static int hmp_attr_init(void) return 0; } late_initcall(hmp_attr_init); -#endif /* CONFIG_HMP_VARIABLE_SCALE */ /* * return the load of the lowest-loaded CPU in a given HMP domain * min_cpu optionally points to an int to receive the CPU. -- 2.34.1