* CPU notifier priorities.
*/
enum {
+ /*
+ * SCHED_ACTIVE marks a cpu which is coming up active during
+ * CPU_ONLINE and CPU_DOWN_FAILED and must be the first
+ * notifier. CPUSET_ACTIVE adjusts cpuset according to
+ * cpu_active mask right after SCHED_ACTIVE. During
+ * CPU_DOWN_PREPARE, SCHED_INACTIVE and CPUSET_INACTIVE are
+ * ordered in a similar way.
+ *
+ * This ordering guarantees consistent cpu_active mask and
+ * migration behavior to all cpu notifiers.
+ */
+ CPU_PRI_SCHED_ACTIVE = INT_MAX,
+ CPU_PRI_CPUSET_ACTIVE = INT_MAX - 1,
+ CPU_PRI_SCHED_INACTIVE = INT_MIN + 1,
+ CPU_PRI_CPUSET_INACTIVE = INT_MIN,
+
/* migration should happen before other stuff but after perf */
CPU_PRI_PERF = 20,
CPU_PRI_MIGRATION = 10,
extern int cpuset_init(void);
extern void cpuset_init_smp(void);
+extern void cpuset_update_active_cpus(void);
extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
extern int cpuset_cpus_allowed_fallback(struct task_struct *p);
extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
static inline int cpuset_init(void) { return 0; }
static inline void cpuset_init_smp(void) {}
+static inline void cpuset_update_active_cpus(void) /* cpusets-disabled variant */
+{
+ partition_sched_domains(1, NULL, NULL); /* nothing cpuset-side to refresh; just repartition sched domains */
+}
+
static inline void cpuset_cpus_allowed(struct task_struct *p,
struct cpumask *mask)
{
return -EINVAL;
cpu_hotplug_begin();
- set_cpu_active(cpu, false);
err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
if (err) {
- set_cpu_active(cpu, true);
-
nr_calls--;
__cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
printk("%s: attempt to take down CPU %u failed\n",
err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
if (err) {
- set_cpu_active(cpu, true);
/* CPU didn't die: tell everyone. Can't complain. */
cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
goto out_notify;
BUG_ON(!cpu_online(cpu));
- set_cpu_active(cpu, true);
-
/* Now call notifier in preparation. */
cpu_notify(CPU_ONLINE | mod, hcpu);
* but making no active use of cpusets.
*
* This routine ensures that top_cpuset.cpus_allowed tracks
- * cpu_online_map on each CPU hotplug (cpuhp) event.
+ * cpu_active_mask on each CPU hotplug (cpuhp) event.
*
* Called within get_online_cpus(). Needs to call cgroup_lock()
* before calling generate_sched_domains().
*/
-static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
- unsigned long phase, void *unused_cpu)
+void __cpuexit cpuset_update_active_cpus(void)
{
struct sched_domain_attr *attr;
cpumask_var_t *doms;
int ndoms;
- switch (phase) {
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- case CPU_DOWN_PREPARE:
- case CPU_DOWN_PREPARE_FROZEN:
- case CPU_DOWN_FAILED:
- case CPU_DOWN_FAILED_FROZEN:
- break;
-
- default:
- return NOTIFY_DONE;
- }
-
cgroup_lock();
mutex_lock(&callback_mutex);
cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
/* Have scheduler rebuild the domains */
partition_sched_domains(ndoms, doms, attr);
-
- return NOTIFY_OK;
}
#ifdef CONFIG_MEMORY_HOTPLUG
cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
- hotcpu_notifier(cpuset_track_online_cpus, 0);
hotplug_memory_notifier(cpuset_track_online_nodes, 10);
cpuset_wq = create_singlethread_workqueue("cpuset");
.priority = CPU_PRI_MIGRATION,
};
+/*
+ * CPU hotplug callback that sets the cpu_active bit for @hcpu.
+ *
+ * Acts on CPU_ONLINE and CPU_DOWN_FAILED (with or without the
+ * CPU_TASKS_FROZEN modifier) and returns NOTIFY_OK; every other event
+ * is left alone and answered with NOTIFY_DONE.  @nfb is unused.
+ */
+static int __cpuinit sched_cpu_active(struct notifier_block *nfb,
+				      unsigned long action, void *hcpu)
+{
+	/* Fold the *_FROZEN events onto their plain counterparts. */
+	unsigned long event = action & ~CPU_TASKS_FROZEN;
+
+	if (event != CPU_ONLINE && event != CPU_DOWN_FAILED)
+		return NOTIFY_DONE;
+
+	set_cpu_active((long)hcpu, true);
+	return NOTIFY_OK;
+}
+
+/*
+ * CPU hotplug callback that clears the cpu_active bit for @hcpu.
+ *
+ * Only CPU_DOWN_PREPARE (with or without the CPU_TASKS_FROZEN
+ * modifier) is acted upon, yielding NOTIFY_OK; every other event is
+ * answered with NOTIFY_DONE.  @nfb is unused.
+ */
+static int __cpuinit sched_cpu_inactive(struct notifier_block *nfb,
+					unsigned long action, void *hcpu)
+{
+	/* Fold CPU_DOWN_PREPARE_FROZEN onto CPU_DOWN_PREPARE. */
+	unsigned long event = action & ~CPU_TASKS_FROZEN;
+
+	if (event != CPU_DOWN_PREPARE)
+		return NOTIFY_DONE;
+
+	set_cpu_active((long)hcpu, false);
+	return NOTIFY_OK;
+}
+
static int __init migration_init(void)
{
void *cpu = (void *)(long)smp_processor_id();
int err;
- /* Start one for the boot CPU: */
+ /*
+ * Initialize migration for the boot CPU by invoking the migration
+ * notifier callback directly: CPU_UP_PREPARE first, then CPU_ONLINE.
+ */
err = migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
BUG_ON(err == NOTIFY_BAD);
migration_call(&migration_notifier, CPU_ONLINE, cpu);
register_cpu_notifier(&migration_notifier);
+ /*
+ * Register cpu active notifiers: sched_cpu_active() sets the
+ * cpu_active bit and sched_cpu_inactive() clears it. Their
+ * priorities (CPU_PRI_SCHED_ACTIVE / CPU_PRI_SCHED_INACTIVE) sit at
+ * opposite ends of the notifier priority range.
+ */
+ cpu_notifier(sched_cpu_active, CPU_PRI_SCHED_ACTIVE);
+ cpu_notifier(sched_cpu_inactive, CPU_PRI_SCHED_INACTIVE);
+
return 0;
}
early_initcall(migration_init);
}
#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
-#ifndef CONFIG_CPUSETS
/*
- * Add online and remove offline CPUs from the scheduler domains.
- * When cpusets are enabled they take over this function.
+ * Update cpusets according to cpu_active mask. cpuset_cpu_active()
+ * reacts to CPU_ONLINE and CPU_DOWN_FAILED, cpuset_cpu_inactive() to
+ * CPU_DOWN_PREPARE; CPU_TASKS_FROZEN is masked off, so the frozen
+ * variants of those events are handled the same way. Both return
+ * NOTIFY_OK for the events they handle and NOTIFY_DONE otherwise. If
+ * cpusets are disabled, cpuset_update_active_cpus() becomes a simple
+ * wrapper around partition_sched_domains().
*/
-static int update_sched_domains(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+static int __cpuexit cpuset_cpu_active(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
{
- switch (action) {
+ switch (action & ~CPU_TASKS_FROZEN) {
case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- case CPU_DOWN_PREPARE:
- case CPU_DOWN_PREPARE_FROZEN:
case CPU_DOWN_FAILED:
- case CPU_DOWN_FAILED_FROZEN:
- partition_sched_domains(1, NULL, NULL);
+ cpuset_update_active_cpus();
return NOTIFY_OK;
+ default:
+ return NOTIFY_DONE;
+ }
+}
+static int __cpuexit cpuset_cpu_inactive(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_DOWN_PREPARE:
+ cpuset_update_active_cpus();
+ return NOTIFY_OK;
default:
return NOTIFY_DONE;
}
}
-#endif
static int update_runtime(struct notifier_block *nfb,
unsigned long action, void *hcpu)
mutex_unlock(&sched_domains_mutex);
put_online_cpus();
-#ifndef CONFIG_CPUSETS
- /* XXX: Theoretical race here - CPU may be hotplugged now */
- hotcpu_notifier(update_sched_domains, 0);
-#endif
+ hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE);
+ hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE);
/* RT runtime code needs to handle some hotplug events */
hotcpu_notifier(update_runtime, 0);