sched: fix hotplug cpus on ia64
author Peter Zijlstra <a.p.zijlstra@chello.nl>
Thu, 5 Jun 2008 12:49:58 +0000 (14:49 +0200)
committer Ingo Molnar <mingo@elte.hu>
Tue, 10 Jun 2008 10:17:28 +0000 (12:17 +0200)
Cliff Wickman wrote:

> I built an ia64 kernel from Andrew's tree (2.6.26-rc2-mm1)
> and get a very predictable hotplug cpu problem.
> billberry1:/tmp/cpw # ./dis
> disabled cpu 17
> enabled cpu 17
> billberry1:/tmp/cpw # ./dis
> disabled cpu 17
> enabled cpu 17
> billberry1:/tmp/cpw # ./dis
>
> The script that disables the cpu always hangs (unkillable)
> on the 3rd attempt.
>
> And a bit further:
> The kstopmachine thread always sits on the run queue (real time) for about
> 30 minutes before running.

This fix solves some (but not all) of the problems in the interaction between
CPU hotplug and RT bandwidth throttling: on CPU_DOWN_PREPARE the runtime the
outgoing CPU borrowed from (or lent to) its siblings is settled and its rt_rq
is set to RUNTIME_INF, so throttling can no longer hold up the hotplug
(kstopmachine) thread; on CPU_DOWN_FAILED / CPU_ONLINE the default bandwidth
is restored.
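
To make the reclaim step concrete, below is a small userspace model of what
__disable_runtime() in the patch does when a CPU goes down. It is a sketch for
illustration only; the names model_rt_rq and reclaim_runtime() are invented
here and are not kernel interfaces, and the locking and leaf-rt_rq walking of
the real code are omitted.

/*
 * Userspace model of the __disable_runtime() reclaim step -- for
 * illustration only.  model_rt_rq and reclaim_runtime() are made-up
 * names for this sketch, not kernel interfaces.
 */
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

#define NR_CPUS		4
#define RUNTIME_INF	INT64_MAX

struct model_rt_rq {
	int64_t rt_runtime;	/* runtime currently owned by this cpu (us) */
};

/*
 * CPU 'cpu' is going offline: settle whatever it borrowed from (or lent
 * to) its siblings, then mark it unthrottlable so RT bandwidth accounting
 * can never keep the stop-machine thread off that cpu.
 */
static void reclaim_runtime(struct model_rt_rq rq[], int cpu,
			    int64_t default_runtime)
{
	int64_t want = default_runtime - rq[cpu].rt_runtime;
	int i;

	for (i = 0; i < NR_CPUS && want; i++) {
		int64_t diff;

		if (i == cpu)
			continue;

		if (want > 0) {
			/* we lent runtime out: take it back */
			diff = rq[i].rt_runtime < want ? rq[i].rt_runtime : want;
			rq[i].rt_runtime -= diff;
			want -= diff;
		} else {
			/* we borrowed runtime: hand the excess to a sibling */
			rq[i].rt_runtime -= want;
			want = 0;
		}
	}

	rq[cpu].rt_runtime = RUNTIME_INF;
}

int main(void)
{
	/* default runtime 950000us; cpu1 has borrowed 100000us from cpu0 */
	struct model_rt_rq rq[NR_CPUS] = {
		{ 850000 }, { 1050000 }, { 950000 }, { 950000 }
	};
	int i;

	reclaim_runtime(rq, 1, 950000);

	for (i = 0; i < NR_CPUS; i++)
		printf("cpu%d rt_runtime = %" PRId64 "\n", i, rq[i].rt_runtime);

	return 0;
}

With these example numbers, cpu1 had borrowed 100000us beyond the 950000us
default; after reclaim_runtime() the excess is handed back to cpu0 and cpu1 is
marked RUNTIME_INF, matching what __disable_runtime() does before the CPU is
torn down. __enable_runtime() is the inverse: when the CPU comes back it
simply resets rt_runtime to the default and clears rt_time.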

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
kernel/sched.c
kernel/sched_rt.c

diff --git a/kernel/sched.c b/kernel/sched.c
index 727bdef7616121e4c143472517a64b21396376ce..e9c24a1286557dfbe99cd58ce0e4668370e252a2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7513,21 +7513,28 @@ int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
 static int update_sched_domains(struct notifier_block *nfb,
                                unsigned long action, void *hcpu)
 {
+       int cpu = (int)(long)hcpu;
+
        switch (action) {
-       case CPU_UP_PREPARE:
-       case CPU_UP_PREPARE_FROZEN:
        case CPU_DOWN_PREPARE:
        case CPU_DOWN_PREPARE_FROZEN:
+               disable_runtime(cpu_rq(cpu));
+               /* fall-through */
+       case CPU_UP_PREPARE:
+       case CPU_UP_PREPARE_FROZEN:
                detach_destroy_domains(&cpu_online_map);
                free_sched_domains();
                return NOTIFY_OK;
 
-       case CPU_UP_CANCELED:
-       case CPU_UP_CANCELED_FROZEN:
+
        case CPU_DOWN_FAILED:
        case CPU_DOWN_FAILED_FROZEN:
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
+               enable_runtime(cpu_rq(cpu));
+               /* fall-through */
+       case CPU_UP_CANCELED:
+       case CPU_UP_CANCELED_FROZEN:
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
                /*
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index eaa606071d51c44f71a0c2d265c78566739f2c1f..8ae3416e0bb419ecc9fcb6ad5bf3cb699e1cf504 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -286,6 +286,9 @@ static int balance_runtime(struct rt_rq *rt_rq)
                        continue;
 
                spin_lock(&iter->rt_runtime_lock);
+               if (iter->rt_runtime == RUNTIME_INF)
+                       goto next;
+
                diff = iter->rt_runtime - iter->rt_time;
                if (diff > 0) {
                        do_div(diff, weight);
@@ -299,12 +302,105 @@ static int balance_runtime(struct rt_rq *rt_rq)
                                break;
                        }
                }
+next:
                spin_unlock(&iter->rt_runtime_lock);
        }
        spin_unlock(&rt_b->rt_runtime_lock);
 
        return more;
 }
+
+static void __disable_runtime(struct rq *rq)
+{
+       struct root_domain *rd = rq->rd;
+       struct rt_rq *rt_rq;
+
+       if (unlikely(!scheduler_running))
+               return;
+
+       for_each_leaf_rt_rq(rt_rq, rq) {
+               struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
+               s64 want;
+               int i;
+
+               spin_lock(&rt_b->rt_runtime_lock);
+               spin_lock(&rt_rq->rt_runtime_lock);
+               if (rt_rq->rt_runtime == RUNTIME_INF ||
+                               rt_rq->rt_runtime == rt_b->rt_runtime)
+                       goto balanced;
+               spin_unlock(&rt_rq->rt_runtime_lock);
+
+               want = rt_b->rt_runtime - rt_rq->rt_runtime;
+
+               for_each_cpu_mask(i, rd->span) {
+                       struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
+                       s64 diff;
+
+                       if (iter == rt_rq)
+                               continue;
+
+                       spin_lock(&iter->rt_runtime_lock);
+                       if (want > 0) {
+                               diff = min_t(s64, iter->rt_runtime, want);
+                               iter->rt_runtime -= diff;
+                               want -= diff;
+                       } else {
+                               iter->rt_runtime -= want;
+                               want -= want;
+                       }
+                       spin_unlock(&iter->rt_runtime_lock);
+
+                       if (!want)
+                               break;
+               }
+
+               spin_lock(&rt_rq->rt_runtime_lock);
+               BUG_ON(want);
+balanced:
+               rt_rq->rt_runtime = RUNTIME_INF;
+               spin_unlock(&rt_rq->rt_runtime_lock);
+               spin_unlock(&rt_b->rt_runtime_lock);
+       }
+}
+
+static void disable_runtime(struct rq *rq)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&rq->lock, flags);
+       __disable_runtime(rq);
+       spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+static void __enable_runtime(struct rq *rq)
+{
+       struct root_domain *rd = rq->rd;
+       struct rt_rq *rt_rq;
+
+       if (unlikely(!scheduler_running))
+               return;
+
+       for_each_leaf_rt_rq(rt_rq, rq) {
+               struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
+
+               spin_lock(&rt_b->rt_runtime_lock);
+               spin_lock(&rt_rq->rt_runtime_lock);
+               rt_rq->rt_runtime = rt_b->rt_runtime;
+               rt_rq->rt_time = 0;
+               spin_unlock(&rt_rq->rt_runtime_lock);
+               spin_unlock(&rt_b->rt_runtime_lock);
+       }
+}
+
+static void enable_runtime(struct rq *rq)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&rq->lock, flags);
+       __enable_runtime(rq);
+       spin_unlock_irqrestore(&rq->lock, flags);
+}
+
 #endif
 
 static inline int rt_se_prio(struct sched_rt_entity *rt_se)
@@ -334,14 +430,13 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 
 #ifdef CONFIG_SMP
        if (rt_rq->rt_time > runtime) {
-               int more;
-
                spin_unlock(&rt_rq->rt_runtime_lock);
-               more = balance_runtime(rt_rq);
+               balance_runtime(rt_rq);
                spin_lock(&rt_rq->rt_runtime_lock);
 
-               if (more)
-                       runtime = sched_rt_runtime(rt_rq);
+               runtime = sched_rt_runtime(rt_rq);
+               if (runtime == RUNTIME_INF)
+                       return 0;
        }
 #endif
 
@@ -1174,6 +1269,8 @@ static void rq_online_rt(struct rq *rq)
        if (rq->rt.overloaded)
                rt_set_overload(rq);
 
+       __enable_runtime(rq);
+
        cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio);
 }
 
@@ -1183,6 +1280,8 @@ static void rq_offline_rt(struct rq *rq)
        if (rq->rt.overloaded)
                rt_clear_overload(rq);
 
+       __disable_runtime(rq);
+
        cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
 }