sched: prioritize non-migratable tasks over migratable ones

author Gregory Haskins <ghaskins@novell.com>

Mon, 12 May 2008 19:20:41 +0000 (21:20 +0200)

committer Ingo Molnar <mingo@elte.hu>

Fri, 6 Jun 2008 13:19:25 +0000 (15:19 +0200)
author Gregory Haskins <ghaskins@novell.com>
Mon, 12 May 2008 19:20:41 +0000 (21:20 +0200)
committer Ingo Molnar <mingo@elte.hu>
Fri, 6 Jun 2008 13:19:25 +0000 (15:19 +0200)
diff --git a/kernel/sched.c b/kernel/sched.c

index bfb8ad8ed1717bf95f82ddf7ea8b5b40bb7fbe7b..7178b8c2351ce5f12289bfe858f52f636fd493a9 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -151,7 +151,8 @@ static inline int task_has_rt_policy(struct task_struct *p)
   */
  struct rt_prio_array {
         DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */
-       struct list_head queue[MAX_RT_PRIO];
+       struct list_head xqueue[MAX_RT_PRIO]; /* exclusive queue */
+       struct list_head squeue[MAX_RT_PRIO];  /* shared queue */
  };
  
  struct rt_bandwidth {
@@ -7542,7 +7543,8 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
  
         array = &rt_rq->active;
         for (i = 0; i < MAX_RT_PRIO; i++) {
-               INIT_LIST_HEAD(array->queue + i);
+               INIT_LIST_HEAD(array->xqueue + i);
+               INIT_LIST_HEAD(array->squeue + i);
                 __clear_bit(i, array->bitmap);
         }
         /* delimiter for bitsearch: */
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c

index 3432d573205d415d8a02070c3f59d3ab2f68fbbb..fefed39fafd8fa6b5088ea8c4179c01ddd7debab 100644 (file)
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -458,7 +458,13 @@ static void enqueue_rt_entity(struct sched_rt_entity *rt_se)
         if (group_rq && rt_rq_throttled(group_rq))
                 return;
  
-       list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se));
+       if (rt_se->nr_cpus_allowed == 1)
+               list_add_tail(&rt_se->run_list,
+                             array->xqueue + rt_se_prio(rt_se));
+       else
+               list_add_tail(&rt_se->run_list,
+                             array->squeue + rt_se_prio(rt_se));
+
         __set_bit(rt_se_prio(rt_se), array->bitmap);
  
         inc_rt_tasks(rt_se, rt_rq);
@@ -470,7 +476,8 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
         struct rt_prio_array *array = &rt_rq->active;
  
         list_del_init(&rt_se->run_list);
-       if (list_empty(array->queue + rt_se_prio(rt_se)))
+       if (list_empty(array->squeue + rt_se_prio(rt_se))
+           && list_empty(array->xqueue + rt_se_prio(rt_se)))
                 __clear_bit(rt_se_prio(rt_se), array->bitmap);
  
         dec_rt_tasks(rt_se, rt_rq);
@@ -537,13 +544,19 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
  /*
   * Put task to the end of the run list without the overhead of dequeue
   * followed by enqueue.
+ *
+ * Note: We always enqueue the task to the shared-queue, regardless of its
+ * previous position w.r.t. exclusive vs shared.  This is so that exclusive RR
+ * tasks fairly round-robin with all tasks on the runqueue, not just other
+ * exclusive tasks.
   */
  static
  void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
  {
         struct rt_prio_array *array = &rt_rq->active;
  
-       list_move_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se));
+       list_del_init(&rt_se->run_list);
+       list_add_tail(&rt_se->run_list, array->squeue + rt_se_prio(rt_se));
  }
  
  static void requeue_task_rt(struct rq *rq, struct task_struct *p)
@@ -601,13 +614,46 @@ static int select_task_rq_rt(struct task_struct *p, int sync)
  }
  #endif /* CONFIG_SMP */
  
+static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
+                                                  struct rt_rq *rt_rq);
+
  /*
   * Preempt the current task with a newly woken task if needed:
   */
  static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
  {
-       if (p->prio < rq->curr->prio)
+       if (p->prio < rq->curr->prio) {
                 resched_task(rq->curr);
+               return;
+       }
+
+#ifdef CONFIG_SMP
+       /*
+        * If:
+        *
+        * - the newly woken task is of equal priority to the current task
+        * - the newly woken task is non-migratable while current is migratable
+        * - current will be preempted on the next reschedule
+        *
+        * we should check to see if current can readily move to a different
+        * cpu.  If so, we will reschedule to allow the push logic to try
+        * to move current somewhere else, making room for our non-migratable
+        * task.
+        */
+       if((p->prio == rq->curr->prio)
+          && p->rt.nr_cpus_allowed == 1
+          && rq->curr->rt.nr_cpus_allowed != 1
+          && pick_next_rt_entity(rq, &rq->rt) != &rq->curr->rt) {
+               cpumask_t mask;
+
+               if (cpupri_find(&rq->rd->cpupri, rq->curr, &mask))
+                       /*
+                        * There appear to be other cpus that can accept
+                        * current, so let's reschedule to try to push it away
+                        */
+                       resched_task(rq->curr);
+       }
+#endif
  }
  
  static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
@@ -621,8 +667,15 @@ static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
         idx = sched_find_first_bit(array->bitmap);
         BUG_ON(idx >= MAX_RT_PRIO);
  
-       queue = array->queue + idx;
-       next = list_entry(queue->next, struct sched_rt_entity, run_list);
+       queue = array->xqueue + idx;
+       if (!list_empty(queue))
+               next = list_entry(queue->next, struct sched_rt_entity,
+                                 run_list);
+       else {
+               queue = array->squeue + idx;
+               next = list_entry(queue->next, struct sched_rt_entity,
+                                 run_list);
+       }
  
         return next;
  }
@@ -692,7 +745,7 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
                         continue;
                 if (next && next->prio < idx)
                         continue;
-               list_for_each_entry(rt_se, array->queue + idx, run_list) {
+               list_for_each_entry(rt_se, array->squeue + idx, run_list) {
                         struct task_struct *p = rt_task_of(rt_se);
                         if (pick_rt_task(rq, p, cpu)) {
                                 next = p;
@@ -1146,6 +1199,14 @@ static void set_cpus_allowed_rt(struct task_struct *p,
                 }
  
                 update_rt_migration(rq);
+
+               if (unlikely(weight == 1 || p->rt.nr_cpus_allowed == 1))
+                       /*
+                        * If either the new or old weight is a "1", we need
+                        * to requeue to properly move between shared and
+                        * exclusive queues.
+                        */
+                       requeue_task_rt(rq, p);
         }
  
         p->cpus_allowed    = *new_mask;
author	Gregory Haskins <ghaskins@novell.com>
	Mon, 12 May 2008 19:20:41 +0000 (21:20 +0200)
committer	Ingo Molnar <mingo@elte.hu>
	Fri, 6 Jun 2008 13:19:25 +0000 (15:19 +0200)
kernel/sched.c		patch | blob | history
kernel/sched_rt.c		patch | blob | history