SUNRPC: Don't allow low priority tasks to pre-empt higher priority ones
author Trond Myklebust <Trond.Myklebust@netapp.com>
Sat, 1 Dec 2012 04:59:29 +0000 (23:59 -0500)
committer Trond Myklebust <Trond.Myklebust@netapp.com>
Wed, 5 Dec 2012 23:30:53 +0000 (00:30 +0100)
Currently, the priority queues attempt to be 'fair' to lower priority
tasks by scheduling them after a certain number of higher priority tasks
have run. The problem is that both the transport send queue and
the NFSv4.1 session slot queue have strong ordering requirements.

This patch therefore removes the fairness code in favour of strong
ordering of task priorities.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
include/linux/sunrpc/sched.h
net/sunrpc/sched.c

index dc0c3cc3ada3f8ced03b7fb00b2b1772bb722034..b64f8eb0b973973107e8c55b44b06f2b8998cff9 100644 (file)
@@ -192,7 +192,6 @@ struct rpc_wait_queue {
        pid_t                   owner;                  /* process id of last task serviced */
        unsigned char           maxpriority;            /* maximum priority (0 if queue is not a priority queue) */
        unsigned char           priority;               /* current priority */
-       unsigned char           count;                  /* # task groups remaining serviced so far */
        unsigned char           nr;                     /* # tasks remaining for cookie */
        unsigned short          qlen;                   /* total # tasks waiting in queue */
        struct rpc_timer        timer_list;
index 1aefc9fef866b9c71665ebe272ed537fced0ce5e..d17a704aaf5f2251674b3fa23421fa087b8ddd14 100644 (file)
@@ -98,6 +98,23 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
        list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list);
 }
 
+static void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
+{
+       queue->priority = priority;
+}
+
+static void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid)
+{
+       queue->owner = pid;
+       queue->nr = RPC_BATCH_COUNT;
+}
+
+static void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
+{
+       rpc_set_waitqueue_priority(queue, queue->maxpriority);
+       rpc_set_waitqueue_owner(queue, 0);
+}
+
 /*
  * Add new request to a priority queue.
  */
@@ -109,9 +126,11 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
        struct rpc_task *t;
 
        INIT_LIST_HEAD(&task->u.tk_wait.links);
-       q = &queue->tasks[queue_priority];
        if (unlikely(queue_priority > queue->maxpriority))
-               q = &queue->tasks[queue->maxpriority];
+               queue_priority = queue->maxpriority;
+       if (queue_priority > queue->priority)
+               rpc_set_waitqueue_priority(queue, queue_priority);
+       q = &queue->tasks[queue_priority];
        list_for_each_entry(t, q, u.tk_wait.list) {
                if (t->tk_owner == task->tk_owner) {
                        list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links);
@@ -180,24 +199,6 @@ static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_tas
                        task->tk_pid, queue, rpc_qname(queue));
 }
 
-static inline void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
-{
-       queue->priority = priority;
-       queue->count = 1 << (priority * 2);
-}
-
-static inline void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid)
-{
-       queue->owner = pid;
-       queue->nr = RPC_BATCH_COUNT;
-}
-
-static inline void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
-{
-       rpc_set_waitqueue_priority(queue, queue->maxpriority);
-       rpc_set_waitqueue_owner(queue, 0);
-}
-
 static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, unsigned char nr_queues)
 {
        int i;
@@ -464,8 +465,7 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
                /*
                 * Check if we need to switch queues.
                 */
-               if (--queue->count)
-                       goto new_owner;
+               goto new_owner;
        }
 
        /*