NLM: Fix reclaim races
author: Trond Myklebust <Trond.Myklebust@netapp.com>
Fri, 9 Jun 2006 13:40:27 +0000 (09:40 -0400)
committer: Trond Myklebust <Trond.Myklebust@netapp.com>
Fri, 9 Jun 2006 13:40:27 +0000 (09:40 -0400)
Currently it is possible for a task to remove its locks at the same time as
the NLM recovery thread is trying to recover them. This quickly leads to an
Oops.
Protect the locks using an rw semaphore while they are being recovered.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
fs/lockd/clntlock.c
fs/lockd/clntproc.c
fs/lockd/host.c
include/linux/lockd/lockd.h

index bce7444687082032394a8f3ff55cba41b0de51eb..52774feab93f3c4736febeeca2146a7dad2b9dfc 100644 (file)
@@ -147,11 +147,10 @@ u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock)
  * Someone has sent us an SM_NOTIFY. Ensure we bind to the new port number,
  * that we mark locks for reclaiming, and that we bump the pseudo NSM state.
  */
-static inline
-void nlmclnt_prepare_reclaim(struct nlm_host *host, u32 newstate)
+static void nlmclnt_prepare_reclaim(struct nlm_host *host)
 {
+       down_write(&host->h_rwsem);
        host->h_monitored = 0;
-       host->h_nsmstate = newstate;
        host->h_state++;
        host->h_nextrebind = 0;
        nlm_rebind_host(host);
@@ -164,6 +163,13 @@ void nlmclnt_prepare_reclaim(struct nlm_host *host, u32 newstate)
        dprintk("NLM: reclaiming locks for host %s", host->h_name);
 }
 
+static void nlmclnt_finish_reclaim(struct nlm_host *host)
+{
+       host->h_reclaiming = 0;
+       up_write(&host->h_rwsem);
+       dprintk("NLM: done reclaiming locks for host %s", host->h_name);
+}
+
 /*
  * Reclaim all locks on server host. We do this by spawning a separate
  * reclaimer thread.
@@ -171,12 +177,10 @@ void nlmclnt_prepare_reclaim(struct nlm_host *host, u32 newstate)
 void
 nlmclnt_recovery(struct nlm_host *host, u32 newstate)
 {
-       if (host->h_reclaiming++) {
-               if (host->h_nsmstate == newstate)
-                       return;
-               nlmclnt_prepare_reclaim(host, newstate);
-       } else {
-               nlmclnt_prepare_reclaim(host, newstate);
+       if (host->h_nsmstate == newstate)
+               return;
+       host->h_nsmstate = newstate;
+       if (!host->h_reclaiming++) {
                nlm_get_host(host);
                __module_get(THIS_MODULE);
                if (kernel_thread(reclaimer, host, CLONE_KERNEL) < 0)
@@ -190,6 +194,7 @@ reclaimer(void *ptr)
        struct nlm_host   *host = (struct nlm_host *) ptr;
        struct nlm_wait   *block;
        struct file_lock *fl, *next;
+       u32 nsmstate;
 
        daemonize("%s-reclaim", host->h_name);
        allow_signal(SIGKILL);
@@ -199,19 +204,25 @@ reclaimer(void *ptr)
        lock_kernel();
        lockd_up();
 
+       nlmclnt_prepare_reclaim(host);
        /* First, reclaim all locks that have been marked. */
 restart:
+       nsmstate = host->h_nsmstate;
        list_for_each_entry_safe(fl, next, &host->h_reclaim, fl_u.nfs_fl.list) {
                list_del_init(&fl->fl_u.nfs_fl.list);
 
                if (signalled())
                        continue;
-               if (nlmclnt_reclaim(host, fl) == 0)
-                       list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted);
-               goto restart;
+               if (nlmclnt_reclaim(host, fl) != 0)
+                       continue;
+               list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted);
+               if (host->h_nsmstate != nsmstate) {
+                       /* Argh! The server rebooted again! */
+                       list_splice_init(&host->h_granted, &host->h_reclaim);
+                       goto restart;
+               }
        }
-
-       host->h_reclaiming = 0;
+       nlmclnt_finish_reclaim(host);
 
        /* Now, wake up all processes that sleep on a blocked lock */
        list_for_each_entry(block, &nlm_blocked, b_list) {
index f96e38155b5cd75d9c8282a54c215c4d088fa7b3..4db62098d3f42da5093747b70077c97c556290c8 100644 (file)
@@ -508,7 +508,10 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
        }
 
        block = nlmclnt_prepare_block(host, fl);
+again:
        for(;;) {
+               /* Reboot protection */
+               fl->fl_u.nfs_fl.state = host->h_state;
                status = nlmclnt_call(req, NLMPROC_LOCK);
                if (status < 0)
                        goto out_unblock;
@@ -531,10 +534,16 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
        }
 
        if (resp->status == NLM_LCK_GRANTED) {
-               fl->fl_u.nfs_fl.state = host->h_state;
+               down_read(&host->h_rwsem);
+               /* Check whether or not the server has rebooted */
+               if (fl->fl_u.nfs_fl.state != host->h_state) {
+                       up_read(&host->h_rwsem);
+                       goto again;
+               }
                fl->fl_flags |= FL_SLEEP;
                /* Ensure the resulting lock will get added to granted list */
                do_vfs_lock(fl);
+               up_read(&host->h_rwsem);
        }
        status = nlm_stat_to_errno(resp->status);
 out_unblock:
@@ -596,6 +605,7 @@ nlmclnt_reclaim(struct nlm_host *host, struct file_lock *fl)
 static int
 nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
 {
+       struct nlm_host *host = req->a_host;
        struct nlm_res  *resp = &req->a_res;
        int             status;
 
@@ -604,7 +614,9 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
         * request, or to deny it with NLM_LCK_DENIED_GRACE_PERIOD. In either
         * case, we want to unlock.
         */
+       down_read(&host->h_rwsem);
        do_vfs_lock(fl);
+       up_read(&host->h_rwsem);
 
        if (req->a_flags & RPC_TASK_ASYNC)
                return nlm_async_call(req, NLMPROC_UNLOCK, &nlmclnt_unlock_ops);
index 5242743c9403e6cce10490dac01ceb9dae684926..38b0e8a1aec0903d61b8bd78a10e05d5f77d29b4 100644 (file)
@@ -117,6 +117,7 @@ nlm_lookup_host(int server, struct sockaddr_in *sin,
        host->h_expires    = jiffies + NLM_HOST_EXPIRE;
        atomic_set(&host->h_count, 1);
        init_waitqueue_head(&host->h_gracewait);
+       init_rwsem(&host->h_rwsem);
        host->h_state      = 0;                 /* pseudo NSM state */
        host->h_nsmstate   = 0;                 /* real NSM state */
        host->h_server     = server;
index a6c1a33e5ae30eaa56cd404428b5058fab1d5034..6b2684763fc79721a5723993090b9182640df108 100644 (file)
@@ -50,6 +50,7 @@ struct nlm_host {
                                h_killed     : 1,
                                h_monitored  : 1;
        wait_queue_head_t       h_gracewait;    /* wait while reclaiming */
+       struct rw_semaphore     h_rwsem;        /* Reboot recovery lock */
        u32                     h_state;        /* pseudo-state counter */
        u32                     h_nsmstate;     /* true remote NSM state */
        u32                     h_pidcount;     /* Pseudopids */