NFSv4.1: pNFS data servers may be temporarily offline
authorTrond Myklebust <Trond.Myklebust@netapp.com>
Tue, 18 Sep 2012 23:51:12 +0000 (19:51 -0400)
committerTrond Myklebust <Trond.Myklebust@netapp.com>
Fri, 28 Sep 2012 20:03:09 +0000 (16:03 -0400)
In cases where the pNFS data server is just temporarily out of service,
we want to mark it as such, and then try again later. Typically that will
be in cases of network connection errors etc.
This patch allows us to mark the devices as being "unavailable" for such
transient errors, and will make them available for retries after a
2 minute timeout period.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
fs/nfs/nfs4filelayout.c
fs/nfs/nfs4filelayout.h
fs/nfs/nfs4filelayoutdev.c
fs/nfs/pnfs.h
fs/nfs/pnfs_dev.c

index af6ee4ad3f15b691b342b883ae0c0ba0c4447bfa..dac2162c3ac46230da2a33c87ebdc2a7816177a6 100644 (file)
@@ -205,7 +205,7 @@ static int filelayout_async_handle_error(struct rpc_task *task,
        case -EPIPE:
                dprintk("%s DS connection error %d\n", __func__,
                        task->tk_status);
-               filelayout_mark_devid_invalid(devid);
+               nfs4_mark_deviceid_unavailable(devid);
                clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags);
                _pnfs_return_layout(inode);
                rpc_wake_up(&tbl->slot_tbl_waitq);
@@ -269,6 +269,22 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata)
                (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
 }
 
+bool
+filelayout_test_devid_unavailable(struct nfs4_deviceid_node *node)
+{
+       return filelayout_test_devid_invalid(node) ||
+               nfs4_test_deviceid_unavailable(node);
+}
+
+static bool
+filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
+{
+       struct nfs4_deviceid_node *node = FILELAYOUT_DEVID_NODE(lseg);
+
+       return filelayout_test_layout_invalid(lseg->pls_layout) ||
+               filelayout_test_devid_unavailable(node);
+}
+
 /*
  * Call ops for the async read/write cases
  * In the case of dense layouts, the offset needs to be reset to its
@@ -613,8 +629,8 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
                        goto out;
        } else
                dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
-       /* Found deviceid is being reaped */
-       if (test_bit(NFS_DEVICEID_INVALID, &dsaddr->id_node.flags))
+       /* Found deviceid is unavailable */
+       if (filelayout_test_devid_unavailable(&dsaddr->id_node))
                        goto out_put;
 
        fl->dsaddr = dsaddr;
index 11053c425a612bd2191ae0b3887406f68ecc246a..10b0f134400bd009762dd2e8ec60f3ed7eeddb47 100644 (file)
@@ -140,12 +140,8 @@ filelayout_test_devid_invalid(struct nfs4_deviceid_node *node)
        return test_bit(NFS_DEVICEID_INVALID, &node->flags);
 }
 
-static inline bool
-filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
-{
-       return filelayout_test_devid_invalid(FILELAYOUT_DEVID_NODE(lseg)) ||
-               filelayout_test_layout_invalid(lseg->pls_layout);
-}
+extern bool
+filelayout_test_devid_unavailable(struct nfs4_deviceid_node *node);
 
 extern struct nfs_fh *
 nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j);
index b85a29df20ae61e591a06d5930088cf218c6bba7..3336d5eaf879adc07a2e95f64b512835935763a6 100644 (file)
@@ -804,13 +804,14 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
        struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
        struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
 
-       if (filelayout_test_devid_invalid(devid))
+       if (filelayout_test_devid_unavailable(devid))
                return NULL;
 
        if (ds == NULL) {
                printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
                        __func__, ds_idx);
-               goto mark_dev_invalid;
+               filelayout_mark_devid_invalid(devid);
+               return NULL;
        }
 
        if (!ds->ds_clp) {
@@ -818,14 +819,12 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
                int err;
 
                err = nfs4_ds_connect(s, ds);
-               if (err)
-                       goto mark_dev_invalid;
+               if (err) {
+                       nfs4_mark_deviceid_unavailable(devid);
+                       return NULL;
+               }
        }
        return ds;
-
-mark_dev_invalid:
-       filelayout_mark_devid_invalid(devid);
-       return NULL;
 }
 
 module_param(dataserver_retrans, uint, 0644);
index bc8e5001203daad169dd11f54d1a4dce91c21752..9735031e1e1afdf9c87784a0911262e041d259ed 100644 (file)
@@ -234,6 +234,7 @@ struct nfs4_threshold *pnfs_mdsthreshold_alloc(void);
 /* nfs4_deviceid_flags */
 enum {
        NFS_DEVICEID_INVALID = 0,       /* set when MDS clientid recalled */
+       NFS_DEVICEID_UNAVAILABLE,       /* device temporarily unavailable */
 };
 
 /* pnfs_dev.c */
@@ -243,6 +244,7 @@ struct nfs4_deviceid_node {
        const struct pnfs_layoutdriver_type *ld;
        const struct nfs_client         *nfs_client;
        unsigned long                   flags;
+       unsigned long                   timestamp_unavailable;
        struct nfs4_deviceid            deviceid;
        atomic_t                        ref;
 };
@@ -255,6 +257,8 @@ void nfs4_init_deviceid_node(struct nfs4_deviceid_node *,
                             const struct nfs4_deviceid *);
 struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *);
 bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *);
+void nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node);
+bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node);
 void nfs4_deviceid_purge_client(const struct nfs_client *);
 
 static inline void
index 73f701f1f4d3325e2c54efb68e14b1df40eb90e1..d35b62e83ea638e9736587bfaec61c612221f579 100644 (file)
@@ -40,6 +40,8 @@
 #define NFS4_DEVICE_ID_HASH_SIZE       (1 << NFS4_DEVICE_ID_HASH_BITS)
 #define NFS4_DEVICE_ID_HASH_MASK       (NFS4_DEVICE_ID_HASH_SIZE - 1)
 
+#define PNFS_DEVICE_RETRY_TIMEOUT (120*HZ)
+
 static struct hlist_head nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE];
 static DEFINE_SPINLOCK(nfs4_deviceid_lock);
 
@@ -218,6 +220,30 @@ nfs4_put_deviceid_node(struct nfs4_deviceid_node *d)
 }
 EXPORT_SYMBOL_GPL(nfs4_put_deviceid_node);
 
+void
+nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node)
+{
+       node->timestamp_unavailable = jiffies;
+       set_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags);
+}
+EXPORT_SYMBOL_GPL(nfs4_mark_deviceid_unavailable);
+
+bool
+nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node)
+{
+       if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags)) {
+               unsigned long start, end;
+
+               end = jiffies;
+               start = end - PNFS_DEVICE_RETRY_TIMEOUT;
+               if (time_in_range(node->timestamp_unavailable, start, end))
+                       return true;
+               clear_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags);
+       }
+       return false;
+}
+EXPORT_SYMBOL_GPL(nfs4_test_deviceid_unavailable);
+
 static void
 _deviceid_purge_client(const struct nfs_client *clp, long hash)
 {
@@ -276,3 +302,4 @@ nfs4_deviceid_mark_client_invalid(struct nfs_client *clp)
        }
        rcu_read_unlock();
 }
+