libceph: force resend of osd requests if we skip an osdmap
authorSage Weil <sage@newdream.net>
Fri, 14 Oct 2011 20:33:55 +0000 (13:33 -0700)
committerSage Weil <sage@newdream.net>
Tue, 25 Oct 2011 23:10:17 +0000 (16:10 -0700)
If we skip over one or more map epochs, we need to resend all osd requests
because it is possible they remapped to other servers and then back.

Signed-off-by: Sage Weil <sage@newdream.net>
net/ceph/osd_client.c

index 01ceb59a8b4a43176eab147c839039c509cca8ff..733e46008b89d6e67397d779edf9ccb5cb3e5c8a 100644 (file)
@@ -943,7 +943,7 @@ EXPORT_SYMBOL(ceph_osdc_set_request_linger);
  * Caller should hold map_sem for read and request_mutex.
  */
 static int __map_request(struct ceph_osd_client *osdc,
-                        struct ceph_osd_request *req)
+                        struct ceph_osd_request *req, int force_resend)
 {
        struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
        struct ceph_pg pgid;
@@ -967,7 +967,8 @@ static int __map_request(struct ceph_osd_client *osdc,
                num = err;
        }
 
-       if ((req->r_osd && req->r_osd->o_osd == o &&
+       if ((!force_resend &&
+            req->r_osd && req->r_osd->o_osd == o &&
             req->r_sent >= req->r_osd->o_incarnation &&
             req->r_num_pg_osds == num &&
             memcmp(req->r_pg_osds, acting, sizeof(acting[0])*num) == 0) ||
@@ -1289,18 +1290,18 @@ static void reset_changed_osds(struct ceph_osd_client *osdc)
  *
  * Caller should hold map_sem for read and request_mutex.
  */
-static void kick_requests(struct ceph_osd_client *osdc)
+static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
 {
        struct ceph_osd_request *req, *nreq;
        struct rb_node *p;
        int needmap = 0;
        int err;
 
-       dout("kick_requests\n");
+       dout("kick_requests %s\n", force_resend ? " (force resend)" : "");
        mutex_lock(&osdc->request_mutex);
        for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
                req = rb_entry(p, struct ceph_osd_request, r_node);
-               err = __map_request(osdc, req);
+               err = __map_request(osdc, req, force_resend);
                if (err < 0)
                        continue;  /* error */
                if (req->r_osd == NULL) {
@@ -1318,7 +1319,7 @@ static void kick_requests(struct ceph_osd_client *osdc)
                                 r_linger_item) {
                dout("linger req=%p req->r_osd=%p\n", req, req->r_osd);
 
-               err = __map_request(osdc, req);
+               err = __map_request(osdc, req, force_resend);
                if (err == 0)
                        continue;  /* no change and no osd was specified */
                if (err < 0)
@@ -1395,7 +1396,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
                                ceph_osdmap_destroy(osdc->osdmap);
                                osdc->osdmap = newmap;
                        }
-                       kick_requests(osdc);
+                       kick_requests(osdc, 0);
                        reset_changed_osds(osdc);
                } else {
                        dout("ignoring incremental map %u len %d\n",
@@ -1423,6 +1424,8 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
                             "older than our %u\n", epoch, maplen,
                             osdc->osdmap->epoch);
                } else {
+                       int skipped_map = 0;
+
                        dout("taking full map %u len %d\n", epoch, maplen);
                        newmap = osdmap_decode(&p, p+maplen);
                        if (IS_ERR(newmap)) {
@@ -1432,9 +1435,12 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
                        BUG_ON(!newmap);
                        oldmap = osdc->osdmap;
                        osdc->osdmap = newmap;
-                       if (oldmap)
+                       if (oldmap) {
+                               if (oldmap->epoch + 1 < newmap->epoch)
+                                       skipped_map = 1;
                                ceph_osdmap_destroy(oldmap);
-                       kick_requests(osdc);
+                       }
+                       kick_requests(osdc, skipped_map);
                }
                p += maplen;
                nr_maps--;
@@ -1707,7 +1713,7 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
         * the request still han't been touched yet.
         */
        if (req->r_sent == 0) {
-               rc = __map_request(osdc, req);
+               rc = __map_request(osdc, req, 0);
                if (rc < 0) {
                        if (nofail) {
                                dout("osdc_start_request failed map, "