md-cluster: transfer the resync ownership to another node
authorGuoqing Jiang <gqjiang@suse.com>
Fri, 10 Jul 2015 08:54:04 +0000 (16:54 +0800)
committerNeilBrown <neilb@suse.com>
Mon, 31 Aug 2015 17:41:12 +0000 (19:41 +0200)
When node A stops an array while the array is doing a resync, we need
to let another node B take over the resync task.

To achieve this, node A needs to send an explicit BITMAP_NEEDS_SYNC
message to the cluster. Node B, on receiving that message, will
invoke __recover_slot to do the resync.

Reviewed-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Signed-off-by: NeilBrown <neilb@suse.com>
drivers/md/md-cluster.c
drivers/md/md.c

index 24caabef10cd0912f8d3920bdbd4462ba16eee00..47199addae0426d1a11ce5cd2856591ca927c00c 100644 (file)
@@ -75,6 +75,7 @@ enum msg_type {
        NEWDISK,
        REMOVE,
        RE_ADD,
+       BITMAP_NEEDS_SYNC,
 };
 
 struct cluster_msg {
@@ -454,6 +455,11 @@ static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
                        __func__, __LINE__, msg->slot);
                process_readd_disk(mddev, msg);
                break;
+       case BITMAP_NEEDS_SYNC:
+               pr_info("%s: %d Received BITMAP_NEEDS_SYNC from %d\n",
+                       __func__, __LINE__, msg->slot);
+               __recover_slot(mddev, msg->slot);
+               break;
        default:
                pr_warn("%s:%d Received unknown message from %d\n",
                        __func__, __LINE__, msg->slot);
@@ -814,8 +820,17 @@ static int resync_start(struct mddev *mddev, sector_t lo, sector_t hi)
 
 static void resync_finish(struct mddev *mddev)
 {
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+       struct cluster_msg cmsg;
+       int slot = cinfo->slot_number - 1;
+
        pr_info("%s:%d\n", __func__, __LINE__);
        resync_send(mddev, RESYNCING, 0, 0);
+       if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
+               cmsg.type = cpu_to_le32(BITMAP_NEEDS_SYNC);
+               cmsg.slot = cpu_to_le32(slot);
+               sendmsg(cinfo, &cmsg);
+       }
 }
 
 static int area_resyncing(struct mddev *mddev, int direction,
index cdc080bf09d4793f040a5063ff0cb5760ea093d9..c0637603a39183057eb9e179c5ee7b08720ecdef 100644 (file)
@@ -7959,9 +7959,6 @@ void md_do_sync(struct md_thread *thread)
        /* tell personality that we are finished */
        mddev->pers->sync_request(mddev, max_sectors, &skipped);
 
-       if (mddev_is_clustered(mddev))
-               md_cluster_ops->resync_finish(mddev);
-
        if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
            mddev->curr_resync > 2) {
                if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
@@ -7995,6 +7992,9 @@ void md_do_sync(struct md_thread *thread)
                }
        }
  skip:
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->resync_finish(mddev);
+
        set_bit(MD_CHANGE_DEVS, &mddev->flags);
 
        spin_lock(&mddev->lock);