drbd: The new, smarter resync speed controller
author    Philipp Reisner <philipp.reisner@linbit.com>
Tue, 6 Jul 2010 09:14:00 +0000 (11:14 +0200)
committer Philipp Reisner <philipp.reisner@linbit.com>
Thu, 14 Oct 2010 16:38:14 +0000 (18:38 +0200)
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
drivers/block/drbd/drbd_int.h
drivers/block/drbd/drbd_main.c
drivers/block/drbd/drbd_nl.c
drivers/block/drbd/drbd_receiver.c
drivers/block/drbd/drbd_worker.c

index fd2cdd45f15592ecbd460190cbc9c728b7c27639..facb72ccc56b2080494da66ce72921111442d987 100644
@@ -928,6 +928,12 @@ enum write_ordering_e {
        WO_bio_barrier
 };
 
+struct fifo_buffer {
+       int *values;
+       unsigned int head_index;
+       unsigned int size;
+};
+
 struct drbd_conf {
        /* things that are stored as / read from meta data on disk */
        unsigned long flags;
@@ -1068,6 +1074,11 @@ struct drbd_conf {
        u64 ed_uuid; /* UUID of the exposed data */
        struct mutex state_mutex;
        char congestion_reason;  /* Why we were congested... */
+       atomic_t rs_sect_in; /* counter to measure the incoming resync data rate */
+       int c_sync_rate; /* current resync rate after delay_probe magic */
+       struct fifo_buffer rs_plan_s; /* correction values of resync planner */
+       int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */
+       int rs_planed;    /* resync sectors already planned */
 };
 
 static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
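
All of the new counters are kept in units of 512-byte sectors (rs_sect_in, rs_in_flight, rs_planed), while c_sync_rate is a rate in KiB/s and rs_plan_s holds one correction slot per controller step. Roughly, the bookkeeping the rest of the patch maintains per step is

    rs_in_flight(next) = rs_in_flight - sect_in + req_sect     /* sectors still on the wire / in the proxy */
    rs_planed          = sum of the slots in rs_plan_s         /* sectors already scheduled as corrections */

where sect_in is what was drained from rs_sect_in this step and req_sect is what drbd_rs_controller() (added to drbd_worker.c below) decides to request next.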
index bff4f598d38f58acf0d9f795a92fb6976a650b14..ed09a840d83818467715eed1dd21d524a114e59c 100644
@@ -2734,6 +2734,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
        atomic_set(&mdev->net_cnt, 0);
        atomic_set(&mdev->packet_seq, 0);
        atomic_set(&mdev->pp_in_use, 0);
+       atomic_set(&mdev->rs_sect_in, 0);
 
        mutex_init(&mdev->md_io_mutex);
        mutex_init(&mdev->data.mutex);
index 7d384fd39c16c140674704f7d4772e929351066b..295b8d5937081074a20574992f23e8a32cf97b12 100644
@@ -1587,6 +1587,8 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
        struct crypto_hash *csums_tfm = NULL;
        struct syncer_conf sc;
        cpumask_var_t new_cpu_mask;
+       int *rs_plan_s = NULL;
+       int fifo_size;
 
        if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) {
                retcode = ERR_NOMEM;
@@ -1687,6 +1689,16 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
        if (retcode != NO_ERROR)
                goto fail;
 
+       fifo_size = (sc.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
+       if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
+               rs_plan_s   = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
+               if (!rs_plan_s) {
+                       dev_err(DEV, "kmalloc of fifo_buffer failed");
+                       retcode = ERR_NOMEM;
+                       goto fail;
+               }
+       }
+
        /* ok, assign the rest of it as well.
         * lock against receive_SyncParam() */
        spin_lock(&mdev->peer_seq_lock);
@@ -1703,6 +1715,15 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
                mdev->verify_tfm = verify_tfm;
                verify_tfm = NULL;
        }
+
+       if (fifo_size != mdev->rs_plan_s.size) {
+               kfree(mdev->rs_plan_s.values);
+               mdev->rs_plan_s.values = rs_plan_s;
+               mdev->rs_plan_s.size   = fifo_size;
+               mdev->rs_planed = 0;
+               rs_plan_s = NULL;
+       }
+
        spin_unlock(&mdev->peer_seq_lock);
 
        if (get_ldev(mdev)) {
@@ -1734,6 +1755,7 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
 
        kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
 fail:
+       kfree(rs_plan_s);
        free_cpumask_var(new_cpu_mask);
        crypto_free_hash(csums_tfm);
        crypto_free_hash(verify_tfm);
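
The plan FIFO gets one slot per controller step across the plan-ahead window. Assuming the usual DRBD definitions (SLEEP_TIME = HZ/10, i.e. one step every 0.1 s, and c_plan_ahead given in units of 0.1 s), the size computation above works out to exactly one slot per tenth of a second of plan-ahead:

    fifo_size = (c_plan_ahead * 10 * SLEEP_TIME) / HZ
              = (c_plan_ahead * 10 * HZ/10) / HZ
              = c_plan_ahead                        /* e.g. c_plan_ahead = 20  ->  20 slots, 2.0 s */

With c_plan_ahead = 0 no buffer is allocated, rs_plan_s.size stays 0, and w_make_resync_request() (below) falls back to the fixed sync_conf.rate.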
index 34bea972f734fe9552b0689e1609bcbba1877383..5f80b22e711db74259f960c4c2adb8b1d6783887 100644
@@ -1640,6 +1640,8 @@ static int receive_RSDataReply(struct drbd_conf *mdev, struct p_header *h)
                drbd_send_ack_dp(mdev, P_NEG_ACK, p);
        }
 
+       atomic_add(data_size >> 9, &mdev->rs_sect_in);
+
        return ok;
 }
 
@@ -2810,6 +2812,8 @@ static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h)
        struct crypto_hash *verify_tfm = NULL;
        struct crypto_hash *csums_tfm = NULL;
        const int apv = mdev->agreed_pro_version;
+       int *rs_plan_s = NULL;
+       int fifo_size = 0;
 
        exp_max_sz  = apv <= 87 ? sizeof(struct p_rs_param)
                    : apv == 88 ? sizeof(struct p_rs_param)
@@ -2904,6 +2908,15 @@ static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h)
                        mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target);
                        mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target);
                        mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate);
+
+                       fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
+                       if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
+                               rs_plan_s   = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
+                               if (!rs_plan_s) {
+                                       dev_err(DEV, "kmalloc of fifo_buffer failed");
+                                       goto disconnect;
+                               }
+                       }
                }
 
                spin_lock(&mdev->peer_seq_lock);
@@ -2922,6 +2935,12 @@ static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h)
                        mdev->csums_tfm = csums_tfm;
                        dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
                }
+               if (fifo_size != mdev->rs_plan_s.size) {
+                       kfree(mdev->rs_plan_s.values);
+                       mdev->rs_plan_s.values = rs_plan_s;
+                       mdev->rs_plan_s.size   = fifo_size;
+                       mdev->rs_planed = 0;
+               }
                spin_unlock(&mdev->peer_seq_lock);
        }
 
@@ -4202,6 +4221,7 @@ static int got_IsInSync(struct drbd_conf *mdev, struct p_header *h)
        /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
        mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
        dec_rs_pending(mdev);
+       atomic_add(blksize >> 9, &mdev->rs_sect_in);
 
        return TRUE;
 }
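
Both resync completion paths now feed the controller's input counter; data_size and blksize are byte counts, so the ">> 9" converts them to 512-byte sectors. For example, one acknowledged 4 KiB resync block (the usual BM_BLOCK_SIZE) adds 4096 >> 9 = 8 sectors to rs_sect_in, which drbd_rs_controller() drains once per SLEEP_TIME step.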
index d94720f4bd0745071c75437073e873408b15df8f..fd3e1e9561cb57fcd014921a1086bf635ef29345 100644
@@ -422,6 +422,89 @@ void resync_timer_fn(unsigned long data)
                drbd_queue_work(&mdev->data.work, &mdev->resync_work);
 }
 
+static void fifo_set(struct fifo_buffer *fb, int value)
+{
+       int i;
+
+       for (i = 0; i < fb->size; i++)
+               fb->values[i] = value;
+}
+
+static int fifo_push(struct fifo_buffer *fb, int value)
+{
+       int ov;
+
+       ov = fb->values[fb->head_index];
+       fb->values[fb->head_index++] = value;
+
+       if (fb->head_index >= fb->size)
+               fb->head_index = 0;
+
+       return ov;
+}
+
+static void fifo_add_val(struct fifo_buffer *fb, int value)
+{
+       int i;
+
+       for (i = 0; i < fb->size; i++)
+               fb->values[i] += value;
+}
+
+int drbd_rs_controller(struct drbd_conf *mdev)
+{
+       unsigned int sect_in;  /* Number of sectors that came in since the last turn */
+       unsigned int want;     /* The number of sectors we want in the proxy */
+       int req_sect; /* Number of sectors to request in this turn */
+       int correction; /* Number of sectors more we need in the proxy */
+       int cps; /* correction per invocation of drbd_rs_controller() */
+       int steps; /* Number of time steps to plan ahead */
+       int curr_corr;
+       int max_sect;
+
+       sect_in = atomic_xchg(&mdev->rs_sect_in, 0); /* Number of sectors that came in */
+       mdev->rs_in_flight -= sect_in;
+
+       spin_lock(&mdev->peer_seq_lock); /* get an atomic view on mdev->rs_plan_s */
+
+       steps = mdev->rs_plan_s.size; /* (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
+
+       if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */
+               want = ((mdev->sync_conf.rate * 2 * SLEEP_TIME) / HZ) * steps;
+       } else { /* normal path */
+               want = mdev->sync_conf.c_fill_target ? mdev->sync_conf.c_fill_target :
+                       sect_in * mdev->sync_conf.c_delay_target * HZ / (SLEEP_TIME * 10);
+       }
+
+       correction = want - mdev->rs_in_flight - mdev->rs_planed;
+
+       /* Plan ahead */
+       cps = correction / steps;
+       fifo_add_val(&mdev->rs_plan_s, cps);
+       mdev->rs_planed += cps * steps;
+
+       /* What we do in this step */
+       curr_corr = fifo_push(&mdev->rs_plan_s, 0);
+       spin_unlock(&mdev->peer_seq_lock);
+       mdev->rs_planed -= curr_corr;
+
+       req_sect = sect_in + curr_corr;
+       if (req_sect < 0)
+               req_sect = 0;
+
+       max_sect = (mdev->sync_conf.c_max_rate * 2 * SLEEP_TIME) / HZ;
+       if (req_sect > max_sect)
+               req_sect = max_sect;
+
+       /*
+       dev_warn(DEV, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
+                sect_in, mdev->rs_in_flight, want, correction,
+                steps, cps, mdev->rs_planed, curr_corr, req_sect);
+       */
+
+       return req_sect;
+}
+
 int w_make_resync_request(struct drbd_conf *mdev,
                struct drbd_work *w, int cancel)
 {
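
The controller above is a simple feedback loop: once per SLEEP_TIME it measures what came in (sect_in), compares the amount currently in flight plus the already planned corrections against a target fill level (c_fill_target, or sect_in scaled by c_delay_target), spreads the difference over the plan FIFO, and requests the incoming amount plus this step's share of the correction, capped by c_max_rate. The following stand-alone user-space model walks one step through the same arithmetic; it is an illustration only, the demo_* names are made up, and it assumes the usual DRBD conventions of SLEEP_TIME = HZ/10 and c_plan_ahead / c_delay_target given in units of 0.1 s:

    /* Stand-alone model of one drbd_rs_controller() step; illustration only.
     * demo_* names are made up; HZ and SLEEP_TIME values are assumptions. */
    #include <stdio.h>
    #include <stdlib.h>

    #define HZ         1000
    #define SLEEP_TIME (HZ / 10)              /* one controller step = 0.1 s */

    struct demo_fifo { int *values; unsigned int head_index; unsigned int size; };

    static int demo_push(struct demo_fifo *fb, int value)
    {
            int ov = fb->values[fb->head_index];
            fb->values[fb->head_index++] = value;
            if (fb->head_index >= fb->size)
                    fb->head_index = 0;
            return ov;                        /* oldest slot = this step's correction */
    }

    static void demo_add_val(struct demo_fifo *fb, int value)
    {
            unsigned int i;
            for (i = 0; i < fb->size; i++)    /* spread a correction over all slots */
                    fb->values[i] += value;
    }

    int main(void)
    {
            int c_plan_ahead   = 20;          /* plan 2.0 s ahead (0.1 s units)        */
            int c_delay_target = 10;          /* aim for 1.0 s worth of data in flight */
            int c_max_rate     = 102400;      /* KiB/s                                 */
            int rs_in_flight   = 9000;        /* sectors currently "in the pipe"       */
            int rs_planed      = 0;           /* mirrors mdev->rs_planed               */
            int sect_in        = 1000;        /* sectors that arrived this step        */

            int steps = (c_plan_ahead * 10 * SLEEP_TIME) / HZ;            /* 20 slots  */
            struct demo_fifo plan = { calloc(steps, sizeof(int)), 0, (unsigned)steps };

            rs_in_flight -= sect_in;                                      /* now 8000  */
            int want = sect_in * c_delay_target * HZ / (SLEEP_TIME * 10); /* 10000     */
            int correction = want - rs_in_flight - rs_planed;             /* 2000      */

            int cps = correction / steps;                                 /* 100       */
            demo_add_val(&plan, cps);
            rs_planed += cps * steps;

            int curr_corr = demo_push(&plan, 0);                          /* 100       */
            rs_planed -= curr_corr;

            int req_sect = sect_in + curr_corr;                           /* 1100      */
            if (req_sect < 0)
                    req_sect = 0;
            if (req_sect > c_max_rate * 2 * SLEEP_TIME / HZ)              /* cap 20480 */
                    req_sect = c_max_rate * 2 * SLEEP_TIME / HZ;

            printf("request %d sectors (%d KiB) in the next 0.1 s\n",
                   req_sect, req_sect / 2);
            free(plan.values);
            return 0;
    }

Compiled and run, this prints "request 1100 sectors (550 KiB) in the next 0.1 s": with roughly 5 MB/s coming in and a one-second delay target the controller wants about 10000 sectors in flight, finds 8000, and starts correcting by 100 sectors per step over the 2 s plan-ahead window.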
@@ -459,7 +542,13 @@ int w_make_resync_request(struct drbd_conf *mdev,
        max_segment_size = mdev->agreed_pro_version < 94 ?
                queue_max_segment_size(mdev->rq_queue) : DRBD_MAX_SEGMENT_SIZE;
 
-       number = SLEEP_TIME * mdev->sync_conf.rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
+       if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */
+               number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9);
+               mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
+       } else {
+               mdev->c_sync_rate = mdev->sync_conf.rate;
+               number = SLEEP_TIME * mdev->c_sync_rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
+       }
        pe = atomic_read(&mdev->rs_pending_cnt);
 
        mutex_lock(&mdev->data.mutex);
@@ -593,6 +682,7 @@ next_sector:
        }
 
  requeue:
+       mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
        mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
        put_ldev(mdev);
        return 1;
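
w_make_resync_request() counts bitmap blocks while the controller speaks 512-byte sectors, hence the shifts by (BM_BLOCK_SHIFT - 9). Assuming the usual 4 KiB bitmap granularity (BM_BLOCK_SHIFT = 12) that is a shift by three, and with the numbers from the model above:

    number        = req_sect >> 3                    /* 1100 sectors -> 137 blocks this step  */
    c_sync_rate   = number * HZ * 4 / SLEEP_TIME     /* 137 * 4 * 10 = 5480 KiB/s effective   */
    rs_in_flight += i << 3                           /* blocks actually sent, back in sectors */

so c_sync_rate reports what the controller is currently requesting rather than the configured ceiling.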
@@ -1419,6 +1509,12 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
                        drbd_resync_finished(mdev);
                }
 
+               atomic_set(&mdev->rs_sect_in, 0);
+               mdev->rs_in_flight = 0;
+               mdev->rs_planed = 0;
+               spin_lock(&mdev->peer_seq_lock);
+               fifo_set(&mdev->rs_plan_s, 0);
+               spin_unlock(&mdev->peer_seq_lock);
                /* ns.conn may already be != mdev->state.conn,
                 * we may have been paused in between, or become paused until
                 * the timer triggers.