Merge branch 'drbd-8.4_ed6' into for-3.8-drivers-drbd-8.4_ed6
drivers/block/drbd/drbd_main.c
index c0acd86c84153185cfc8c18716a25dffe0928d65..52de26daa1f6c4dceed2115dac702aa431d2d289 100644 (file)
@@ -105,8 +105,8 @@ module_param(fault_devs, int, 0644);
 
 /* module parameter, defined */
 unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
-int disable_sendpage;
-int allow_oos;
+bool disable_sendpage;
+bool allow_oos;
 int proc_details;       /* Detail level in proc drbd */
 
 /* Module parameter for setting the user mode helper program
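A note on the int-to-bool change above: newer kernels type-check module parameters, so a parameter registered with type "bool" must be backed by a real C bool or the build breaks. A minimal sketch (the MODULE_PARM_DESC text is illustrative, not DRBD's):

        #include <linux/module.h>
        #include <linux/moduleparam.h>

        static bool disable_sendpage;   /* was "int"; bool is now required */
        module_param(disable_sendpage, bool, 0644);
        MODULE_PARM_DESC(disable_sendpage, "disallow the use of sendpage");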
@@ -149,11 +149,6 @@ static const struct block_device_operations drbd_ops = {
        .release = drbd_release,
 };
 
-static void bio_destructor_drbd(struct bio *bio)
-{
-       bio_free(bio, drbd_md_io_bio_set);
-}
-
 struct bio *bio_alloc_drbd(gfp_t gfp_mask)
 {
        struct bio *bio;
@@ -164,7 +159,6 @@ struct bio *bio_alloc_drbd(gfp_t gfp_mask)
        bio = bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set);
        if (!bio)
                return NULL;
-       bio->bi_destructor = bio_destructor_drbd;
        return bio;
 }
 
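The two removals here track an upstream block-layer change: struct bio no longer carries a bi_destructor hook, and a bio allocated from a bio_set is returned to that set automatically by bio_put(). A sketch of the resulting lifecycle, using the same allocation call as above:

        struct bio *bio = bio_alloc_bioset(GFP_NOIO, 1, drbd_md_io_bio_set);

        if (bio) {
                /* ... fill in, submit, wait for completion ... */
                bio_put(bio);   /* frees back into drbd_md_io_bio_set */
        }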
@@ -207,7 +201,7 @@ void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr,
 
        spin_lock_irq(&tconn->req_lock);
 
-       /* find latest not yet barrier-acked write request,
+       /* find oldest not yet barrier-acked write request,
         * count writes in its epoch. */
        list_for_each_entry(r, &tconn->transfer_log, tl_requests) {
                const unsigned s = r->rq_state;
@@ -250,8 +244,14 @@ void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr,
                goto bail;
        }
 
-       /* Clean up list of requests processed during current epoch */
-       list_for_each_entry_safe(req, r, &tconn->transfer_log, tl_requests) {
+       /* Clean up list of requests processed during current epoch. */
+       /* This extra list walk restart is paranoia, to catch requests
+        * being barrier-acked "unexpectedly". It should usually find the
+        * same req again, or some READ preceding it. */
+       list_for_each_entry(req, &tconn->transfer_log, tl_requests)
+               if (req->epoch == expect_epoch)
+                       break;
+       list_for_each_entry_safe_from(req, r, &tconn->transfer_log, tl_requests) {
                if (req->epoch != expect_epoch)
                        break;
                _req_mod(req, BARRIER_ACKED);
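The rewritten cleanup relies on list_for_each_entry_safe_from(): unlike the plain _safe variant it starts at the current value of its cursor rather than at the list head, and it caches the successor so the current entry may be unlinked or freed in the loop body (as _req_mod() with BARRIER_ACKED may do). A self-contained sketch of the same two-phase pattern, assuming <linux/list.h> and a hypothetical item type:

        struct item { int epoch; struct list_head link; };
        struct item *pos, *n;

        list_for_each_entry(pos, &log, link)    /* phase 1: find the epoch */
                if (pos->epoch == expect_epoch)
                        break;
        list_for_each_entry_safe_from(pos, n, &log, link) {     /* phase 2 */
                if (pos->epoch != expect_epoch)
                        break;                  /* ran past the epoch */
                list_del(&pos->link);           /* safe: n is already cached */
                kfree(pos);
        }

If phase 1 finds no match, the cursor ends up addressing the list head itself and phase 2 terminates before its first iteration, so the pattern degrades safely.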
@@ -838,8 +838,10 @@ int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags)
                put_ldev(mdev);
                return -EIO;
        }
+       spin_lock_irq(&mdev->ldev->md.uuid_lock);
        for (i = UI_CURRENT; i < UI_SIZE; i++)
                p->uuid[i] = mdev->ldev ? cpu_to_be64(mdev->ldev->md.uuid[i]) : 0;
+       spin_unlock_irq(&mdev->ldev->md.uuid_lock);
 
        mdev->comm_bm_set = drbd_bm_total_weight(mdev);
        p->uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set);
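The new lock pair gives the sender a consistent view of the UUID array: the writers converted later in this patch (drbd_uuid_set() and friends) update md.uuid[] under md.uuid_lock with interrupts disabled, so reading all slots under the same lock guarantees no half-updated set goes out on the wire. Conceptually (a sketch, not the patch itself):

        u64 snapshot[UI_SIZE];

        spin_lock_irq(&mdev->ldev->md.uuid_lock);
        memcpy(snapshot, mdev->ldev->md.uuid, sizeof(snapshot));
        spin_unlock_irq(&mdev->ldev->md.uuid_lock);
        /* snapshot[] can now be byte-swapped and sent without the lock */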
@@ -912,7 +914,8 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl
        struct drbd_socket *sock;
        struct p_sizes *p;
        sector_t d_size, u_size;
-       int q_order_type, max_bio_size;
+       int q_order_type;
+       unsigned int max_bio_size;
 
        if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
                D_ASSERT(mdev->ldev->backing_bdev);
@@ -922,7 +925,7 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl
                rcu_read_unlock();
                q_order_type = drbd_queue_order_type(mdev);
                max_bio_size = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
-               max_bio_size = min_t(int, max_bio_size, DRBD_MAX_BIO_SIZE);
+               max_bio_size = min(max_bio_size, DRBD_MAX_BIO_SIZE);
                put_ldev(mdev);
        } else {
                d_size = 0;
@@ -937,9 +940,9 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl
                return -EIO;
 
        if (mdev->tconn->agreed_pro_version <= 94)
-               max_bio_size = min_t(int, max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
+               max_bio_size = min(max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
        else if (mdev->tconn->agreed_pro_version < 100)
-               max_bio_size = min_t(int, max_bio_size, DRBD_MAX_BIO_SIZE_P95);
+               max_bio_size = min(max_bio_size, DRBD_MAX_BIO_SIZE_P95);
 
        p->d_size = cpu_to_be64(d_size);
        p->u_size = cpu_to_be64(u_size);
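The min_t(int, ...) to min() conversions here (and the matching max_t change in drbd_md_read() further down) close a signedness trap: forcing a large unsigned value through int flips its sign, so the "minimum" can come out wrong. A standalone userspace demonstration, with a simplified stand-in for the kernel macro:

        #include <stdio.h>
        #include <limits.h>

        /* simplified stand-in for the kernel's min_t() */
        #define min_t(type, a, b) \
                ((type)(a) < (type)(b) ? (type)(a) : (type)(b))

        int main(void)
        {
                unsigned int max_bio_size = (unsigned int)INT_MAX + 1u;
                unsigned int cap = 1u << 20;    /* 1 MiB */

                /* the 2 GiB value wraps to INT_MIN and wrongly "wins" */
                printf("min_t(int,...): %d\n", min_t(int, max_bio_size, cap));
                /* a plain unsigned comparison gives the intended answer */
                printf("min:            %u\n",
                       max_bio_size < cap ? max_bio_size : cap);
                return 0;
        }

With max_bio_size now declared unsigned int, plain min() type-checks both operands and needs no cast.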
@@ -2318,9 +2321,9 @@ static void drbd_cleanup(void)
 }
 
 /**
- * drbd_congested() - Callback for pdflush
+ * drbd_congested() - Callback for the flusher thread
  * @congested_data:    User data
- * @bdi_bits:          Bits pdflush is currently interested in
+ * @bdi_bits:          Bits the BDI flusher thread is currently interested in
  *
  * Returns 1<<BDI_async_congested and/or 1<<BDI_sync_congested if we are congested.
  */
@@ -2338,6 +2341,22 @@ static int drbd_congested(void *congested_data, int bdi_bits)
                goto out;
        }
 
+       if (test_bit(CALLBACK_PENDING, &mdev->tconn->flags)) {
+               r |= (1 << BDI_async_congested);
+               /* Without good local data, we would need to read from remote,
+                * and that would need the worker thread as well, which is
+                * currently blocked waiting for that usermode helper to
+                * finish.
+                */
+               if (!get_ldev_if_state(mdev, D_UP_TO_DATE))
+                       r |= (1 << BDI_sync_congested);
+               else
+                       put_ldev(mdev);
+               r &= bdi_bits;
+               reason = 'c';
+               goto out;
+       }
+
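This branch pairs with a change elsewhere in the series that sets CALLBACK_PENDING around a blocking usermode helper call; while the helper runs the worker thread is unavailable, so the device reports itself congested instead of letting writeback pile up behind it. The producer side, sketched (argv/envp setup omitted):

        set_bit(CALLBACK_PENDING, &tconn->flags);
        ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
        clear_bit(CALLBACK_PENDING, &tconn->flags);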
        if (get_ldev(mdev)) {
                q = bdev_get_queue(mdev->ldev->backing_bdev);
                r = bdi_congested(&q->backing_dev_info, bdi_bits);
@@ -2624,6 +2643,7 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor,
        q->backing_dev_info.congested_data = mdev;
 
        blk_queue_make_request(q, drbd_make_request);
+       blk_queue_flush(q, REQ_FLUSH | REQ_FUA);
        /* Set max_hw_sectors to the odd value of 8 KiB here;
           this triggers a max_bio_size message upon first attach or connect */
        blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
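blk_queue_flush() (the API of this kernel generation) declares which cache-control requests the queue honors: REQ_FLUSH for explicit cache flushes, REQ_FUA for forced-unit-access writes. Without it the block layer silently strips both flags, so flush semantics would never reach drbd_make_request(). The setup sequence, sketched:

        struct request_queue *q = blk_alloc_queue(GFP_KERNEL);

        if (q) {
                blk_queue_make_request(q, drbd_make_request);
                blk_queue_flush(q, REQ_FLUSH | REQ_FUA); /* accept flush+FUA */
        }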
@@ -2801,6 +2821,22 @@ void drbd_free_sock(struct drbd_tconn *tconn)
 
 /* meta data management */
 
+void conn_md_sync(struct drbd_tconn *tconn)
+{
+       struct drbd_conf *mdev;
+       int vnr;
+
+       rcu_read_lock();
+       idr_for_each_entry(&tconn->volumes, mdev, vnr) {
+               kref_get(&mdev->kref);
+               rcu_read_unlock();
+               drbd_md_sync(mdev);
+               kref_put(&mdev->kref, &drbd_minor_destroy);
+               rcu_read_lock();
+       }
+       rcu_read_unlock();
+}
+
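conn_md_sync() uses the standard escape hatch for sleeping inside an RCU read-side section: drbd_md_sync() does disk I/O and may sleep, so each device is pinned with a kref, the RCU lock is dropped for the blocking call, and re-taken before the walk continues (idr_for_each_entry() resumes by looking up the next id, so this is iteration-safe). The generic shape, with hypothetical names:

        rcu_read_lock();
        idr_for_each_entry(&some_idr, obj, id) {
                kref_get(&obj->kref);   /* obj now outlives the RCU section */
                rcu_read_unlock();

                do_blocking_work(obj);  /* may sleep */

                kref_put(&obj->kref, obj_release);
                rcu_read_lock();        /* re-enter before continuing */
        }
        rcu_read_unlock();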
 struct meta_data_on_disk {
        u64 la_size;           /* last agreed size. */
        u64 uuid[UI_SIZE];   /* UUIDs. */
@@ -2866,7 +2902,7 @@ void drbd_md_sync(struct drbd_conf *mdev)
        if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
                /* this was a try anyways ... */
                dev_err(DEV, "meta data update failed!\n");
-               drbd_chk_io_error(mdev, 1, true);
+               drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
        }
 
        /* Update mdev->ldev->md.la_size_sect,
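drbd_chk_io_error() now takes a named constant instead of a bare boolean, so each call site documents what kind of failure it reports. The enumerator comes from elsewhere in this series; illustratively (the exact list lives in drbd_int.h):

        /* illustrative only, not copied from drbd_int.h */
        enum drbd_force_detach_flags {
                DRBD_IO_ERROR,
                DRBD_META_IO_ERROR,
                DRBD_FORCE_DETACH,
        };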
@@ -2958,9 +2994,9 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 
        spin_lock_irq(&mdev->tconn->req_lock);
        if (mdev->state.conn < C_CONNECTED) {
-               int peer;
+               unsigned int peer;
                peer = be32_to_cpu(buffer->la_peer_max_bio_size);
-               peer = max_t(int, peer, DRBD_MAX_BIO_SIZE_SAFE);
+               peer = max(peer, DRBD_MAX_BIO_SIZE_SAFE);
                mdev->peer_max_bio_size = peer;
        }
        spin_unlock_irq(&mdev->tconn->req_lock);
@@ -2998,7 +3034,7 @@ void drbd_md_mark_dirty(struct drbd_conf *mdev)
 }
 #endif
 
-static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local)
+void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local)
 {
        int i;
 
@@ -3006,7 +3042,7 @@ static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local)
                mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i];
 }
 
-void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
+void __drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
 {
        if (idx == UI_CURRENT) {
                if (mdev->state.role == R_PRIMARY)
@@ -3021,14 +3057,24 @@ void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
        drbd_md_mark_dirty(mdev);
 }
 
+void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
+{
+       unsigned long flags;
+       spin_lock_irqsave(&mdev->ldev->md.uuid_lock, flags);
+       __drbd_uuid_set(mdev, idx, val);
+       spin_unlock_irqrestore(&mdev->ldev->md.uuid_lock, flags);
+}
 
 void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
 {
+       unsigned long flags;
+       spin_lock_irqsave(&mdev->ldev->md.uuid_lock, flags);
        if (mdev->ldev->md.uuid[idx]) {
                drbd_uuid_move_history(mdev);
                mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx];
        }
-       _drbd_uuid_set(mdev, idx, val);
+       __drbd_uuid_set(mdev, idx, val);
+       spin_unlock_irqrestore(&mdev->ldev->md.uuid_lock, flags);
 }
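The rename to __drbd_uuid_set() plus the new single-underscore wrapper follows the usual kernel convention: the double-underscore variant assumes md.uuid_lock is already held, the wrapper acquires and releases it. Callers that must update several UUID slots atomically, like drbd_uuid_set() above and drbd_uuid_new_current() below, take the lock once and call the __ variant. In miniature, with hypothetical names:

        struct md_sketch {
                spinlock_t uuid_lock;
                u64 uuid[UI_SIZE];
        };

        static void __uuid_store(struct md_sketch *md, int idx, u64 val)
        {
                md->uuid[idx] = val;            /* caller holds uuid_lock */
        }

        static void uuid_store(struct md_sketch *md, int idx, u64 val)
        {
                unsigned long flags;

                spin_lock_irqsave(&md->uuid_lock, flags);
                __uuid_store(md, idx, val);
                spin_unlock_irqrestore(&md->uuid_lock, flags);
        }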
 
 /**
@@ -3041,15 +3087,20 @@ void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
 void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local)
 {
        u64 val;
-       unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
+       unsigned long long bm_uuid;
+
+       get_random_bytes(&val, sizeof(u64));
+
+       spin_lock_irq(&mdev->ldev->md.uuid_lock);
+       bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
 
        if (bm_uuid)
                dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
 
        mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT];
+       __drbd_uuid_set(mdev, UI_CURRENT, val);
+       spin_unlock_irq(&mdev->ldev->md.uuid_lock);
 
-       get_random_bytes(&val, sizeof(u64));
-       _drbd_uuid_set(mdev, UI_CURRENT, val);
        drbd_print_uuids(mdev, "new current UUID");
        /* get it to stable storage _now_ */
        drbd_md_sync(mdev);
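Note how get_random_bytes() moved ahead of the locking: gathering entropy is comparatively slow and needs no protection, so only the actual UUID updates remain inside the interrupt-disabled critical section. The general shape:

        get_random_bytes(&val, sizeof(u64));    /* prep work, lock-free */

        spin_lock_irq(&mdev->ldev->md.uuid_lock);
        /* ... only the md.uuid[] updates happen here ... */
        spin_unlock_irq(&mdev->ldev->md.uuid_lock);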
@@ -3057,9 +3108,11 @@ void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local)
 
 void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local)
 {
+       unsigned long flags;
        if (mdev->ldev->md.uuid[UI_BITMAP] == 0 && val == 0)
                return;
 
+       spin_lock_irqsave(&mdev->ldev->md.uuid_lock, flags);
        if (val == 0) {
                drbd_uuid_move_history(mdev);
                mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP];
@@ -3071,6 +3124,8 @@ void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local)
 
                mdev->ldev->md.uuid[UI_BITMAP] = val & ~((u64)1);
        }
+       spin_unlock_irqrestore(&mdev->ldev->md.uuid_lock, flags);
+
        drbd_md_mark_dirty(mdev);
 }
 
@@ -3172,6 +3227,30 @@ static int w_go_diskless(struct drbd_work *w, int unused)
         * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
         * the protected members anymore, though, so once put_ldev reaches zero
         * again, it will be safe to free them. */
+
+       /* Try to write changed bitmap pages; read errors may have just
+        * set some bits outside the area covered by the activity log.
+        *
+        * If we have an IO error during the bitmap writeout,
+        * we will want a full sync next time, just in case.
+        * (Do we want a specific meta data flag for this?)
+        *
+        * If that does not make it to stable storage either,
+        * we cannot do anything about that anymore.
+        *
+        * We still need to check that both bitmap and ldev are present; we
+        * may end up here after a failed attach, before ldev was even assigned.
+        */
+       if (mdev->bitmap && mdev->ldev) {
+               if (drbd_bitmap_io_from_worker(mdev, drbd_bm_write,
+                                       "detach", BM_LOCKED_MASK)) {
+                       if (test_bit(WAS_READ_ERROR, &mdev->flags)) {
+                               drbd_md_set_flag(mdev, MDF_FULL_SYNC);
+                               drbd_md_sync(mdev);
+                       }
+               }
+       }
+
        drbd_force_state(mdev, NS(disk, D_DISKLESS));
        return 0;
 }
@@ -3276,7 +3355,9 @@ static void md_sync_timer_fn(unsigned long data)
 {
        struct drbd_conf *mdev = (struct drbd_conf *) data;
 
-       drbd_queue_work_front(&mdev->tconn->sender_work, &mdev->md_sync_work);
+       /* must not double-queue! */
+       if (list_empty(&mdev->md_sync_work.list))
+               drbd_queue_work_front(&mdev->tconn->sender_work, &mdev->md_sync_work);
 }
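DRBD work items are bare list nodes, so queueing one that is already on sender_work would corrupt the list. The unlocked list_empty() test suffices only under two assumptions (not visible in this diff): this timer is the sole producer of md_sync_work, and the dequeue path uses list_del_init() so the node reads as empty again afterwards. Fragment sketch of both sides:

        /* producer (timer callback): only queue when not already queued */
        if (list_empty(&w->list))
                drbd_queue_work_front(q, w);

        /* consumer must use list_del_init(), not list_del(), or the
         * list_empty() test above would never become true again */
        list_del_init(&w->list);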
 
 static int w_md_sync(struct drbd_work *w, int unused)
@@ -3322,7 +3403,7 @@ const char *cmdname(enum drbd_packet cmd)
                [P_RECV_ACK]            = "RecvAck",
                [P_WRITE_ACK]           = "WriteAck",
                [P_RS_WRITE_ACK]        = "RSWriteAck",
-               [P_DISCARD_WRITE]        = "DiscardWrite",
+               [P_SUPERSEDED]          = "Superseded",
                [P_NEG_ACK]             = "NegAck",
                [P_NEG_DREPLY]          = "NegDReply",
                [P_NEG_RS_DREPLY]       = "NegRSDReply",
@@ -3475,12 +3556,11 @@ const char *drbd_buildtag(void)
        static char buildtag[38] = "\0uilt-in";
 
        if (buildtag[0] == 0) {
-#ifdef CONFIG_MODULES
-               if (THIS_MODULE != NULL)
-                       sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion);
-               else
+#ifdef MODULE
+               sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion);
+#else
+               buildtag[0] = 'b';
 #endif
-                       buildtag[0] = 'b';
        }
 
        return buildtag;
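On the buildtag trick: the static buffer is initialized to "\0uilt-in", i.e. the word "built-in" missing its first byte. The old test keyed on CONFIG_MODULES, which is also set when drbd itself is built into such a kernel; #ifdef MODULE is the correct compile-time distinction. The resulting logic, flattened:

        static char buildtag[38] = "\0uilt-in";

        #ifdef MODULE   /* built as a module: report its srcversion */
                sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion);
        #else           /* built in: write 'b' to complete "built-in" */
                buildtag[0] = 'b';
        #endif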