X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=drivers%2Fblock%2Fdrbd%2Fdrbd_nl.c;h=2af26fc9528083d23cbde63fbe42128a26757599;hb=f59dc2bb5a50b26ea751f9eac1c81e4cc7de5257;hp=838c3cd54cdd5c11a03f0c59bc6f8d85c63a5251;hpb=2ffca4f3ee6c2d507c39689e5f569bcb0612d3ad;p=firefly-linux-kernel-4.4.55.git

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 838c3cd54cdd..2af26fc95280 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -155,8 +155,7 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
 	memset(&adm_ctx, 0, sizeof(adm_ctx));
 
 	/* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
-	if (cmd != DRBD_ADM_GET_STATUS
-	&& security_netlink_recv(skb, CAP_SYS_ADMIN))
+	if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN))
 		return -EPERM;
 
 	adm_ctx.reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
@@ -323,11 +322,15 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd)
 			NULL };
 	char mb[12];
 	char *argv[] = {usermode_helper, cmd, mb, NULL };
+	struct drbd_tconn *tconn = mdev->tconn;
 	struct sib_info sib;
 	int ret;
 
+	if (current == tconn->worker.task)
+		set_bit(CALLBACK_PENDING, &tconn->flags);
+
 	snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev));
-	setup_khelper_env(mdev->tconn, envp);
+	setup_khelper_env(tconn, envp);
 
 	/* The helper may take some time.
 	 * write out any unsynced meta data changes now */
@@ -337,7 +340,7 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd)
 	sib.sib_reason = SIB_HELPER_PRE;
 	sib.helper_name = cmd;
 	drbd_bcast_event(mdev, &sib);
-	ret = call_usermodehelper(usermode_helper, argv, envp, 1);
+	ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
 	if (ret)
 		dev_warn(DEV, "helper command: %s %s %s exit code %u (0x%x)\n",
 				usermode_helper, cmd, mb,
@@ -350,28 +353,15 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd)
 	sib.helper_exit_code = ret;
 	drbd_bcast_event(mdev, &sib);
 
+	if (current == tconn->worker.task)
+		clear_bit(CALLBACK_PENDING, &tconn->flags);
+
 	if (ret < 0) /* Ignore any ERRNOs we got. */
 		ret = 0;
 
 	return ret;
 }
 
-static void conn_md_sync(struct drbd_tconn *tconn)
-{
-	struct drbd_conf *mdev;
-	int vnr;
-
-	rcu_read_lock();
-	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
-		kref_get(&mdev->kref);
-		rcu_read_unlock();
-		drbd_md_sync(mdev);
-		kref_put(&mdev->kref, &drbd_minor_destroy);
-		rcu_read_lock();
-	}
-	rcu_read_unlock();
-}
-
 int conn_khelper(struct drbd_tconn *tconn, char *cmd)
 {
 	char *envp[] = { "HOME=/",
@@ -389,7 +379,7 @@ int conn_khelper(struct drbd_tconn *tconn, char *cmd)
 	conn_info(tconn, "helper command: %s %s %s\n", usermode_helper, cmd, tconn->name);
 	/* TODO: conn_bcast_event() ?? */
 
-	ret = call_usermodehelper(usermode_helper, argv, envp, 1);
+	ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
 	if (ret)
 		conn_warn(tconn, "helper command: %s %s %s exit code %u (0x%x)\n",
 			  usermode_helper, cmd, tconn->name,
@@ -502,7 +492,7 @@ bool conn_try_outdate_peer(struct drbd_tconn *tconn)
 	   here, because we might were able to re-establish the connection in the
 	   meantime. */
 	spin_lock_irq(&tconn->req_lock);
-	if (tconn->cstate < C_WF_REPORT_PARAMS)
+	if (tconn->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &tconn->flags))
 		_conn_request_state(tconn, mask, val, CS_VERBOSE);
 	spin_unlock_irq(&tconn->req_lock);
 
@@ -622,6 +612,8 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
 	/* Wait until nothing is on the fly :) */
 	wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0);
 
+	/* FIXME also wait for all pending P_BARRIER_ACK? */
+
 	if (new_role == R_SECONDARY) {
 		set_disk_ro(mdev->vdisk, true);
 		if (get_ldev(mdev)) {
@@ -654,7 +646,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
 		/* if this was forced, we should consider sync */
 		if (forced)
 			drbd_send_uuids(mdev);
-		drbd_send_state(mdev);
+		drbd_send_current_state(mdev);
 	}
 
 	drbd_md_sync(mdev);
@@ -885,8 +877,8 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
 			 la_size_changed && md_moved ? "size changed and md moved" :
 			 la_size_changed ? "size changed" : "md moved");
 		/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
-		err = drbd_bitmap_io(mdev, &drbd_bm_write,
-				"size changed", BM_LOCKED_MASK);
+		err = drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
+				     "size changed", BM_LOCKED_MASK);
 		if (err) {
 			rv = dev_size_error;
 			goto out;
@@ -1009,8 +1001,8 @@ static int drbd_check_al_size(struct drbd_conf *mdev, struct disk_conf *dc)
 static void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size)
 {
 	struct request_queue * const q = mdev->rq_queue;
-	int max_hw_sectors = max_bio_size >> 9;
-	int max_segments = 0;
+	unsigned int max_hw_sectors = max_bio_size >> 9;
+	unsigned int max_segments = 0;
 
 	if (get_ldev_if_state(mdev, D_ATTACHING)) {
 		struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
@@ -1045,7 +1037,7 @@ static void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_
 
 void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
 {
-	int now, new, local, peer;
+	unsigned int now, new, local, peer;
 
 	now = queue_max_hw_sectors(mdev->rq_queue) << 9;
 	local = mdev->local_max_bio_size; /* Eventually last known value, from volatile memory */
@@ -1056,13 +1048,14 @@ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
 		mdev->local_max_bio_size = local;
 		put_ldev(mdev);
 	}
+	local = min(local, DRBD_MAX_BIO_SIZE);
 
 	/* We may ignore peer limits if the peer is modern enough.
 	   Because new from 8.3.8 onwards the peer can use multiple
 	   BIOs for a single peer_request */
 	if (mdev->state.conn >= C_CONNECTED) {
 		if (mdev->tconn->agreed_pro_version < 94)
-			peer = min_t(int, mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
+			peer = min( mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
 			/* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
 		else if (mdev->tconn->agreed_pro_version == 94)
 			peer = DRBD_MAX_SIZE_H80_PACKET;
@@ -1072,10 +1065,10 @@ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
 			peer = DRBD_MAX_BIO_SIZE;
 	}
 
-	new = min_t(int, local, peer);
+	new = min(local, peer);
 
 	if (mdev->state.role == R_PRIMARY && new < now)
-		dev_err(DEV, "ASSERT FAILED new < now; (%d < %d)\n", new, now);
+		dev_err(DEV, "ASSERT FAILED new < now; (%u < %u)\n", new, now);
 
 	if (new != now)
 		dev_info(DEV, "max BIO size = %u\n", new);
@@ -1095,7 +1088,8 @@ static void conn_reconfig_done(struct drbd_tconn *tconn)
 {
 	bool stop_threads;
 	spin_lock_irq(&tconn->req_lock);
-	stop_threads = conn_all_vols_unconf(tconn);
+	stop_threads = conn_all_vols_unconf(tconn) &&
+		tconn->cstate == C_STANDALONE;
 	spin_unlock_irq(&tconn->req_lock);
 	if (stop_threads) {
 		/* asender is implicitly stopped by receiver
@@ -1200,11 +1194,13 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 		}
 	}
 
+	drbd_suspend_io(mdev);
 	wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
 	drbd_al_shrink(mdev);
 	err = drbd_check_al_size(mdev, new_disk_conf);
 	lc_unlock(mdev->act_log);
 	wake_up(&mdev->al_wait);
+	drbd_resume_io(mdev);
 
 	if (err) {
 		retcode = ERR_NOMEM;
@@ -1228,6 +1224,19 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 	}
 	mutex_unlock(&mdev->tconn->conf_update);
+
+	if (new_disk_conf->al_updates)
+		mdev->ldev->md.flags &= ~MDF_AL_DISABLED;
+	else
+		mdev->ldev->md.flags |= MDF_AL_DISABLED;
+
+	if (new_disk_conf->md_flushes)
+		clear_bit(MD_NO_FUA, &mdev->flags);
+	else
+		set_bit(MD_NO_FUA, &mdev->flags);
+
+	drbd_bump_write_ordering(mdev->tconn, WO_bdev_flush);
+
 	drbd_md_sync(mdev);
 
 	if (mdev->state.conn >= C_CONNECTED)
@@ -1236,6 +1245,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 	synchronize_rcu();
 	kfree(old_disk_conf);
 	kfree(old_plan);
+	mod_timer(&mdev->request_timer, jiffies + HZ);
 	goto success;
 
 fail_unlock:
@@ -1266,7 +1276,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	union drbd_state ns, os;
 	enum drbd_state_rv rv;
 	struct net_conf *nc;
-	int cp_discovered = 0;
 
 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
 	if (!adm_ctx.reply_skb)
@@ -1288,12 +1297,24 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	 * to realize a "hot spare" feature (not that I'd recommend that) */
 	wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
 
+	/* make sure there is no leftover from previous force-detach attempts */
+	clear_bit(FORCE_DETACH, &mdev->flags);
+	clear_bit(WAS_IO_ERROR, &mdev->flags);
+	clear_bit(WAS_READ_ERROR, &mdev->flags);
+
+	/* and no leftover from previously aborted resync or verify, either */
+	mdev->rs_total = 0;
+	mdev->rs_failed = 0;
+	atomic_set(&mdev->rs_pending_cnt, 0);
+
 	/* allocation not in the IO path, drbdsetup context */
 	nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
 	if (!nbc) {
 		retcode = ERR_NOMEM;
 		goto fail;
 	}
+	spin_lock_init(&nbc->md.uuid_lock);
+
 	new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
 	if (!new_disk_conf) {
 		retcode = ERR_NOMEM;
@@ -1427,6 +1448,12 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 
 	drbd_suspend_io(mdev);
 	/* also wait for the last barrier ack. */
+	/* FIXME see also https://daiquiri.linbit/cgi-bin/bugzilla/show_bug.cgi?id=171
+	 * We need a way to either ignore barrier acks for barriers sent before a device
+	 * was attached, or a way to wait for all pending barrier acks to come in.
+	 * As barriers are counted per resource,
+	 * we'd need to suspend io on all devices of a resource.
+	 */
 	wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || drbd_suspended(mdev));
 	/* and for any other previously queued work */
 	drbd_flush_workqueue(mdev);
@@ -1476,11 +1503,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 		goto force_diskless_dec;
 	}
 
-	if (!drbd_al_read_log(mdev, nbc)) {
-		retcode = ERR_IO_MD_DISK;
-		goto force_diskless_dec;
-	}
-
 	/* Reset the "barriers don't work" bits here, then force meta data to
 	 * be written, to ensure we determine if barriers are supported. */
 	if (new_disk_conf->md_flushes)
@@ -1501,8 +1523,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	new_disk_conf = NULL;
 	new_plan = NULL;
 
-	mdev->write_ordering = WO_bdev_flush;
-	drbd_bump_write_ordering(mdev, WO_bdev_flush);
+	drbd_bump_write_ordering(mdev->tconn, WO_bdev_flush);
 
 	if (drbd_md_test_flag(mdev->ldev, MDF_CRASHED_PRIMARY))
 		set_bit(CRASHED_PRIMARY, &mdev->flags);
@@ -1510,10 +1531,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 		clear_bit(CRASHED_PRIMARY, &mdev->flags);
 
 	if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) &&
-	    !(mdev->state.role == R_PRIMARY && mdev->tconn->susp_nod)) {
+	    !(mdev->state.role == R_PRIMARY && mdev->tconn->susp_nod))
 		set_bit(CRASHED_PRIMARY, &mdev->flags);
-		cp_discovered = 1;
-	}
 
 	mdev->send_cnt = 0;
 	mdev->recv_cnt = 0;
@@ -1549,7 +1568,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	} else if (dd == grew)
 		set_bit(RESYNC_AFTER_NEG, &mdev->flags);
 
-	if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
+	if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC) ||
+	    (test_bit(CRASHED_PRIMARY, &mdev->flags) &&
+	     drbd_md_test_flag(mdev->ldev, MDF_AL_DISABLED))) {
 		dev_info(DEV, "Assuming that all blocks are out of sync "
 		     "(aka FullSync)\n");
 		if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write,
@@ -1565,15 +1586,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 		}
 	}
 
-	if (cp_discovered) {
-		drbd_al_apply_to_bm(mdev);
-		if (drbd_bitmap_io(mdev, &drbd_bm_write,
-			"crashed primary apply AL", BM_LOCKED_MASK)) {
-			retcode = ERR_IO_MD_DISK;
-			goto force_diskless_dec;
-		}
-	}
-
 	if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev))
 		drbd_suspend_al(mdev); /* IO is still suspended here... */
 
@@ -1601,13 +1613,19 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	if (ns.disk == D_CONSISTENT &&
 	    (ns.pdsk == D_OUTDATED || rcu_dereference(mdev->ldev->disk_conf)->fencing == FP_DONT_CARE))
 		ns.disk = D_UP_TO_DATE;
-	rcu_read_unlock();
 
 	/* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
 	   MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
 	   this point, because drbd_request_state() modifies these
 	   flags. */
 
+	if (rcu_dereference(mdev->ldev->disk_conf)->al_updates)
+		mdev->ldev->md.flags &= ~MDF_AL_DISABLED;
+	else
+		mdev->ldev->md.flags |= MDF_AL_DISABLED;
+
+	rcu_read_unlock();
+
 	/* In case we are C_CONNECTED postpone any decision on the new disk
 	   state after the negotiation phase. */
 	if (mdev->state.conn == C_CONNECTED) {
@@ -1628,6 +1646,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	if (rv < SS_SUCCESS)
 		goto force_diskless_dec;
 
+	mod_timer(&mdev->request_timer, jiffies + HZ);
+
 	if (mdev->state.role == R_PRIMARY)
 		mdev->ldev->md.uuid[UI_CURRENT] |= (u64)1;
 	else
@@ -1645,7 +1665,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
  force_diskless_dec:
 	put_ldev(mdev);
  force_diskless:
-	drbd_force_state(mdev, NS(disk, D_FAILED));
+	drbd_force_state(mdev, NS(disk, D_DISKLESS));
 	drbd_md_sync(mdev);
  fail:
 	conn_reconfig_done(mdev->tconn);
@@ -1667,12 +1687,22 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	return 0;
 }
 
-static int adm_detach(struct drbd_conf *mdev)
+static int adm_detach(struct drbd_conf *mdev, int force)
 {
 	enum drbd_state_rv retcode;
 	int ret;
+
+	if (force) {
+		set_bit(FORCE_DETACH, &mdev->flags);
+		drbd_force_state(mdev, NS(disk, D_FAILED));
+		retcode = SS_SUCCESS;
+		goto out;
+	}
+
 	drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */
+	drbd_md_get_buffer(mdev); /* make sure there is no in-flight meta-data IO */
 	retcode = drbd_request_state(mdev, NS(disk, D_FAILED));
+	drbd_md_put_buffer(mdev);
 	/* D_FAILED will transition to DISKLESS. */
 	ret = wait_event_interruptible(mdev->misc_wait,
 			mdev->state.disk != D_FAILED);
@@ -1681,6 +1711,7 @@ static int adm_detach(struct drbd_conf *mdev)
 		retcode = SS_NOTHING_TO_DO;
 	if (ret)
 		retcode = ERR_INTR;
+out:
 	return retcode;
 }
 
@@ -1692,6 +1723,8 @@ static int adm_detach(struct drbd_conf *mdev)
 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
 {
 	enum drbd_ret_code retcode;
+	struct detach_parms parms = { };
+	int err;
 
 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
 	if (!adm_ctx.reply_skb)
@@ -1699,7 +1732,16 @@ int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
 	if (retcode != NO_ERROR)
 		goto out;
 
-	retcode = adm_detach(adm_ctx.mdev);
+	if (info->attrs[DRBD_NLA_DETACH_PARMS]) {
+		err = detach_parms_from_attrs(&parms, info);
+		if (err) {
+			retcode = ERR_MANDATORY_TAG;
+			drbd_msg_put_info(from_attrs_err_to_txt(err));
+			goto out;
+		}
+	}
+
+	retcode = adm_detach(adm_ctx.mdev, parms.force_detach);
 out:
 	drbd_adm_finish(info, retcode);
 	return 0;
@@ -1758,9 +1800,6 @@ _check_net_options(struct drbd_tconn *tconn, struct net_conf *old_conf, struct n
 		if (new_conf->two_primaries != old_conf->two_primaries)
 			return ERR_NEED_APV_100;
 
-		if (!new_conf->integrity_alg != !old_conf->integrity_alg)
-			return ERR_NEED_APV_100;
-
 		if (strcmp(new_conf->integrity_alg, old_conf->integrity_alg))
 			return ERR_NEED_APV_100;
 	}
@@ -1782,7 +1821,7 @@ _check_net_options(struct drbd_tconn *tconn, struct net_conf *old_conf, struct n
 			return ERR_STONITH_AND_PROT_A;
 		}
 		if (mdev->state.role == R_PRIMARY && new_conf->discard_my_data)
-			return ERR_DISCARD;
+			return ERR_DISCARD_IMPOSSIBLE;
 	}
 
 	if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A)
@@ -1818,8 +1857,6 @@ struct crypto {
 	struct crypto_hash *csums_tfm;
 	struct crypto_hash *cram_hmac_tfm;
 	struct crypto_hash *integrity_tfm;
-	void *int_dig_in;
-	void *int_dig_vv;
 };
 
 static int
@@ -1842,7 +1879,6 @@ alloc_crypto(struct crypto *crypto, struct net_conf *new_conf)
 {
 	char hmac_name[CRYPTO_MAX_ALG_NAME];
 	enum drbd_ret_code rv;
-	int hash_size;
 
 	rv = alloc_hash(&crypto->csums_tfm, new_conf->csums_alg,
 			ERR_CSUMS_ALG);
@@ -1863,23 +1899,12 @@ alloc_crypto(struct crypto *crypto, struct net_conf *new_conf)
 		rv = alloc_hash(&crypto->cram_hmac_tfm, hmac_name,
 				ERR_AUTH_ALG);
 	}
-	if (crypto->integrity_tfm) {
-		hash_size = crypto_hash_digestsize(crypto->integrity_tfm);
-		crypto->int_dig_in = kmalloc(hash_size, GFP_KERNEL);
-		if (!crypto->int_dig_in)
-			return ERR_NOMEM;
-		crypto->int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
-		if (!crypto->int_dig_vv)
-			return ERR_NOMEM;
-	}
 
 	return rv;
 }
 
 static void free_crypto(struct crypto *crypto)
 {
-	kfree(crypto->int_dig_in);
-	kfree(crypto->int_dig_vv);
 	crypto_free_hash(crypto->cram_hmac_tfm);
 	crypto_free_hash(crypto->integrity_tfm);
 	crypto_free_hash(crypto->csums_tfm);
@@ -1968,10 +1993,6 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
 		crypto.verify_tfm = NULL;
 	}
 
-	kfree(tconn->int_dig_in);
-	tconn->int_dig_in = crypto.int_dig_in;
-	kfree(tconn->int_dig_vv);
-	tconn->int_dig_vv = crypto.int_dig_vv;
 	crypto_free_hash(tconn->integrity_tfm);
 	tconn->integrity_tfm = crypto.integrity_tfm;
 	if (tconn->cstate >= C_WF_REPORT_PARAMS && tconn->agreed_pro_version >= 100)
@@ -2050,7 +2071,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 		goto fail;
 	}
 
-	/* allocation not in the IO path, cqueue thread context */
+	/* allocation not in the IO path, drbdsetup / netlink process context */
 	new_conf = kzalloc(sizeof(*new_conf), GFP_KERNEL);
 	if (!new_conf) {
 		retcode = ERR_NOMEM;
@@ -2060,7 +2081,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 	set_net_conf_defaults(new_conf);
 
 	err = net_conf_from_attrs(new_conf, info);
-	if (err) {
+	if (err && err != -ENOMSG) {
 		retcode = ERR_MANDATORY_TAG;
 		drbd_msg_put_info(from_attrs_err_to_txt(err));
 		goto fail;
@@ -2088,8 +2109,6 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 	rcu_assign_pointer(tconn->net_conf, new_conf);
 
 	conn_free_crypto(tconn);
-	tconn->int_dig_in = crypto.int_dig_in;
-	tconn->int_dig_vv = crypto.int_dig_vv;
 	tconn->cram_hmac_tfm = crypto.cram_hmac_tfm;
 	tconn->integrity_tfm = crypto.integrity_tfm;
 	tconn->csums_tfm = crypto.csums_tfm;
@@ -2222,7 +2241,7 @@ void resync_after_online_grow(struct drbd_conf *mdev)
 	if (mdev->state.role != mdev->state.peer)
 		iass = (mdev->state.role == R_PRIMARY);
 	else
-		iass = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
+		iass = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags);
 
 	if (iass)
 		drbd_start_resync(mdev, C_SYNC_SOURCE);
@@ -2276,7 +2295,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 
 	if (rs.no_resync && mdev->tconn->agreed_pro_version < 93) {
 		retcode = ERR_NEED_APV_93;
-		goto fail;
+		goto fail_ldev;
 	}
 
 	rcu_read_lock();
@@ -2286,7 +2305,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 		new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
 		if (!new_disk_conf) {
 			retcode = ERR_NOMEM;
-			goto fail;
+			goto fail_ldev;
 		}
 	}
 
@@ -2324,6 +2343,10 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
  fail:
 	drbd_adm_finish(info, retcode);
 	return 0;
+
+ fail_ldev:
+	put_ldev(mdev);
+	goto fail;
 }
 
 int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
@@ -2377,8 +2400,11 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
 	mdev = adm_ctx.mdev;
 
 	/* If there is still bitmap IO pending, probably because of a previous
-	 * resync just being finished, wait for it before requesting a new resync. */
+	 * resync just being finished, wait for it before requesting a new resync.
+	 * Also wait for it's after_state_ch(). */
+	drbd_suspend_io(mdev);
 	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
+	drbd_flush_workqueue(mdev);
 
 	retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED);
 
@@ -2396,12 +2422,30 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
 			retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T));
 	}
+	drbd_resume_io(mdev);
 
 out:
 	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
+static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
+		union drbd_state mask, union drbd_state val)
+{
+	enum drbd_ret_code retcode;
+
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	retcode = drbd_request_state(adm_ctx.mdev, mask, val);
+out:
+	drbd_adm_finish(info, retcode);
+	return 0;
+}
+
 static int drbd_bmio_set_susp_al(struct drbd_conf *mdev)
 {
 	int rv;
@@ -2411,10 +2455,10 @@ static int drbd_bmio_set_susp_al(struct drbd_conf *mdev)
 	return rv;
 }
 
-static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
-		union drbd_state mask, union drbd_state val)
+int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
 {
-	enum drbd_ret_code retcode;
+	int retcode; /* drbd_ret_code, drbd_state_rv */
+	struct drbd_conf *mdev;
 
 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
 	if (!adm_ctx.reply_skb)
@@ -2422,17 +2466,37 @@ static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *
 	if (retcode != NO_ERROR)
 		goto out;
 
-	retcode = drbd_request_state(adm_ctx.mdev, mask, val);
+	mdev = adm_ctx.mdev;
+
+	/* If there is still bitmap IO pending, probably because of a previous
+	 * resync just being finished, wait for it before requesting a new resync.
+	 * Also wait for it's after_state_ch(). */
+	drbd_suspend_io(mdev);
+	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
+	drbd_flush_workqueue(mdev);
+
+	retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED);
+	if (retcode < SS_SUCCESS) {
+		if (retcode == SS_NEED_CONNECTION && mdev->state.role == R_PRIMARY) {
+			/* The peer will get a resync upon connect anyways.
+			 * Just make that into a full resync. */
+			retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT));
+			if (retcode >= SS_SUCCESS) {
+				if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al,
+						   "set_n_write from invalidate_peer",
+						   BM_LOCKED_SET_ALLOWED))
+					retcode = ERR_IO_MD_DISK;
+			}
+		} else
+			retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S));
+	}
+	drbd_resume_io(mdev);
+
 out:
 	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
-int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
-{
-	return drbd_adm_simple_request_state(skb, info, NS(conn, C_STARTING_SYNC_S));
-}
-
 int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info)
 {
 	enum drbd_ret_code retcode;
@@ -2523,13 +2587,17 @@ int nla_put_drbd_cfg_context(struct sk_buff *skb, struct drbd_tconn *tconn, unsi
 	nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT);
 	if (!nla)
 		goto nla_put_failure;
-	if (vnr != VOLUME_UNSPECIFIED)
-		NLA_PUT_U32(skb, T_ctx_volume, vnr);
-	NLA_PUT_STRING(skb, T_ctx_resource_name, tconn->name);
-	if (tconn->my_addr_len)
-		NLA_PUT(skb, T_ctx_my_addr, tconn->my_addr_len, &tconn->my_addr);
-	if (tconn->peer_addr_len)
-		NLA_PUT(skb, T_ctx_peer_addr, tconn->peer_addr_len, &tconn->peer_addr);
+	if (vnr != VOLUME_UNSPECIFIED &&
+	    nla_put_u32(skb, T_ctx_volume, vnr))
+		goto nla_put_failure;
+	if (nla_put_string(skb, T_ctx_resource_name, tconn->name))
+		goto nla_put_failure;
+	if (tconn->my_addr_len &&
+	    nla_put(skb, T_ctx_my_addr, tconn->my_addr_len, &tconn->my_addr))
+		goto nla_put_failure;
+	if (tconn->peer_addr_len &&
+	    nla_put(skb, T_ctx_peer_addr, tconn->peer_addr_len, &tconn->peer_addr))
+		goto nla_put_failure;
 	nla_nest_end(skb, nla);
 	return 0;
 
@@ -2587,20 +2655,40 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
 	nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO);
 	if (!nla)
 		goto nla_put_failure;
-	NLA_PUT_U32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY);
-	NLA_PUT_U32(skb, T_current_state, mdev->state.i);
-	NLA_PUT_U64(skb, T_ed_uuid, mdev->ed_uuid);
-	NLA_PUT_U64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev));
+	if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) ||
+	    nla_put_u32(skb, T_current_state, mdev->state.i) ||
+	    nla_put_u64(skb, T_ed_uuid, mdev->ed_uuid) ||
+	    nla_put_u64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev)) ||
+	    nla_put_u64(skb, T_send_cnt, mdev->send_cnt) ||
+	    nla_put_u64(skb, T_recv_cnt, mdev->recv_cnt) ||
+	    nla_put_u64(skb, T_read_cnt, mdev->read_cnt) ||
+	    nla_put_u64(skb, T_writ_cnt, mdev->writ_cnt) ||
+	    nla_put_u64(skb, T_al_writ_cnt, mdev->al_writ_cnt) ||
+	    nla_put_u64(skb, T_bm_writ_cnt, mdev->bm_writ_cnt) ||
+	    nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&mdev->ap_bio_cnt)) ||
+	    nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&mdev->ap_pending_cnt)) ||
+	    nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&mdev->rs_pending_cnt)))
+		goto nla_put_failure;
 
 	if (got_ldev) {
-		NLA_PUT_U32(skb, T_disk_flags, mdev->ldev->md.flags);
-		NLA_PUT(skb, T_uuids, sizeof(si->uuids), mdev->ldev->md.uuid);
-		NLA_PUT_U64(skb, T_bits_total, drbd_bm_bits(mdev));
-		NLA_PUT_U64(skb, T_bits_oos, drbd_bm_total_weight(mdev));
+		int err;
+
+		spin_lock_irq(&mdev->ldev->md.uuid_lock);
+		err = nla_put(skb, T_uuids, sizeof(si->uuids), mdev->ldev->md.uuid);
+		spin_unlock_irq(&mdev->ldev->md.uuid_lock);
+
+		if (err)
+			goto nla_put_failure;
+
+		if (nla_put_u32(skb, T_disk_flags, mdev->ldev->md.flags) ||
+		    nla_put_u64(skb, T_bits_total, drbd_bm_bits(mdev)) ||
+		    nla_put_u64(skb, T_bits_oos, drbd_bm_total_weight(mdev)))
+			goto nla_put_failure;
 		if (C_SYNC_SOURCE <= mdev->state.conn &&
 		    C_PAUSED_SYNC_T >= mdev->state.conn) {
-			NLA_PUT_U64(skb, T_bits_rs_total, mdev->rs_total);
-			NLA_PUT_U64(skb, T_bits_rs_failed, mdev->rs_failed);
+			if (nla_put_u64(skb, T_bits_rs_total, mdev->rs_total) ||
+			    nla_put_u64(skb, T_bits_rs_failed, mdev->rs_failed))
+				goto nla_put_failure;
 		}
 	}
 
@@ -2610,15 +2698,18 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
 		case SIB_GET_STATUS_REPLY:
 			break;
 		case SIB_STATE_CHANGE:
-			NLA_PUT_U32(skb, T_prev_state, sib->os.i);
-			NLA_PUT_U32(skb, T_new_state, sib->ns.i);
+			if (nla_put_u32(skb, T_prev_state, sib->os.i) ||
+			    nla_put_u32(skb, T_new_state, sib->ns.i))
+				goto nla_put_failure;
 			break;
 		case SIB_HELPER_POST:
-			NLA_PUT_U32(skb,
-				    T_helper_exit_code, sib->helper_exit_code);
+			if (nla_put_u32(skb, T_helper_exit_code,
+					sib->helper_exit_code))
+				goto nla_put_failure;
 			/* fall through */
 		case SIB_HELPER_PRE:
-			NLA_PUT_STRING(skb, T_helper, sib->helper_name);
+			if (nla_put_string(skb, T_helper, sib->helper_name))
+				goto nla_put_failure;
 			break;
 		}
 	}
@@ -2718,21 +2809,25 @@ next_tconn:
 		}
 	}
 
-	dh = genlmsg_put(skb, NETLINK_CB(cb->skb).pid,
+	dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
 			cb->nlh->nlmsg_seq, &drbd_genl_family,
 			NLM_F_MULTI, DRBD_ADM_GET_STATUS);
 	if (!dh)
 		goto out;
 
 	if (!mdev) {
-		/* this is a tconn without a single volume */
+		/* This is a tconn without a single volume.
+		 * Suprisingly enough, it may have a network
+		 * configuration. */
+		struct net_conf *nc;
 		dh->minor = -1U;
 		dh->ret_code = NO_ERROR;
 		if (nla_put_drbd_cfg_context(skb, tconn, VOLUME_UNSPECIFIED))
-			genlmsg_cancel(skb, dh);
-		else
-			genlmsg_end(skb, dh);
-		goto out;
+			goto cancel;
+		nc = rcu_dereference(tconn->net_conf);
+		if (nc && net_conf_to_skb(skb, nc, 1) != 0)
+			goto cancel;
+		goto done;
 	}
 
 	D_ASSERT(mdev->vnr == volume);
@@ -2742,9 +2837,11 @@ next_tconn:
 		dh->ret_code = NO_ERROR;
 
 		if (nla_put_status_info(skb, mdev, NULL)) {
+cancel:
 			genlmsg_cancel(skb, dh);
 			goto out;
 		}
+done:
 		genlmsg_end(skb, dh);
 	}
 
@@ -2851,6 +2948,7 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
 {
 	struct drbd_conf *mdev;
 	enum drbd_ret_code retcode;
+	struct start_ov_parms parms;
 
 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
 	if (!adm_ctx.reply_skb)
@@ -2859,23 +2957,28 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
 		goto out;
 
 	mdev = adm_ctx.mdev;
+
+	/* resume from last known position, if possible */
+	parms.ov_start_sector = mdev->ov_start_sector;
+	parms.ov_stop_sector = ULLONG_MAX;
 	if (info->attrs[DRBD_NLA_START_OV_PARMS]) {
-		/* resume from last known position, if possible */
-		struct start_ov_parms parms =
-			{ .ov_start_sector = mdev->ov_start_sector };
 		int err = start_ov_parms_from_attrs(&parms, info);
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
 			drbd_msg_put_info(from_attrs_err_to_txt(err));
 			goto out;
 		}
-		/* w_make_ov_request expects position to be aligned */
-		mdev->ov_start_sector = parms.ov_start_sector & ~BM_SECT_PER_BIT;
 	}
+	/* w_make_ov_request expects position to be aligned */
+	mdev->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1);
+	mdev->ov_stop_sector = parms.ov_stop_sector;
+
 	/* If there is still bitmap IO pending, e.g. previous resync or verify
 	 * just being finished, wait for it before requesting a new resync. */
+	drbd_suspend_io(mdev);
 	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
 	retcode = drbd_request_state(mdev,NS(conn,C_VERIFY_S));
+	drbd_resume_io(mdev);
 out:
 	drbd_adm_finish(info, retcode);
 	return 0;
@@ -3022,8 +3125,7 @@ int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info)
 	if (retcode != NO_ERROR)
 		goto out;
 
-	/* FIXME drop minor_count parameter, limit to MINORMASK */
-	if (dh->minor >= minor_count) {
+	if (dh->minor > MINORMASK) {
 		drbd_msg_put_info("requested minor out of range");
 		retcode = ERR_INVALID_REQUEST;
 		goto out;
@@ -3056,6 +3158,8 @@ static enum drbd_ret_code adm_delete_minor(struct drbd_conf *mdev)
 	 * we may want to delete a minor from a live replication group.
 	 */
 	    mdev->state.role == R_SECONDARY) {
+		_drbd_request_state(mdev, NS(conn, C_WF_REPORT_PARAMS),
+				    CS_VERBOSE + CS_WAIT_COMPLETE);
 		idr_remove(&mdev->tconn->volumes, mdev->vnr);
 		idr_remove(&minors, mdev_to_minor(mdev));
 		del_gendisk(mdev->vdisk);
@@ -3116,8 +3220,8 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
 
 	/* detach */
 	idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
-		retcode = adm_detach(mdev);
-		if (retcode < SS_SUCCESS) {
+		retcode = adm_detach(mdev, 0);
+		if (retcode < SS_SUCCESS || retcode > NO_ERROR) {
 			drbd_msg_put_info("failed to detach");
 			goto out;
 		}
@@ -3193,6 +3297,13 @@ void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib)
 	unsigned seq;
 	int err = -ENOMEM;
 
+	if (sib->sib_reason == SIB_SYNC_PROGRESS) {
+		if (time_after(jiffies, mdev->rs_last_bcast + HZ))
+			mdev->rs_last_bcast = jiffies;
+		else
+			return;
+	}
+
 	seq = atomic_inc_return(&drbd_genl_seq);
 	msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
 	if (!msg)