X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=drivers%2Fblock%2Fdrbd%2Fdrbd_nl.c;h=2af26fc9528083d23cbde63fbe42128a26757599;hb=f59dc2bb5a50b26ea751f9eac1c81e4cc7de5257;hp=c5d4fac1a1110871fab2bf39a4f99105fbe27765;hpb=9a51ab1c1b3c1e21f076cdd571bbe6ca7d1b504c;p=firefly-linux-kernel-4.4.55.git

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index c5d4fac1a111..2af26fc95280 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -155,8 +155,7 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
 	memset(&adm_ctx, 0, sizeof(adm_ctx));
 
 	/* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
-	if (cmd != DRBD_ADM_GET_STATUS
-	    && security_netlink_recv(skb, CAP_SYS_ADMIN))
+	if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN))
 		return -EPERM;
 
 	adm_ctx.reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
@@ -323,11 +322,15 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd)
 			NULL };
 	char mb[12];
 	char *argv[] = {usermode_helper, cmd, mb, NULL };
+	struct drbd_tconn *tconn = mdev->tconn;
 	struct sib_info sib;
 	int ret;
 
+	if (current == tconn->worker.task)
+		set_bit(CALLBACK_PENDING, &tconn->flags);
+
 	snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev));
-	setup_khelper_env(mdev->tconn, envp);
+	setup_khelper_env(tconn, envp);
 
 	/* The helper may take some time.
 	 * write out any unsynced meta data changes now */
@@ -337,7 +340,7 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd)
 	sib.sib_reason = SIB_HELPER_PRE;
 	sib.helper_name = cmd;
 	drbd_bcast_event(mdev, &sib);
-	ret = call_usermodehelper(usermode_helper, argv, envp, 1);
+	ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
 	if (ret)
 		dev_warn(DEV, "helper command: %s %s %s exit code %u (0x%x)\n",
 				usermode_helper, cmd, mb,
@@ -350,28 +353,15 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd)
 	sib.helper_exit_code = ret;
 	drbd_bcast_event(mdev, &sib);
 
+	if (current == tconn->worker.task)
+		clear_bit(CALLBACK_PENDING, &tconn->flags);
+
 	if (ret < 0) /* Ignore any ERRNOs we got. */
 		ret = 0;
 
 	return ret;
 }
 
-static void conn_md_sync(struct drbd_tconn *tconn)
-{
-	struct drbd_conf *mdev;
-	int vnr;
-
-	rcu_read_lock();
-	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
-		kref_get(&mdev->kref);
-		rcu_read_unlock();
-		drbd_md_sync(mdev);
-		kref_put(&mdev->kref, &drbd_minor_destroy);
-		rcu_read_lock();
-	}
-	rcu_read_unlock();
-}
-
 int conn_khelper(struct drbd_tconn *tconn, char *cmd)
 {
 	char *envp[] = { "HOME=/",
@@ -389,7 +379,7 @@ int conn_khelper(struct drbd_tconn *tconn, char *cmd)
 	conn_info(tconn, "helper command: %s %s %s\n", usermode_helper, cmd, tconn->name);
 	/* TODO: conn_bcast_event() ?? */
-	ret = call_usermodehelper(usermode_helper, argv, envp, 1);
+	ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
 	if (ret)
 		conn_warn(tconn, "helper command: %s %s %s exit code %u (0x%x)\n",
 			  usermode_helper, cmd, tconn->name,
@@ -622,6 +612,8 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
 	/* Wait until nothing is on the fly :) */
 	wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0);
 
+	/* FIXME also wait for all pending P_BARRIER_ACK? */
+
 	if (new_role == R_SECONDARY) {
 		set_disk_ro(mdev->vdisk, true);
 		if (get_ldev(mdev)) {
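Note on the drbd_khelper()/conn_khelper() hunks above: the literal wait argument 1 matched UMH_WAIT_PROC only under the old enum numbering; after the UMH_* constants were renumbered into flags (around v3.4, as I recall), a literal 1 selects UMH_WAIT_EXEC, so the caller would no longer wait for the helper to finish. A minimal kernel-style sketch of the corrected call; the helper path, argv and envp here are hypothetical, not DRBD's:

#include <linux/kmod.h>
#include <linux/printk.h>

/* Sketch only: path and arguments are hypothetical. */
static int run_example_helper(void)
{
	char *argv[] = { "/usr/local/sbin/example-helper", "pre-sync", NULL };
	char *envp[] = { "HOME=/",
			 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
			 NULL };
	int ret;

	/* UMH_WAIT_PROC: block until the helper exits and return its
	 * wait()-style status; 0 means the helper exited with code 0. */
	ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
	if (ret)
		pr_warn("helper exit status %u (0x%x)\n", ret, ret);
	return ret;
}

Because the return value is a wait()-style status rather than a plain exit code, the dev_warn()/conn_warn() calls above print it as both %u and 0x%x.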
"size changed" : "md moved"); /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */ - err = drbd_bitmap_io(mdev, &drbd_bm_write, - "size changed", BM_LOCKED_MASK); + err = drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write, + "size changed", BM_LOCKED_MASK); if (err) { rv = dev_size_error; goto out; @@ -1009,8 +1001,8 @@ static int drbd_check_al_size(struct drbd_conf *mdev, struct disk_conf *dc) static void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size) { struct request_queue * const q = mdev->rq_queue; - int max_hw_sectors = max_bio_size >> 9; - int max_segments = 0; + unsigned int max_hw_sectors = max_bio_size >> 9; + unsigned int max_segments = 0; if (get_ldev_if_state(mdev, D_ATTACHING)) { struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; @@ -1045,7 +1037,7 @@ static void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev) { - int now, new, local, peer; + unsigned int now, new, local, peer; now = queue_max_hw_sectors(mdev->rq_queue) << 9; local = mdev->local_max_bio_size; /* Eventually last known value, from volatile memory */ @@ -1056,13 +1048,14 @@ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev) mdev->local_max_bio_size = local; put_ldev(mdev); } + local = min(local, DRBD_MAX_BIO_SIZE); /* We may ignore peer limits if the peer is modern enough. Because new from 8.3.8 onwards the peer can use multiple BIOs for a single peer_request */ if (mdev->state.conn >= C_CONNECTED) { if (mdev->tconn->agreed_pro_version < 94) - peer = min_t(int, mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET); + peer = min( mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET); /* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */ else if (mdev->tconn->agreed_pro_version == 94) peer = DRBD_MAX_SIZE_H80_PACKET; @@ -1072,10 +1065,10 @@ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev) peer = DRBD_MAX_BIO_SIZE; } - new = min_t(int, local, peer); + new = min(local, peer); if (mdev->state.role == R_PRIMARY && new < now) - dev_err(DEV, "ASSERT FAILED new < now; (%d < %d)\n", new, now); + dev_err(DEV, "ASSERT FAILED new < now; (%u < %u)\n", new, now); if (new != now) dev_info(DEV, "max BIO size = %u\n", new); @@ -1201,11 +1194,13 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) } } + drbd_suspend_io(mdev); wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); drbd_al_shrink(mdev); err = drbd_check_al_size(mdev, new_disk_conf); lc_unlock(mdev->act_log); wake_up(&mdev->al_wait); + drbd_resume_io(mdev); if (err) { retcode = ERR_NOMEM; @@ -1231,10 +1226,15 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) mutex_unlock(&mdev->tconn->conf_update); if (new_disk_conf->al_updates) - mdev->ldev->md.flags &= MDF_AL_DISABLED; + mdev->ldev->md.flags &= ~MDF_AL_DISABLED; else mdev->ldev->md.flags |= MDF_AL_DISABLED; + if (new_disk_conf->md_flushes) + clear_bit(MD_NO_FUA, &mdev->flags); + else + set_bit(MD_NO_FUA, &mdev->flags); + drbd_bump_write_ordering(mdev->tconn, WO_bdev_flush); drbd_md_sync(mdev); @@ -1297,12 +1297,24 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) * to realize a "hot spare" feature (not that I'd recommend that) */ wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt)); + /* make sure there is no leftover from previous force-detach attempts */ + clear_bit(FORCE_DETACH, &mdev->flags); + clear_bit(WAS_IO_ERROR, &mdev->flags); + 
@@ -1297,12 +1297,24 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	 * to realize a "hot spare" feature (not that I'd recommend that) */
 	wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
 
+	/* make sure there is no leftover from previous force-detach attempts */
+	clear_bit(FORCE_DETACH, &mdev->flags);
+	clear_bit(WAS_IO_ERROR, &mdev->flags);
+	clear_bit(WAS_READ_ERROR, &mdev->flags);
+
+	/* and no leftover from previously aborted resync or verify, either */
+	mdev->rs_total = 0;
+	mdev->rs_failed = 0;
+	atomic_set(&mdev->rs_pending_cnt, 0);
+
 	/* allocation not in the IO path, drbdsetup context */
 	nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
 	if (!nbc) {
 		retcode = ERR_NOMEM;
 		goto fail;
 	}
+	spin_lock_init(&nbc->md.uuid_lock);
+
 	new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
 	if (!new_disk_conf) {
 		retcode = ERR_NOMEM;
@@ -1436,6 +1448,12 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 
 	drbd_suspend_io(mdev);
 	/* also wait for the last barrier ack. */
+	/* FIXME see also https://daiquiri.linbit/cgi-bin/bugzilla/show_bug.cgi?id=171
+	 * We need a way to either ignore barrier acks for barriers sent before a device
+	 * was attached, or a way to wait for all pending barrier acks to come in.
+	 * As barriers are counted per resource,
+	 * we'd need to suspend io on all devices of a resource.
+	 */
 	wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || drbd_suspended(mdev));
 	/* and for any other previously queued work */
 	drbd_flush_workqueue(mdev);
@@ -1602,7 +1620,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	   flags. */
 
 	if (rcu_dereference(mdev->ldev->disk_conf)->al_updates)
-		mdev->ldev->md.flags &= MDF_AL_DISABLED;
+		mdev->ldev->md.flags &= ~MDF_AL_DISABLED;
 	else
 		mdev->ldev->md.flags |= MDF_AL_DISABLED;
 
@@ -1675,6 +1693,7 @@ static int adm_detach(struct drbd_conf *mdev, int force)
 	int ret;
 	if (force) {
+		set_bit(FORCE_DETACH, &mdev->flags);
 		drbd_force_state(mdev, NS(disk, D_FAILED));
 		retcode = SS_SUCCESS;
 		goto out;
 	}
@@ -1781,9 +1800,6 @@ _check_net_options(struct drbd_tconn *tconn, struct net_conf *old_conf, struct n
 		if (new_conf->two_primaries != old_conf->two_primaries)
 			return ERR_NEED_APV_100;
 
-		if (!new_conf->integrity_alg != !old_conf->integrity_alg)
-			return ERR_NEED_APV_100;
-
 		if (strcmp(new_conf->integrity_alg, old_conf->integrity_alg))
 			return ERR_NEED_APV_100;
 	}
@@ -1805,7 +1821,7 @@ _check_net_options(struct drbd_tconn *tconn, struct net_conf *old_conf, struct n
 			return ERR_STONITH_AND_PROT_A;
 		}
 		if (mdev->state.role == R_PRIMARY && new_conf->discard_my_data)
-			return ERR_DISCARD;
+			return ERR_DISCARD_IMPOSSIBLE;
 	}
 
 	if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A)
@@ -2225,7 +2241,7 @@ void resync_after_online_grow(struct drbd_conf *mdev)
 	if (mdev->state.role != mdev->state.peer)
 		iass = (mdev->state.role == R_PRIMARY);
 	else
-		iass = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
+		iass = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags);
 
 	if (iass)
 		drbd_start_resync(mdev, C_SYNC_SOURCE);
@@ -2384,9 +2400,11 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
 	mdev = adm_ctx.mdev;
 
 	/* If there is still bitmap IO pending, probably because of a previous
-	 * resync just being finished, wait for it before requesting a new resync. */
+	 * resync just being finished, wait for it before requesting a new resync.
+	 * Also wait for it's after_state_ch(). */
 	drbd_suspend_io(mdev);
 	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
+	drbd_flush_workqueue(mdev);
 
 	retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED);
 
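In the _check_net_options() hunks above, the dropped !new_conf->integrity_alg != !old_conf->integrity_alg test was redundant: integrity_alg is a fixed-size, NUL-terminated char array rather than a possibly-NULL pointer, so "unset" is simply the empty string and strcmp() alone already detects the set-vs-unset case. A small userspace sketch; the 64-byte field size is illustrative:

#include <stdio.h>
#include <string.h>

/* Modeled on struct net_conf: integrity_alg is an embedded array,
 * never a NULL pointer, so "unset" means the empty string. */
struct conf { char integrity_alg[64]; };

int main(void)
{
	struct conf old_conf = { .integrity_alg = "" };
	struct conf new_conf = { .integrity_alg = "crc32c" };

	/* strcmp() already reports a difference when exactly one side
	 * is unset, so a separate !a != !b check adds nothing. */
	printf("changed: %d\n",
	       strcmp(new_conf.integrity_alg, old_conf.integrity_alg) != 0);
	return 0;
}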
@@ -2451,9 +2469,11 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
 	mdev = adm_ctx.mdev;
 
 	/* If there is still bitmap IO pending, probably because of a previous
-	 * resync just being finished, wait for it before requesting a new resync. */
+	 * resync just being finished, wait for it before requesting a new resync.
+	 * Also wait for it's after_state_ch(). */
 	drbd_suspend_io(mdev);
 	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
+	drbd_flush_workqueue(mdev);
 
 	retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED);
 	if (retcode < SS_SUCCESS) {
@@ -2638,12 +2658,29 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
 	if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) ||
 	    nla_put_u32(skb, T_current_state, mdev->state.i) ||
 	    nla_put_u64(skb, T_ed_uuid, mdev->ed_uuid) ||
-	    nla_put_u64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev)))
+	    nla_put_u64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev)) ||
+	    nla_put_u64(skb, T_send_cnt, mdev->send_cnt) ||
+	    nla_put_u64(skb, T_recv_cnt, mdev->recv_cnt) ||
+	    nla_put_u64(skb, T_read_cnt, mdev->read_cnt) ||
+	    nla_put_u64(skb, T_writ_cnt, mdev->writ_cnt) ||
+	    nla_put_u64(skb, T_al_writ_cnt, mdev->al_writ_cnt) ||
+	    nla_put_u64(skb, T_bm_writ_cnt, mdev->bm_writ_cnt) ||
+	    nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&mdev->ap_bio_cnt)) ||
+	    nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&mdev->ap_pending_cnt)) ||
+	    nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&mdev->rs_pending_cnt)))
 		goto nla_put_failure;
 
 	if (got_ldev) {
+		int err;
+
+		spin_lock_irq(&mdev->ldev->md.uuid_lock);
+		err = nla_put(skb, T_uuids, sizeof(si->uuids), mdev->ldev->md.uuid);
+		spin_unlock_irq(&mdev->ldev->md.uuid_lock);
+
+		if (err)
+			goto nla_put_failure;
+
 		if (nla_put_u32(skb, T_disk_flags, mdev->ldev->md.flags) ||
-		    nla_put(skb, T_uuids, sizeof(si->uuids), mdev->ldev->md.uuid) ||
 		    nla_put_u64(skb, T_bits_total, drbd_bm_bits(mdev)) ||
 		    nla_put_u64(skb, T_bits_oos, drbd_bm_total_weight(mdev)))
 			goto nla_put_failure;
@@ -2772,7 +2809,7 @@ next_tconn:
 		}
 	}
 
-	dh = genlmsg_put(skb, NETLINK_CB(cb->skb).pid,
+	dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
 			cb->nlh->nlmsg_seq, &drbd_genl_family,
 			NLM_F_MULTI, DRBD_ADM_GET_STATUS);
 	if (!dh)
@@ -2911,6 +2948,7 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
 {
 	struct drbd_conf *mdev;
 	enum drbd_ret_code retcode;
+	struct start_ov_parms parms;
 
 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
 	if (!adm_ctx.reply_skb)
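The drbd_adm_start_ov() hunk that follows also corrects the round-down of the resume position: BM_SECT_PER_BIT is a power of two (8 sectors per bitmap bit at DRBD's 4KiB-per-bit granularity), and aligning down to a multiple of N requires masking with ~(N-1); masking with ~N only clears a single bit. A self-contained check, hard-coding 8:

#include <assert.h>

#define BM_SECT_PER_BIT 8ULL	/* power of two, as in DRBD */

int main(void)
{
	unsigned long long sector = 13;	/* 0b1101 */

	/* buggy: ~8 clears only bit 3, so 13 becomes 5 -- not aligned */
	assert((sector & ~BM_SECT_PER_BIT) == 5);
	/* fixed: ~(8-1) clears the low three bits, rounding down to 8 */
	assert((sector & ~(BM_SECT_PER_BIT - 1)) == 8);
	return 0;
}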
@@ -2919,19 +2957,22 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
 		goto out;
 
 	mdev = adm_ctx.mdev;
+
+	/* resume from last known position, if possible */
+	parms.ov_start_sector = mdev->ov_start_sector;
+	parms.ov_stop_sector = ULLONG_MAX;
 	if (info->attrs[DRBD_NLA_START_OV_PARMS]) {
-		/* resume from last known position, if possible */
-		struct start_ov_parms parms =
-			{ .ov_start_sector = mdev->ov_start_sector };
 		int err = start_ov_parms_from_attrs(&parms, info);
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
 			drbd_msg_put_info(from_attrs_err_to_txt(err));
 			goto out;
 		}
-		/* w_make_ov_request expects position to be aligned */
-		mdev->ov_start_sector = parms.ov_start_sector & ~BM_SECT_PER_BIT;
 	}
+	/* w_make_ov_request expects position to be aligned */
+	mdev->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1);
+	mdev->ov_stop_sector = parms.ov_stop_sector;
+
 	/* If there is still bitmap IO pending, e.g. previous resync or verify
 	 * just being finished, wait for it before requesting a new resync. */
 	drbd_suspend_io(mdev);
@@ -3180,7 +3221,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
 	/* detach */
 	idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
 		retcode = adm_detach(mdev, 0);
-		if (retcode < SS_SUCCESS) {
+		if (retcode < SS_SUCCESS || retcode > NO_ERROR) {
 			drbd_msg_put_info("failed to detach");
 			goto out;
 		}
@@ -3256,6 +3297,13 @@ void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib)
 	unsigned seq;
 	int err = -ENOMEM;
 
+	if (sib->sib_reason == SIB_SYNC_PROGRESS) {
+		if (time_after(jiffies, mdev->rs_last_bcast + HZ))
+			mdev->rs_last_bcast = jiffies;
+		else
+			return;
+	}
+
 	seq = atomic_inc_return(&drbd_genl_seq);
 	msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
 	if (!msg)
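The final drbd_bcast_event() hunk rate-limits SIB_SYNC_PROGRESS broadcasts to at most one per second. time_after() compares jiffies values safely across counter wraparound; a hypothetical rate limiter of the same shape, assuming a single caller context as in the hunk above:

#include <linux/jiffies.h>
#include <linux/types.h>

/* Hypothetical limiter mirroring the SIB_SYNC_PROGRESS check:
 * allow at most one broadcast per second, wraparound-safe. */
static unsigned long last_bcast;

static bool may_broadcast(void)
{
	if (!time_after(jiffies, last_bcast + HZ))
		return false;	/* under a second since the last one */
	last_bcast = jiffies;
	return true;
}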