Merge branch 'drbd-8.4_ed6' into for-3.8-drivers-drbd-8.4_ed6
[firefly-linux-kernel-4.4.55.git] / drivers / block / drbd / drbd_receiver.c
index 26c30fd64ecfb154b7c99e4dee8c3b746ad5fc47..0331ad0b61e18583fb8f5fc5b768134eba0fd6ad 100644 (file)
@@ -425,7 +425,7 @@ static int drbd_finish_peer_reqs(struct drbd_conf *mdev)
                drbd_free_net_peer_req(mdev, peer_req);
 
        /* possible callbacks here:
-        * e_end_block, and e_end_resync_block, e_send_discard_write.
+        * e_end_block, and e_end_resync_block, e_send_superseded.
         * all ignore the last argument.
         */
        list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
@@ -490,54 +490,34 @@ static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flag
 
 static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)
 {
-       mm_segment_t oldfs;
-       struct kvec iov = {
-               .iov_base = buf,
-               .iov_len = size,
-       };
-       struct msghdr msg = {
-               .msg_iovlen = 1,
-               .msg_iov = (struct iovec *)&iov,
-               .msg_flags = MSG_WAITALL | MSG_NOSIGNAL
-       };
        int rv;
 
-       oldfs = get_fs();
-       set_fs(KERNEL_DS);
+       rv = drbd_recv_short(tconn->data.socket, buf, size, 0);
 
-       for (;;) {
-               rv = sock_recvmsg(tconn->data.socket, &msg, size, msg.msg_flags);
-               if (rv == size)
-                       break;
+       if (rv < 0) {
+               if (rv == -ECONNRESET)
+                       conn_info(tconn, "sock was reset by peer\n");
+               else if (rv != -ERESTARTSYS)
+                       conn_err(tconn, "sock_recvmsg returned %d\n", rv);
+       } else if (rv == 0) {
+               if (test_bit(DISCONNECT_SENT, &tconn->flags)) {
+                       long t;
+                       rcu_read_lock();
+                       t = rcu_dereference(tconn->net_conf)->ping_timeo * HZ/10;
+                       rcu_read_unlock();
 
-               /* Note:
-                * ECONNRESET   other side closed the connection
-                * ERESTARTSYS  (on  sock) we got a signal
-                */
+                       t = wait_event_timeout(tconn->ping_wait, tconn->cstate < C_WF_REPORT_PARAMS, t);
 
-               if (rv < 0) {
-                       if (rv == -ECONNRESET)
-                               conn_info(tconn, "sock was reset by peer\n");
-                       else if (rv != -ERESTARTSYS)
-                               conn_err(tconn, "sock_recvmsg returned %d\n", rv);
-                       break;
-               } else if (rv == 0) {
-                       conn_info(tconn, "sock was shut down by peer\n");
-                       break;
-               } else  {
-                       /* signal came in, or peer/link went down,
-                        * after we read a partial message
-                        */
-                       /* D_ASSERT(signal_pending(current)); */
-                       break;
+                       if (t)
+                               goto out;
                }
-       };
-
-       set_fs(oldfs);
+               conn_info(tconn, "sock was shut down by peer\n");
+       }
 
        if (rv != size)
                conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);
 
+out:
        return rv;
 }
 
@@ -679,21 +659,15 @@ struct accept_wait_data {
 
 };
 
-static void incomming_connection(struct sock *sk)
+static void drbd_incoming_connection(struct sock *sk)
 {
        struct accept_wait_data *ad = sk->sk_user_data;
-       struct drbd_tconn *tconn = ad->tconn;
-
-       if (sk->sk_state != TCP_ESTABLISHED)
-               conn_warn(tconn, "unexpected tcp state change. sk_state = %d\n", sk->sk_state);
+       void (*state_change)(struct sock *sk);
 
-       write_lock_bh(&sk->sk_callback_lock);
-       sk->sk_state_change = ad->original_sk_state_change;
-       sk->sk_user_data = NULL;
-       write_unlock_bh(&sk->sk_callback_lock);
-
-       sk->sk_state_change(sk);
-       complete(&ad->door_bell);
+       state_change = ad->original_sk_state_change;
+       if (sk->sk_state == TCP_ESTABLISHED)
+               complete(&ad->door_bell);
+       state_change(sk);
 }
 
 static int prepare_listen_socket(struct drbd_tconn *tconn, struct accept_wait_data *ad)
@@ -725,7 +699,7 @@ static int prepare_listen_socket(struct drbd_tconn *tconn, struct accept_wait_da
                goto out;
        }
 
-       s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */
+       s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
        drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);
 
        what = "bind before listen";
@@ -736,7 +710,7 @@ static int prepare_listen_socket(struct drbd_tconn *tconn, struct accept_wait_da
        ad->s_listen = s_listen;
        write_lock_bh(&s_listen->sk->sk_callback_lock);
        ad->original_sk_state_change = s_listen->sk->sk_state_change;
-       s_listen->sk->sk_state_change = incomming_connection;
+       s_listen->sk->sk_state_change = drbd_incoming_connection;
        s_listen->sk->sk_user_data = ad;
        write_unlock_bh(&s_listen->sk->sk_callback_lock);
 
@@ -759,6 +733,14 @@ out:
        return -EIO;
 }
 
+static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
+{
+       write_lock_bh(&sk->sk_callback_lock);
+       sk->sk_state_change = ad->original_sk_state_change;
+       sk->sk_user_data = NULL;
+       write_unlock_bh(&sk->sk_callback_lock);
+}
+
 static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn, struct accept_wait_data *ad)
 {
        int timeo, connect_int, err = 0;
@@ -789,6 +771,9 @@ static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn, struct acc
                }
        }
 
+       if (s_estab)
+               unregister_state_change(s_estab->sk, ad);
+
        return s_estab;
 }
 
@@ -889,6 +874,7 @@ static int conn_connect(struct drbd_tconn *tconn)
                .door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
        };
 
+       clear_bit(DISCONNECT_SENT, &tconn->flags);
        if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
                return -2;
 
@@ -901,8 +887,6 @@ static int conn_connect(struct drbd_tconn *tconn)
        msock.rbuf = tconn->meta.rbuf;
        msock.socket = NULL;
 
-       clear_bit(DISCARD_CONCURRENT, &tconn->flags);
-
        /* Assume that the peer only understands protocol 80 until we know better.  */
        tconn->agreed_pro_version = 80;
 
@@ -918,6 +902,7 @@ static int conn_connect(struct drbd_tconn *tconn)
                                sock.socket = s;
                                send_first_packet(tconn, &sock, P_INITIAL_DATA);
                        } else if (!msock.socket) {
+                               clear_bit(RESOLVE_CONFLICTS, &tconn->flags);
                                msock.socket = s;
                                send_first_packet(tconn, &msock, P_INITIAL_META);
                        } else {
@@ -955,7 +940,7 @@ retry:
                                sock.socket = s;
                                break;
                        case P_INITIAL_META:
-                               set_bit(DISCARD_CONCURRENT, &tconn->flags);
+                               set_bit(RESOLVE_CONFLICTS, &tconn->flags);
                                if (msock.socket) {
                                        conn_warn(tconn, "initial packet M crossed\n");
                                        sock_release(msock.socket);
@@ -989,8 +974,8 @@ randomize:
        if (ad.s_listen)
                sock_release(ad.s_listen);
 
-       sock.socket->sk->sk_reuse = 1; /* SO_REUSEADDR */
-       msock.socket->sk->sk_reuse = 1; /* SO_REUSEADDR */
+       sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
+       msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
 
        sock.socket->sk->sk_allocation = GFP_NOIO;
        msock.socket->sk->sk_allocation = GFP_NOIO;
@@ -1804,7 +1789,7 @@ static void restart_conflicting_writes(struct drbd_conf *mdev,
                        continue;
                /* as it is RQ_POSTPONED, this will cause it to
                 * be queued on the retry workqueue. */
-               __req_mod(req, DISCARD_WRITE, NULL);
+               __req_mod(req, CONFLICT_RESOLVED, NULL);
        }
 }
 
@@ -1865,9 +1850,9 @@ static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
        return err;
 }
 
-static int e_send_discard_write(struct drbd_work *w, int unused)
+static int e_send_superseded(struct drbd_work *w, int unused)
 {
-       return e_send_ack(w, P_DISCARD_WRITE);
+       return e_send_ack(w, P_SUPERSEDED);
 }
 
 static int e_send_retry_write(struct drbd_work *w, int unused)
@@ -1875,7 +1860,7 @@ static int e_send_retry_write(struct drbd_work *w, int unused)
        struct drbd_tconn *tconn = w->mdev->tconn;
 
        return e_send_ack(w, tconn->agreed_pro_version >= 100 ?
-                            P_RETRY_WRITE : P_DISCARD_WRITE);
+                            P_RETRY_WRITE : P_SUPERSEDED);
 }
 
 static bool seq_greater(u32 a, u32 b)
@@ -1900,7 +1885,7 @@ static bool need_peer_seq(struct drbd_conf *mdev)
 
        /*
         * We only need to keep track of the last packet_seq number of our peer
-        * if we are in dual-primary mode and we have the discard flag set; see
+        * if we are in dual-primary mode and we have the resolve-conflicts flag set; see
         * handle_write_conflicts().
         */
 
@@ -1908,7 +1893,7 @@ static bool need_peer_seq(struct drbd_conf *mdev)
        tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries;
        rcu_read_unlock();
 
-       return tp && test_bit(DISCARD_CONCURRENT, &tconn->flags);
+       return tp && test_bit(RESOLVE_CONFLICTS, &tconn->flags);
 }
 
 static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
@@ -2049,7 +2034,7 @@ static int handle_write_conflicts(struct drbd_conf *mdev,
                                  struct drbd_peer_request *peer_req)
 {
        struct drbd_tconn *tconn = mdev->tconn;
-       bool resolve_conflicts = test_bit(DISCARD_CONCURRENT, &tconn->flags);
+       bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &tconn->flags);
        sector_t sector = peer_req->i.sector;
        const unsigned int size = peer_req->i.size;
        struct drbd_interval *i;
@@ -2083,11 +2068,11 @@ static int handle_write_conflicts(struct drbd_conf *mdev,
                if (resolve_conflicts) {
                        /*
                         * If the peer request is fully contained within the
-                        * overlapping request, it can be discarded; otherwise,
-                        * it will be retried once all overlapping requests
-                        * have completed.
+                        * overlapping request, it can be considered overwritten
+                        * and thus superseded; otherwise, it will be retried
+                        * once all overlapping requests have completed.
                         */
-                       bool discard = i->sector <= sector && i->sector +
+                       bool superseded = i->sector <= sector && i->sector +
                                       (i->size >> 9) >= sector + (size >> 9);
 
                        if (!equal)
@@ -2096,10 +2081,10 @@ static int handle_write_conflicts(struct drbd_conf *mdev,
                                               "assuming %s came first\n",
                                          (unsigned long long)i->sector, i->size,
                                          (unsigned long long)sector, size,
-                                         discard ? "local" : "remote");
+                                         superseded ? "local" : "remote");
 
                        inc_unacked(mdev);
-                       peer_req->w.cb = discard ? e_send_discard_write :
+                       peer_req->w.cb = superseded ? e_send_superseded :
                                                   e_send_retry_write;
                        list_add_tail(&peer_req->w.list, &mdev->done_ee);
                        wake_asender(mdev->tconn);
@@ -2120,8 +2105,9 @@ static int handle_write_conflicts(struct drbd_conf *mdev,
                            !(req->rq_state & RQ_POSTPONED)) {
                                /*
-                                * Wait for the node with the discard flag to
-                                * decide if this request will be discarded or
-                                * retried.  Requests that are discarded will
+                                * Wait for the node with the resolve-conflicts
+                                * flag to decide if this request has been
+                                * superseded or needs to be retried.
+                                * Requests that have been superseded will
                                 * disappear from the write_requests tree.
                                 *
                                 * In addition, wait for the conflicting
@@ -2600,7 +2586,7 @@ static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
                     "Using discard-least-changes instead\n");
        case ASB_DISCARD_ZERO_CHG:
                if (ch_peer == 0 && ch_self == 0) {
-                       rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
+                       rv = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags)
                                ? -1 : 1;
                        break;
                } else {
@@ -2616,7 +2602,7 @@ static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
                        rv =  1;
                else /* ( ch_self == ch_peer ) */
                     /* Well, then use something else. */
-                       rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
+                       rv = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags)
                                ? -1 : 1;
                break;
        case ASB_DISCARD_LOCAL:
@@ -2791,7 +2777,9 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l
                        if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
                            (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
                                dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
-                               drbd_uuid_set_bm(mdev, 0UL);
+                               drbd_uuid_move_history(mdev);
+                               mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP];
+                               mdev->ldev->md.uuid[UI_BITMAP] = 0;
 
                                drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
                                               mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
@@ -2839,7 +2827,7 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l
                case 1: /*  self_pri && !peer_pri */ return 1;
                case 2: /* !self_pri &&  peer_pri */ return -1;
                case 3: /*  self_pri &&  peer_pri */
-                       dc = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
+                       dc = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags);
                        return dc ? -1 : 1;
                }
        }
@@ -2899,8 +2887,8 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l
                        if (mdev->tconn->agreed_pro_version < 91)
                                return -1091;
 
-                       _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
-                       _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
+                       __drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
+                       __drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
 
                        dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
                        drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
@@ -2954,11 +2942,14 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol
                mydisk = mdev->new_state_tmp.disk;
 
        dev_info(DEV, "drbd_sync_handshake:\n");
+
+       spin_lock_irq(&mdev->ldev->md.uuid_lock);
        drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
        drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
                       mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
 
        hg = drbd_uuid_compare(mdev, &rule_nr);
+       spin_unlock_irq(&mdev->ldev->md.uuid_lock);
 
        dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
 
@@ -3775,7 +3766,7 @@ static int receive_req_state(struct drbd_tconn *tconn, struct packet_info *pi)
        mask.i = be32_to_cpu(p->mask);
        val.i = be32_to_cpu(p->val);
 
-       if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) &&
+       if (test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags) &&
            mutex_is_locked(mdev->state_mutex)) {
                drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
                return 0;
@@ -3801,7 +3792,7 @@ static int receive_req_conn_state(struct drbd_tconn *tconn, struct packet_info *
        mask.i = be32_to_cpu(p->mask);
        val.i = be32_to_cpu(p->val);
 
-       if (test_bit(DISCARD_CONCURRENT, &tconn->flags) &&
+       if (test_bit(RESOLVE_CONFLICTS, &tconn->flags) &&
            mutex_is_locked(&tconn->cstate_mutex)) {
                conn_send_sr_reply(tconn, SS_CONCURRENT_ST_CHG);
                return 0;
@@ -4522,6 +4513,10 @@ static int drbd_disconnected(struct drbd_conf *mdev)
-          necessary to reclain net_ee in drbd_finish_peer_reqs(). */
+          necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
        drbd_flush_workqueue(mdev);
 
+       /* need to do it again, drbd_finish_peer_reqs() may have populated it
+        * again via drbd_try_clear_on_disk_bm(). */
+       drbd_rs_cancel_all(mdev);
+
        kfree(mdev->p_uuid);
        mdev->p_uuid = NULL;
 
@@ -4995,8 +4990,8 @@ static int got_BlockAck(struct drbd_tconn *tconn, struct packet_info *pi)
        case P_RECV_ACK:
                what = RECV_ACKED_BY_PEER;
                break;
-       case P_DISCARD_WRITE:
-               what = DISCARD_WRITE;
+       case P_SUPERSEDED:
+               what = CONFLICT_RESOLVED;
                break;
        case P_RETRY_WRITE:
                what = POSTPONE_WRITE;
@@ -5221,7 +5216,7 @@ static struct asender_cmd asender_tbl[] = {
        [P_RECV_ACK]        = { sizeof(struct p_block_ack), got_BlockAck },
        [P_WRITE_ACK]       = { sizeof(struct p_block_ack), got_BlockAck },
        [P_RS_WRITE_ACK]    = { sizeof(struct p_block_ack), got_BlockAck },
-       [P_DISCARD_WRITE]   = { sizeof(struct p_block_ack), got_BlockAck },
+       [P_SUPERSEDED]   = { sizeof(struct p_block_ack), got_BlockAck },
        [P_NEG_ACK]         = { sizeof(struct p_block_ack), got_NegAck },
        [P_NEG_DREPLY]      = { sizeof(struct p_block_ack), got_NegDReply },
        [P_NEG_RS_DREPLY]   = { sizeof(struct p_block_ack), got_NegRSDReply },
@@ -5306,6 +5301,18 @@ int drbd_asender(struct drbd_thread *thi)
                        received += rv;
                        buf      += rv;
                } else if (rv == 0) {
+                       if (test_bit(DISCONNECT_SENT, &tconn->flags)) {
+                               long t;
+                               rcu_read_lock();
+                               t = rcu_dereference(tconn->net_conf)->ping_timeo * HZ/10;
+                               rcu_read_unlock();
+
+                               t = wait_event_timeout(tconn->ping_wait,
+                                                      tconn->cstate < C_WF_REPORT_PARAMS,
+                                                      t);
+                               if (t)
+                                       break;
+                       }
                        conn_err(tconn, "meta connection shut down by peer.\n");
                        goto reconnect;
                } else if (rv == -EAGAIN) {
@@ -5370,6 +5377,7 @@ int drbd_asender(struct drbd_thread *thi)
        if (0) {
 reconnect:
                conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
+               conn_md_sync(tconn);
        }
        if (0) {
 disconnect: